]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/unicode/symtable.h
ICU-6.2.8.tar.gz
[apple/icu.git] / icuSources / common / unicode / symtable.h
1 /*
2 **********************************************************************
3 * Copyright (c) 2000-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 02/04/00 aliu Creation.
8 **********************************************************************
9 */
10 #ifndef SYMTABLE_H
11 #define SYMTABLE_H
12
13 #include "unicode/utypes.h"
14 #include "unicode/uobject.h"
15
16 U_NAMESPACE_BEGIN
17
18 class ParsePosition; /* forward declaration: taken by reference in SymbolTable::parseReference() */
19 class UnicodeFunctor; /* forward declaration: returned by const pointer from SymbolTable::lookupMatcher() */
20 class UnicodeSet; /* forward declaration: only mentioned in the documentation below, not in any declaration shown here */
21 class UnicodeString; /* forward declaration: used in the signatures of lookup() and parseReference() */
22
23 /**
24 * An interface that defines both a lookup protocol and the parsing of
25 * symbolic names. Every member function is pure virtual.
26 *
27 * <p>A symbol table maintains two kinds of mappings. The first is
28 * between symbolic names and their values. For example, if the
29 * variable with the name "start" is set to the value "alpha"
30 * (perhaps, though not necessarily, through an expression such as
31 * "$start=alpha"), then the call lookup("start") will return a
32 * pointer to a UnicodeString holding "alpha".
33 *
34 * <p>The second kind of mapping is between character values and
35 * matcher objects, returned as UnicodeFunctor pointers. This is used by RuleBasedTransliterator,
36 * which uses characters in the private use area to represent objects
37 * such as UnicodeSets. If U+E015 is mapped to the UnicodeSet [a-z],
38 * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
39 *
40 * <p>Finally, a symbol table defines parsing behavior for symbolic
41 * names. All symbolic names start with the SYMBOL_REF character.
42 * When a parser encounters this character, it calls parseReference()
43 * with the position immediately following the SYMBOL_REF. The symbol
44 * table parses the name, if there is one, and returns it.
45 *
46 * @draft ICU 2.8
47 */
48 class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
49 public:
50
51 /**
52 * The character preceding a symbol reference name (U+0024, '$').
53 * @draft ICU 2.8
54 */
55 enum { SYMBOL_REF = 0x0024 /*$*/ };
56
57 /**
58 * Destructor. Virtual, so that implementations can be destroyed through a SymbolTable pointer.
59 * @draft ICU 2.8
60 */
61 virtual ~SymbolTable();
62
63 /**
64 * Look up the characters associated with this string and return them.
65 * Return <tt>NULL</tt> if no such name exists. The resultant
66 * string may have length zero.
67 * @param s the symbolic name to look up
68 * @return a pointer to a string containing the name's value, or <tt>NULL</tt> if
69 * there is no mapping for s.
70 * @draft ICU 2.8
71 */
72 virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
73
74 /**
75 * Look up the matcher object (a UnicodeFunctor) associated with the given character, and
76 * return it. Return <tt>NULL</tt> if not found.
77 * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
78 * @return a pointer to the UnicodeFunctor object represented by the given
79 * character, or NULL if there is no mapping for ch.
80 * @draft ICU 2.8
81 */
82 virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;
83
84 /**
85 * Parse a symbol reference name from the given string, starting
86 * at the given position. If no valid symbol reference name is
87 * found, return the empty string and leave pos unchanged. That is, if the
88 * character at pos cannot start a name, or if pos is at or after
89 * text.length(), then return an empty string. This indicates an
90 * isolated SYMBOL_REF character.
91 * @param text the text to parse for the name
92 * @param pos a ParsePosition holding, on entry, the index of the first character to parse.
93 * This is the character following the SYMBOL_REF character. On
94 * exit, it holds the index after the last parsed character. If the parse
95 * failed, pos is unchanged on exit.
96 * @param limit the index after the last character to be parsed.
97 * @return the parsed name, or an empty string if there is no
98 * valid symbolic name at the given position.
99 * @draft ICU 2.8
100 */
101 virtual UnicodeString parseReference(const UnicodeString& text,
102 ParsePosition& pos, int32_t limit) const = 0;
103 };
104 U_NAMESPACE_END
105
106 #endif