]> git.saurik.com Git - wxWidgets.git/blob - include/wx/stringops.h
2cfbe188e40ad3a746ffa81f32ea9acfd9368c24
[wxWidgets.git] / include / wx / stringops.h
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: wx/stringops.h
3 // Purpose: implementation of wxString primitive operations
4 // Author: Vaclav Slavik
5 // Modified by:
6 // Created: 2007-04-16
7 // RCS-ID: $Id$
8 // Copyright: (c) 2007 REA Elektronik GmbH
9 // Licence: wxWindows licence
10 ///////////////////////////////////////////////////////////////////////////////
11
12 #ifndef _WX_WXSTRINGOPS_H__
13 #define _WX_WXSTRINGOPS_H__
14
15 #include "wx/chartype.h"
16 #include "wx/stringimpl.h"
17 #include "wx/unichar.h"
18 #include "wx/buffer.h"
19
20 // This header contains wxStringOperations "namespace" class that implements
21 // elementary operations on string data as static methods; wxString methods and
22 // iterators are implemented in terms of it. Two implementations are available,
23 // one for UTF-8 encoded char* string and one for "raw" wchar_t* strings (or
24 // char* in ANSI build).
25
26 // FIXME-UTF8: only wchar after we remove ANSI build
27 #if wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
28 struct WXDLLIMPEXP_BASE wxStringOperationsWchar
29 {
30 // moves the iterator to the next Unicode character
31 static void IncIter(wxStringImpl::iterator& i) { ++i; }
32 static void IncIter(wxStringImpl::const_iterator& i) { ++i; }
33
34 // moves the iterator to the previous Unicode character
35 static void DecIter(wxStringImpl::iterator& i) { --i; }
36 static void DecIter(wxStringImpl::const_iterator& i) { --i; }
37
38 // moves the iterator by n Unicode characters
39 static wxStringImpl::iterator AddToIter(const wxStringImpl::iterator& i, int n)
40 { return i + n; }
41 static wxStringImpl::const_iterator AddToIter(const wxStringImpl::const_iterator& i, int n)
42 { return i + n; }
43 static const wxChar* AddToIter(const wxChar *i, int n)
44 { return i + n; }
45
46 // returns distance of the two iterators in Unicode characters
47 static int DiffIters(const wxStringImpl::iterator& i1,
48 const wxStringImpl::iterator& i2)
49 { return i1 - i2; }
50 static int DiffIters(const wxStringImpl::const_iterator& i1,
51 const wxStringImpl::const_iterator& i2)
52 { return i1 - i2; }
53
54 // encodes the character to a form used to represent it in internal
55 // representation (returns a string in UTF8 version)
56 static wxChar EncodeChar(const wxUniChar& ch) { return (wxChar)ch; }
57
58 static wxUniChar DecodeChar(const wxStringImpl::const_iterator& i)
59 { return *i; }
60 };
61 #endif // wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
62
63
64 #if wxUSE_UNICODE_UTF8
65 struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
66 {
67 // checks correctness of UTF-8 sequence
68 static bool IsValidUtf8String(const char *c);
69 #ifdef __WXDEBUG__
70 static bool IsValidUtf8LeadByte(unsigned char c);
71 #endif
72
73 // table of offsets to skip forward when iterating over UTF-8 sequence
74 static unsigned char ms_utf8IterTable[256];
75
76
77 template<typename Iterator>
78 static void IncIter(Iterator& i)
79 {
80 wxASSERT( IsValidUtf8LeadByte(*i) );
81 i += ms_utf8IterTable[(unsigned char)*i];
82 }
83
84 template<typename Iterator>
85 static void DecIter(Iterator& i)
86 {
87 wxASSERT( IsValidUtf8LeadByte(*i) );
88
89 // Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in
90 // binary), so we just have to go back until we hit a byte that is
91 // either < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in
92 // binary; this includes some invalid values, but we can ignore it
93 // here, because we assume valid UTF-8 input for the purpose of
94 // efficient implementation).
95 --i;
96 while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
97 --i;
98 }
99
100 template<typename Iterator>
101 static Iterator AddToIter(const Iterator& i, int n)
102 {
103 Iterator out(i);
104
105 if ( n > 0 )
106 {
107 for ( int j = 0; j < n; ++j )
108 IncIter(out);
109 }
110 else if ( n < 0 )
111 {
112 for ( int j = 0; j > n; --j )
113 DecIter(out);
114 }
115
116 return out;
117 }
118
119 template<typename Iterator>
120 static int DiffIters(Iterator i1, Iterator i2)
121 {
122 int dist = 0;
123
124 if ( i1 < i2 )
125 {
126 while ( i1 != i2 )
127 {
128 IncIter(i1);
129 dist--;
130 }
131 }
132 else if ( i2 < i1 )
133 {
134 while ( i2 != i1 )
135 {
136 IncIter(i2);
137 dist++;
138 }
139 }
140
141 return dist;
142 }
143
144 // buffer for single UTF-8 character
145 struct Utf8CharBuffer
146 {
147 char data[5];
148 operator const char*() const { return data; }
149 };
150
151 // encodes the character as UTF-8:
152 static Utf8CharBuffer EncodeChar(const wxUniChar& ch);
153
154 // returns n copies of ch encoded in UTF-8 string
155 static wxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
156
157 // returns the length of UTF-8 encoding of the character with lead byte 'c'
158 static size_t GetUtf8CharLength(char c)
159 {
160 wxASSERT( IsValidUtf8LeadByte(c) );
161 return ms_utf8IterTable[(unsigned char)c];
162 }
163
164 // decodes single UTF-8 character from UTF-8 string
165 static wxUniChar DecodeChar(wxStringImpl::const_iterator i);
166 };
167 #endif // wxUSE_UNICODE_UTF8
168
169
170 #if wxUSE_UNICODE_UTF8
171 typedef wxStringOperationsUtf8 wxStringOperations;
172 #else
173 typedef wxStringOperationsWchar wxStringOperations;
174 #endif
175
176 #endif // _WX_WXSTRINGOPS_H__