]> git.saurik.com Git - wxWidgets.git/blob - include/wx/stringops.h
added wxStreamBuffer::Truncate() (patch 1687081)
[wxWidgets.git] / include / wx / stringops.h
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: wx/stringops.h
3 // Purpose: implementation of wxString primitive operations
4 // Author: Vaclav Slavik
5 // Modified by:
6 // Created: 2007-04-16
7 // RCS-ID: $Id$
8 // Copyright: (c) 2007 REA Elektronik GmbH
9 // Licence: wxWindows licence
10 ///////////////////////////////////////////////////////////////////////////////
11
12 #ifndef _WX_WXSTRINGOPS_H__
13 #define _WX_WXSTRINGOPS_H__
14
15 #include "wx/chartype.h"
16 #include "wx/stringimpl.h"
17 #include "wx/unichar.h"
18
19 // This header contains wxStringOperations "namespace" class that implements
20 // elementary operations on string data as static methods; wxString methods and
21 // iterators are implemented in terms of it. Two implementations are available,
22 // one for UTF-8 encoded char* strings and one for "raw" wchar_t* strings (or
23 // char* in ANSI build).
24
25 // FIXME-UTF8: only wchar after we remove ANSI build
26 #if wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
27 struct WXDLLIMPEXP_BASE wxStringOperationsWchar
28 {
29 // moves the iterator to the next Unicode character
30 static void IncIter(wxStringImpl::iterator& i) { ++i; }
31 static void IncIter(wxStringImpl::const_iterator& i) { ++i; }
32
33 // moves the iterator to the previous Unicode character
34 static void DecIter(wxStringImpl::iterator& i) { --i; }
35 static void DecIter(wxStringImpl::const_iterator& i) { --i; }
36
37 // moves the iterator by n Unicode characters
38 static wxStringImpl::iterator AddToIter(const wxStringImpl::iterator& i, int n)
39 { return i + n; }
40 static wxStringImpl::const_iterator AddToIter(const wxStringImpl::const_iterator& i, int n)
41 { return i + n; }
42
43 // returns distance of the two iterators in Unicode characters
44 static int DiffIters(const wxStringImpl::iterator& i1,
45 const wxStringImpl::iterator& i2)
46 { return i1 - i2; }
47 static int DiffIters(const wxStringImpl::const_iterator& i1,
48 const wxStringImpl::const_iterator& i2)
49 { return i1 - i2; }
50
51 // encodes the character to a form used to represent it in internal
52 // representation (returns a string in UTF8 version)
53 static wxChar EncodeChar(const wxUniChar& ch) { return (wxChar)ch; }
54
55 static wxUniChar DecodeChar(const wxStringImpl::const_iterator& i)
56 { return *i; }
57 };
58 #endif // wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
59
60
61 #if wxUSE_UNICODE_UTF8
62 struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
63 {
64 // checks correctness of UTF-8 sequence
65 static bool IsValidUtf8String(const char *c);
66 #ifdef __WXDEBUG__
67 static bool IsValidUtf8LeadByte(unsigned char c);
68 #endif
69
70 // table of offsets to skip forward when iterating over UTF-8 sequence
71 static unsigned char ms_utf8IterTable[256];
72
73
74 template<typename Iterator>
75 static void IncIter(Iterator& i)
76 {
77 wxASSERT( IsValidUtf8LeadByte(*i) );
78 i += ms_utf8IterTable[(unsigned char)*i];
79 }
80
81 template<typename Iterator>
82 static void DecIter(Iterator& i)
83 {
84 wxASSERT( IsValidUtf8LeadByte(*i) );
85
86 // Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in
87 // binary), so we just have to go back until we hit a byte that is
88 // either < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in
89 // binary; this includes some invalid values, but we can ignore it
90 // here, because we assume valid UTF-8 input for the purpose of
91 // efficient implementation).
92 --i;
93 while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
94 --i;
95 }
96
97 template<typename Iterator>
98 static Iterator AddToIter(const Iterator& i, int n)
99 {
100 Iterator out(i);
101
102 if ( n > 0 )
103 {
104 for ( int j = 0; j < n; ++j )
105 IncIter(out);
106 }
107 else if ( n < 0 )
108 {
109 for ( int j = 0; j > n; --j )
110 DecIter(out);
111 }
112
113 return out;
114 }
115
116 template<typename Iterator>
117 static int DiffIters(Iterator i1, Iterator i2)
118 {
119 int dist = 0;
120
121 if ( i1 < i2 )
122 {
123 while ( i1 != i2 )
124 {
125 IncIter(i1);
126 dist--;
127 }
128 }
129 else if ( i2 < i1 )
130 {
131 while ( i2 != i1 )
132 {
133 IncIter(i2);
134 dist++;
135 }
136 }
137
138 return dist;
139 }
140
141 // buffer for single UTF-8 character
142 struct Utf8CharBuffer
143 {
144 char data[5];
145 operator const char*() const { return data; }
146 };
147
148 // encodes the character as UTF-8:
149 static Utf8CharBuffer EncodeChar(const wxUniChar& ch);
150
151 // returns n copies of ch encoded in UTF-8 string
152 static wxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
153
154 // returns the length of UTF-8 encoding of the character with lead byte 'c'
155 static size_t GetUtf8CharLength(char c)
156 {
157 wxASSERT( IsValidUtf8LeadByte(c) );
158 return ms_utf8IterTable[(unsigned char)c];
159 }
160
161 // decodes single UTF-8 character from UTF-8 string
162 static wxUniChar DecodeChar(wxStringImpl::const_iterator i);
163 };
164 #endif // wxUSE_UNICODE_UTF8
165
166
167 #if wxUSE_UNICODE_UTF8
168 typedef wxStringOperationsUtf8 wxStringOperations;
169 #else
170 typedef wxStringOperationsWchar wxStringOperations;
171 #endif
172
173 #endif // _WX_WXSTRINGOPS_H__