1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: wx/stringops.h
3 // Purpose: implementation of wxString primitive operations
4 // Author: Vaclav Slavik
8 // Copyright: (c) 2007 REA Elektronik GmbH
9 // Licence: wxWindows licence
10 ///////////////////////////////////////////////////////////////////////////////
12 #ifndef _WX_WXSTRINGOPS_H__
13 #define _WX_WXSTRINGOPS_H__
15 #include "wx/chartype.h"
16 #include "wx/stringimpl.h"
17 #include "wx/unichar.h"
18 #include "wx/buffer.h"
20 // This header contains wxStringOperations "namespace" class that implements
21 // elementary operations on string data as static methods; wxString methods and
22 // iterators are implemented in terms of it. Two implementations are available,
23 // one for UTF-8 encoded char* string and one for "raw" wchar_t* strings (or
24 // char* in ANSI build).
26 // FIXME-UTF8: only wchar after we remove ANSI build
27 #if wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
28 struct WXDLLIMPEXP_BASE wxStringOperationsWchar
30 // moves the iterator to the next Unicode character
31 static void IncIter(wxStringImpl::iterator
& i
) { ++i
; }
32 static void IncIter(wxStringImpl::const_iterator
& i
) { ++i
; }
34 // moves the iterator to the previous Unicode character
35 static void DecIter(wxStringImpl::iterator
& i
) { --i
; }
36 static void DecIter(wxStringImpl::const_iterator
& i
) { --i
; }
38 // moves the iterator by n Unicode characters
39 static wxStringImpl::iterator
AddToIter(const wxStringImpl::iterator
& i
, int n
)
41 static wxStringImpl::const_iterator
AddToIter(const wxStringImpl::const_iterator
& i
, int n
)
43 static const wxChar
* AddToIter(const wxChar
*i
, int n
)
46 // returns distance of the two iterators in Unicode characters
47 static int DiffIters(const wxStringImpl::iterator
& i1
,
48 const wxStringImpl::iterator
& i2
)
50 static int DiffIters(const wxStringImpl::const_iterator
& i1
,
51 const wxStringImpl::const_iterator
& i2
)
54 // encodes the character to a form used to represent it in internal
55 // representation (returns a string in UTF8 version)
56 static wxChar
EncodeChar(const wxUniChar
& ch
) { return (wxChar
)ch
; }
58 static wxUniChar
DecodeChar(const wxStringImpl::const_iterator
& i
)
61 #endif // wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
64 #if wxUSE_UNICODE_UTF8
65 struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
67 // checks correctness of UTF-8 sequence
68 static bool IsValidUtf8String(const char *c
,
69 size_t len
= wxStringImpl::npos
);
71 static bool IsValidUtf8LeadByte(unsigned char c
);
74 // table of offsets to skip forward when iterating over UTF-8 sequence
75 static unsigned char ms_utf8IterTable
[256];
78 template<typename Iterator
>
79 static void IncIter(Iterator
& i
)
81 wxASSERT( IsValidUtf8LeadByte(*i
) );
82 i
+= ms_utf8IterTable
[(unsigned char)*i
];
85 template<typename Iterator
>
86 static void DecIter(Iterator
& i
)
88 wxASSERT( IsValidUtf8LeadByte(*i
) );
90 // Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in
91 // binary), so we just have to go back until we hit a byte that is
92 // either < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in
93 // binary; this includes some invalid values, but we can ignore it
94 // here, because we assume valid UTF-8 input for the purpose of
95 // efficient implementation).
97 while ( ((*i
) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
101 template<typename Iterator
>
102 static Iterator
AddToIter(const Iterator
& i
, int n
)
108 for ( int j
= 0; j
< n
; ++j
)
113 for ( int j
= 0; j
> n
; --j
)
120 template<typename Iterator
>
121 static int DiffIters(Iterator i1
, Iterator i2
)
145 // buffer for single UTF-8 character
146 struct Utf8CharBuffer
149 operator const char*() const { return data
; }
152 // encodes the character as UTF-8:
153 static Utf8CharBuffer
EncodeChar(const wxUniChar
& ch
);
155 // returns n copies of ch encoded in UTF-8 string
156 static wxCharBuffer
EncodeNChars(size_t n
, const wxUniChar
& ch
);
158 // returns the length of UTF-8 encoding of the character with lead byte 'c'
159 static size_t GetUtf8CharLength(char c
)
161 wxASSERT( IsValidUtf8LeadByte(c
) );
162 return ms_utf8IterTable
[(unsigned char)c
];
165 // decodes single UTF-8 character from UTF-8 string
166 static wxUniChar
DecodeChar(wxStringImpl::const_iterator i
);
168 #endif // wxUSE_UNICODE_UTF8
171 #if wxUSE_UNICODE_UTF8
172 typedef wxStringOperationsUtf8 wxStringOperations
;
174 typedef wxStringOperationsWchar wxStringOperations
;
#endif // _WX_WXSTRINGOPS_H__