1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: wx/stringops.h
3 // Purpose: implementation of wxString primitive operations
4 // Author: Vaclav Slavik
7 // Copyright: (c) 2007 REA Elektronik GmbH
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
11 #ifndef _WX_WXSTRINGOPS_H__
12 #define _WX_WXSTRINGOPS_H__
14 #include "wx/chartype.h"
15 #include "wx/stringimpl.h"
16 #include "wx/unichar.h"
17 #include "wx/buffer.h"
19 // This header contains the wxStringOperations "namespace" class, which
20 // implements elementary operations on string data as static methods; wxString
21 // methods and iterators are implemented in terms of it. Two implementations
22 // are available: one for UTF-8 encoded char* strings and one for "raw"
23 // wchar_t* strings (or char* strings in the ANSI build).
25 // FIXME-UTF8: only wchar after we remove ANSI build
26 #if wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
27 struct WXDLLIMPEXP_BASE wxStringOperationsWchar
// Steps the iterator forward to the next Unicode character; in this
// implementation that is a single pre-increment of the iterator.
template <typename Iterator>
static void IncIter(Iterator& i)
{
    ++i;
}
// Steps the iterator back to the previous Unicode character; in this
// implementation that is a single pre-decrement of the iterator.
template <typename Iterator>
static void DecIter(Iterator& i)
{
    --i;
}
37 // moves the iterator by n Unicode characters
// Signature: takes the starting iterator by const reference plus a signed
// offset n, and hands back an Iterator by value.
// NOTE(review): the function body is not visible in this copy of the file;
// presumably a negative n moves backwards -- confirm against the full source.
38 template <typename Iterator
>
39 static Iterator
AddToIter(const Iterator
& i
, ptrdiff_t n
)
42 // returns distance of the two iterators in Unicode characters
// Both iterators are taken by const reference; the result is a signed
// ptrdiff_t.
// NOTE(review): the body is not visible in this copy, so the sign convention
// (i1 - i2 vs. i2 - i1) cannot be confirmed from here -- TODO verify.
43 template <typename Iterator
>
44 static ptrdiff_t DiffIters(const Iterator
& i1
, const Iterator
& i2
)
47 // encodes the character to a form used to represent it in internal
48 // representation (returns a string in UTF8 version)
49 static wxChar
EncodeChar(const wxUniChar
& ch
) { return (wxChar
)ch
; }
// Counterpart of EncodeChar(): takes a const_iterator into the underlying
// wxStringImpl (by const reference) and returns a wxUniChar.
// NOTE(review): the body is not visible in this copy of the file; presumably
// it dereferences the iterator -- confirm against the full source.
51 static wxUniChar
DecodeChar(const wxStringImpl::const_iterator
& i
)
54 #endif // wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
57 #if wxUSE_UNICODE_UTF8
58 struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
60 // checks correctness of UTF-8 sequence
// Takes a pointer to the bytes to check and an optional length; when the
// length is omitted it defaults to wxStringImpl::npos.
// NOTE(review): presumably npos means "scan up to the terminating NUL" --
// the definition is not in this header, confirm against the implementation.
61 static bool IsValidUtf8String(const char *c
,
62 size_t len
= wxStringImpl::npos
);
// Tells whether c may start a UTF-8 encoded character: accepted values are
// 0x00..0x7F and 0xC2..0xF4; bytes 0x80..0xC1 and 0xF5..0xFF are rejected.
static bool IsValidUtf8LeadByte(unsigned char c)
{
    // single-byte range
    if ( c <= 0x7F )
        return true;

    // lead bytes of multi-byte sequences
    return c >= 0xC2 && c <= 0xF4;
}
68 // table of offsets to skip forward when iterating over UTF-8 sequence
// Holds 256 entries and is indexed by a byte value cast to unsigned char:
// IncIter() adds the entry to the iterator and GetUtf8CharLength() returns
// it as the encoded length of the character starting with that byte.
// Only declared here; no initializer is given in this header.
69 static const unsigned char ms_utf8IterTable
[256];
72 template<typename Iterator
>
73 static void IncIter(Iterator
& i
)
75 wxASSERT( IsValidUtf8LeadByte(*i
) );
76 i
+= ms_utf8IterTable
[(unsigned char)*i
];
// Moves the iterator back to the lead byte of the previous UTF-8 character.
// On entry the byte under the iterator must be a valid lead byte (asserted).
template<typename Iterator>
static void DecIter(Iterator& i)
{
    wxASSERT( IsValidUtf8LeadByte(*i) );

    // *i is a lead byte at this point, so one unconditional step back is
    // needed to land on the last byte of the previous character.
    --i;

    // Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in
    // binary), so we just have to go back until we hit a byte that is
    // either < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in
    // binary; this includes some invalid values, but we can ignore it
    // here, because we assume valid UTF-8 input for the purpose of
    // efficient implementation).
    while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
        --i;
}
// Returns a copy of i moved by n UTF-8 characters: forward (via IncIter)
// when n is positive, backward (via DecIter) when n is negative, and
// unchanged when n is zero. The cost is linear in |n| because every
// character has to be stepped over individually.
template<typename Iterator>
static Iterator AddToIter(const Iterator& i, ptrdiff_t n)
{
    // i is taken by const reference, so walk a copy of it
    Iterator out(i);

    // At most one of the two loops executes, depending on the sign of n.
    for ( ptrdiff_t j = 0; j < n; ++j )
        IncIter(out);

    for ( ptrdiff_t j = 0; j > n; --j )
        DecIter(out);

    return out;
}
// Returns the distance between two iterators as a signed ptrdiff_t. Unlike
// wxStringOperationsWchar::DiffIters(), both iterators are taken by value.
// NOTE(review): the body is not visible in this copy of the file; presumably
// the distance is counted in UTF-8 characters rather than bytes, and the
// sign convention still needs to be confirmed against the full source.
114 template<typename Iterator
>
115 static ptrdiff_t DiffIters(Iterator i1
, Iterator i2
)
139 // encodes the character as UTF-8:
140 typedef wxUniChar::Utf8CharBuffer Utf8CharBuffer
;
141 static Utf8CharBuffer
EncodeChar(const wxUniChar
& ch
)
142 { return ch
.AsUTF8(); }
144 // returns n copies of ch encoded in UTF-8 string
// The result is handed back as a wxCharBuffer. Only declared here; the
// implementation is not part of this header.
145 static wxCharBuffer
EncodeNChars(size_t n
, const wxUniChar
& ch
);
147 // returns the length of UTF-8 encoding of the character with lead byte 'c'
148 static size_t GetUtf8CharLength(char c
)
150 wxASSERT( IsValidUtf8LeadByte(c
) );
151 return ms_utf8IterTable
[(unsigned char)c
];
154 // decodes single UTF-8 character from UTF-8 string
// Takes the iterator by value and returns a wxUniChar. The byte under the
// iterator, viewed as unsigned char, is tested against 0x80, and
// DecodeNonAsciiChar() is called with the same iterator.
// NOTE(review): a statement appears to be missing between the test and the
// DecodeNonAsciiChar() call in this copy of the file; presumably bytes below
// 0x80 are returned directly and only the remaining ones are passed to
// DecodeNonAsciiChar() -- confirm against the full source.
155 static wxUniChar
DecodeChar(wxStringImpl::const_iterator i
)
157 if ( (unsigned char)*i
< 0x80 )
159 return DecodeNonAsciiChar(i
);
// Helper called by DecodeChar(); takes the iterator by value and returns a
// wxUniChar. Judging by its name it handles characters outside the ASCII
// range -- only declared here, the definition is not in this header.
163 static wxUniChar
DecodeNonAsciiChar(wxStringImpl::const_iterator i
);
165 #endif // wxUSE_UNICODE_UTF8
168 #if wxUSE_UNICODE_UTF8
169 typedef wxStringOperationsUtf8 wxStringOperations
;
171 typedef wxStringOperationsWchar wxStringOperations
;
174 #endif // _WX_WXSTRINGOPS_H__