]> git.saurik.com Git - wxWidgets.git/blame - include/wx/stringops.h
Applied #15226 with modifications: wxRichTextCtrl: Implement setting properties with...
[wxWidgets.git] / include / wx / stringops.h
CommitLineData
467175ab
VS
1///////////////////////////////////////////////////////////////////////////////
2// Name: wx/stringops.h
3// Purpose: implementation of wxString primitive operations
4// Author: Vaclav Slavik
5// Modified by:
6// Created: 2007-04-16
467175ab
VS
7// Copyright: (c) 2007 REA Elektronik GmbH
8// Licence: wxWindows licence
9///////////////////////////////////////////////////////////////////////////////
10
11#ifndef _WX_WXSTRINGOPS_H__
12#define _WX_WXSTRINGOPS_H__
13
14#include "wx/chartype.h"
15#include "wx/stringimpl.h"
16#include "wx/unichar.h"
04d29fda 17#include "wx/buffer.h"
467175ab
VS
18
19// This header contains wxStringOperations "namespace" class that implements
20// elementary operations on string data as static methods; wxString methods and
21// iterators are implemented in terms of it. Two implementations are available,
22// one for UTF-8 encoded char* string and one for "raw" wchar_t* strings (or
23// char* in ANSI build).
24
25// FIXME-UTF8: only wchar after we remove ANSI build
26#if wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
27struct WXDLLIMPEXP_BASE wxStringOperationsWchar
28{
29 // moves the iterator to the next Unicode character
5b119b8b
VZ
30 template <typename Iterator>
31 static void IncIter(Iterator& i) { ++i; }
467175ab
VS
32
33 // moves the iterator to the previous Unicode character
5b119b8b
VZ
34 template <typename Iterator>
35 static void DecIter(Iterator& i) { --i; }
467175ab
VS
36
37 // moves the iterator by n Unicode characters
5b119b8b
VZ
38 template <typename Iterator>
39 static Iterator AddToIter(const Iterator& i, ptrdiff_t n)
467175ab
VS
40 { return i + n; }
41
42 // returns distance of the two iterators in Unicode characters
5b119b8b
VZ
43 template <typename Iterator>
44 static ptrdiff_t DiffIters(const Iterator& i1, const Iterator& i2)
467175ab
VS
45 { return i1 - i2; }
46
47 // encodes the character to a form used to represent it in internal
48 // representation (returns a string in UTF8 version)
49 static wxChar EncodeChar(const wxUniChar& ch) { return (wxChar)ch; }
50
51 static wxUniChar DecodeChar(const wxStringImpl::const_iterator& i)
52 { return *i; }
53};
54#endif // wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
55
56
57#if wxUSE_UNICODE_UTF8
58struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
59{
60 // checks correctness of UTF-8 sequence
111d9948
VS
61 static bool IsValidUtf8String(const char *c,
62 size_t len = wxStringImpl::npos);
657a8a35
VZ
63 static bool IsValidUtf8LeadByte(unsigned char c)
64 {
65 return (c <= 0x7F) || (c >= 0xC2 && c <= 0xF4);
66 }
467175ab
VS
67
68 // table of offsets to skip forward when iterating over UTF-8 sequence
1774c3c5 69 static const unsigned char ms_utf8IterTable[256];
467175ab
VS
70
71
72 template<typename Iterator>
73 static void IncIter(Iterator& i)
74 {
75 wxASSERT( IsValidUtf8LeadByte(*i) );
76 i += ms_utf8IterTable[(unsigned char)*i];
77 }
78
79 template<typename Iterator>
80 static void DecIter(Iterator& i)
81 {
82 wxASSERT( IsValidUtf8LeadByte(*i) );
83
84 // Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in
85 // binary), so we just have to go back until we hit a byte that is
86 // either < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in
87 // binary; this includes some invalid values, but we can ignore it
88 // here, because we assume valid UTF-8 input for the purpose of
89 // efficient implementation).
90 --i;
91 while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
92 --i;
93 }
94
95 template<typename Iterator>
b5343e06 96 static Iterator AddToIter(const Iterator& i, ptrdiff_t n)
467175ab
VS
97 {
98 Iterator out(i);
99
100 if ( n > 0 )
101 {
b5343e06 102 for ( ptrdiff_t j = 0; j < n; ++j )
467175ab
VS
103 IncIter(out);
104 }
105 else if ( n < 0 )
106 {
b5343e06 107 for ( ptrdiff_t j = 0; j > n; --j )
467175ab
VS
108 DecIter(out);
109 }
110
111 return out;
112 }
113
114 template<typename Iterator>
b5343e06 115 static ptrdiff_t DiffIters(Iterator i1, Iterator i2)
467175ab 116 {
b5343e06 117 ptrdiff_t dist = 0;
467175ab
VS
118
119 if ( i1 < i2 )
120 {
121 while ( i1 != i2 )
122 {
123 IncIter(i1);
124 dist--;
125 }
126 }
127 else if ( i2 < i1 )
128 {
129 while ( i2 != i1 )
130 {
131 IncIter(i2);
132 dist++;
133 }
134 }
135
136 return dist;
137 }
138
467175ab 139 // encodes the character as UTF-8:
1fc10687
VS
140 typedef wxUniChar::Utf8CharBuffer Utf8CharBuffer;
141 static Utf8CharBuffer EncodeChar(const wxUniChar& ch)
142 { return ch.AsUTF8(); }
467175ab
VS
143
144 // returns n copies of ch encoded in UTF-8 string
145 static wxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
146
147 // returns the length of UTF-8 encoding of the character with lead byte 'c'
148 static size_t GetUtf8CharLength(char c)
149 {
150 wxASSERT( IsValidUtf8LeadByte(c) );
151 return ms_utf8IterTable[(unsigned char)c];
152 }
153
154 // decodes single UTF-8 character from UTF-8 string
ac2d749e
VS
155 static wxUniChar DecodeChar(wxStringImpl::const_iterator i)
156 {
157 if ( (unsigned char)*i < 0x80 )
158 return (int)*i;
159 return DecodeNonAsciiChar(i);
160 }
161
162private:
163 static wxUniChar DecodeNonAsciiChar(wxStringImpl::const_iterator i);
467175ab
VS
164};
165#endif // wxUSE_UNICODE_UTF8
166
167
// wxStringOperations names the implementation in use: the UTF-8 one when
// wxUSE_UNICODE_UTF8 is set, the wchar_t/ANSI one otherwise.
168#if wxUSE_UNICODE_UTF8
169typedef wxStringOperationsUtf8 wxStringOperations;
170#else
171typedef wxStringOperationsWchar wxStringOperations;
172#endif
173
174#endif // _WX_WXSTRINGOPS_H__