]> git.saurik.com Git - wxWidgets.git/blame - include/wx/stringops.h
Ensure wxCharTypeBuffer data is NUL-terminated after extend() call.
[wxWidgets.git] / include / wx / stringops.h
CommitLineData
467175ab
VS
1///////////////////////////////////////////////////////////////////////////////
2// Name: wx/stringops.h
3// Purpose: implementation of wxString primitive operations
4// Author: Vaclav Slavik
5// Modified by:
6// Created: 2007-04-16
7// RCS-ID: $Id$
8// Copyright: (c) 2007 REA Elektronik GmbH
9// Licence: wxWindows licence
10///////////////////////////////////////////////////////////////////////////////
11
12#ifndef _WX_WXSTRINGOPS_H__
13#define _WX_WXSTRINGOPS_H__
14
15#include "wx/chartype.h"
16#include "wx/stringimpl.h"
17#include "wx/unichar.h"
04d29fda 18#include "wx/buffer.h"
467175ab
VS
19
20// This header contains wxStringOperations "namespace" class that implements
21// elementary operations on string data as static methods; wxString methods and
22// iterators are implemented in terms of it. Two implementations are available,
23// one for UTF-8 encoded char* strings and one for "raw" wchar_t* strings (or
24// char* in ANSI build).
25
26// FIXME-UTF8: only wchar after we remove ANSI build
27#if wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
28struct WXDLLIMPEXP_BASE wxStringOperationsWchar
29{
30 // moves the iterator to the next Unicode character
5b119b8b
VZ
31 template <typename Iterator>
32 static void IncIter(Iterator& i) { ++i; }
467175ab
VS
33
34 // moves the iterator to the previous Unicode character
5b119b8b
VZ
35 template <typename Iterator>
36 static void DecIter(Iterator& i) { --i; }
467175ab
VS
37
38 // moves the iterator by n Unicode characters
5b119b8b
VZ
39 template <typename Iterator>
40 static Iterator AddToIter(const Iterator& i, ptrdiff_t n)
467175ab
VS
41 { return i + n; }
42
43 // returns distance of the two iterators in Unicode characters
5b119b8b
VZ
44 template <typename Iterator>
45 static ptrdiff_t DiffIters(const Iterator& i1, const Iterator& i2)
467175ab
VS
46 { return i1 - i2; }
47
48 // encodes the character to a form used to represent it in internal
49 // representation (returns a string in UTF8 version)
50 static wxChar EncodeChar(const wxUniChar& ch) { return (wxChar)ch; }
51
52 static wxUniChar DecodeChar(const wxStringImpl::const_iterator& i)
53 { return *i; }
54};
55#endif // wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
56
57
58#if wxUSE_UNICODE_UTF8
59struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
60{
61 // checks correctness of UTF-8 sequence
111d9948
VS
62 static bool IsValidUtf8String(const char *c,
63 size_t len = wxStringImpl::npos);
657a8a35
VZ
64 static bool IsValidUtf8LeadByte(unsigned char c)
65 {
66 return (c <= 0x7F) || (c >= 0xC2 && c <= 0xF4);
67 }
467175ab
VS
68
69 // table of offsets to skip forward when iterating over UTF-8 sequence
1774c3c5 70 static const unsigned char ms_utf8IterTable[256];
467175ab
VS
71
72
73 template<typename Iterator>
74 static void IncIter(Iterator& i)
75 {
76 wxASSERT( IsValidUtf8LeadByte(*i) );
77 i += ms_utf8IterTable[(unsigned char)*i];
78 }
79
80 template<typename Iterator>
81 static void DecIter(Iterator& i)
82 {
83 wxASSERT( IsValidUtf8LeadByte(*i) );
84
85 // Non-lead bytes are all in the 0x80..0xBF range (i.e. 10xxxxxx in
86 // binary), so we just have to go back until we hit a byte that is
87 // either < 0x80 (i.e. 0xxxxxxx in binary) or 0xC0..0xFF (11xxxxxx in
88 // binary; this includes some invalid values, but we can ignore it
89 // here, because we assume valid UTF-8 input for the purpose of
90 // efficient implementation).
91 --i;
92 while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ )
93 --i;
94 }
95
96 template<typename Iterator>
b5343e06 97 static Iterator AddToIter(const Iterator& i, ptrdiff_t n)
467175ab
VS
98 {
99 Iterator out(i);
100
101 if ( n > 0 )
102 {
b5343e06 103 for ( ptrdiff_t j = 0; j < n; ++j )
467175ab
VS
104 IncIter(out);
105 }
106 else if ( n < 0 )
107 {
b5343e06 108 for ( ptrdiff_t j = 0; j > n; --j )
467175ab
VS
109 DecIter(out);
110 }
111
112 return out;
113 }
114
115 template<typename Iterator>
b5343e06 116 static ptrdiff_t DiffIters(Iterator i1, Iterator i2)
467175ab 117 {
b5343e06 118 ptrdiff_t dist = 0;
467175ab
VS
119
120 if ( i1 < i2 )
121 {
122 while ( i1 != i2 )
123 {
124 IncIter(i1);
125 dist--;
126 }
127 }
128 else if ( i2 < i1 )
129 {
130 while ( i2 != i1 )
131 {
132 IncIter(i2);
133 dist++;
134 }
135 }
136
137 return dist;
138 }
139
467175ab 140 // encodes the character as UTF-8:
1fc10687
VS
141 typedef wxUniChar::Utf8CharBuffer Utf8CharBuffer;
142 static Utf8CharBuffer EncodeChar(const wxUniChar& ch)
143 { return ch.AsUTF8(); }
467175ab
VS
144
145 // returns n copies of ch encoded in UTF-8 string
146 static wxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);
147
148 // returns the length of UTF-8 encoding of the character with lead byte 'c'
149 static size_t GetUtf8CharLength(char c)
150 {
151 wxASSERT( IsValidUtf8LeadByte(c) );
152 return ms_utf8IterTable[(unsigned char)c];
153 }
154
155 // decodes single UTF-8 character from UTF-8 string
ac2d749e
VS
156 static wxUniChar DecodeChar(wxStringImpl::const_iterator i)
157 {
158 if ( (unsigned char)*i < 0x80 )
159 return (int)*i;
160 return DecodeNonAsciiChar(i);
161 }
162
163private:
164 static wxUniChar DecodeNonAsciiChar(wxStringImpl::const_iterator i);
467175ab
VS
165};
166#endif // wxUSE_UNICODE_UTF8
167
168
169#if wxUSE_UNICODE_UTF8
170typedef wxStringOperationsUtf8 wxStringOperations;
171#else
172typedef wxStringOperationsWchar wxStringOperations;
173#endif
174
175#endif // _WX_WXSTRINGOPS_H__