1 /////////////////////////////////////////////////////////////////////////////// 
   3 // Purpose:     wxUniChar and wxUniCharRef classes 
   4 // Author:      Vaclav Slavik 
   7 // Copyright:   (c) 2007 REA Elektronik GmbH 
   8 // Licence:     wxWindows licence 
   9 /////////////////////////////////////////////////////////////////////////////// 
  11 #ifndef _WX_UNICHAR_H_ 
  12 #define _WX_UNICHAR_H_ 
  15 #include "wx/chartype.h" 
  16 #include "wx/stringimpl.h" 
  18 class WXDLLIMPEXP_FWD_BASE wxUniCharRef
; 
  19 class WXDLLIMPEXP_FWD_BASE wxStringIteratorNode
; 
  21 // This class represents single Unicode character. It can be converted to 
  22 // and from char or wchar_t and implements commonly used character operations. 
  23 class WXDLLIMPEXP_BASE wxUniChar
 
  26     // NB: this is not wchar_t on purpose, it needs to represent the entire 
  27     //     Unicode code points range and wchar_t may be too small for that 
  28     //     (e.g. on Win32 where wchar_t* is encoded in UTF-16) 
  29     typedef wxUint32 value_type
; 
  31     wxUniChar() : m_value(0) {} 
  33     // Create the character from 8bit character value encoded in the current 
  35     wxUniChar(char c
) { m_value 
= From8bit(c
); } 
  36     wxUniChar(unsigned char c
) { m_value 
= From8bit((char)c
); } 
  38     // Create the character from a wchar_t character value. 
  39 #if wxWCHAR_T_IS_REAL_TYPE 
  40     wxUniChar(wchar_t c
) { m_value 
= c
; } 
  43     wxUniChar(int c
) { m_value 
= c
; } 
  44     wxUniChar(unsigned int c
) { m_value 
= c
; } 
  45     wxUniChar(long int c
) { m_value 
= c
; } 
  46     wxUniChar(unsigned long int c
) { m_value 
= c
; } 
  47     wxUniChar(short int c
) { m_value 
= c
; } 
  48     wxUniChar(unsigned short int c
) { m_value 
= c
; } 
  50     wxUniChar(const wxUniCharRef
& c
); 
  52     // Returns Unicode code point value of the character 
  53     value_type 
GetValue() const { return m_value
; } 
  55 #if wxUSE_UNICODE_UTF8 
  56     // buffer for single UTF-8 character 
  60         operator const char*() const { return data
; } 
  63     // returns the character encoded as UTF-8 
  64     // (NB: implemented in stringops.cpp) 
  65     Utf8CharBuffer 
AsUTF8() const; 
  66 #endif // wxUSE_UNICODE_UTF8 
  68     // Returns true if the character is an ASCII character: 
  69     bool IsAscii() const { return m_value 
< 0x80; } 
  71     // Conversions to char and wchar_t types: all of those are needed to be 
  72     // able to pass wxUniChars to various standard narrow and wide character 
  74     operator char() const { return To8bit(m_value
); } 
  75     operator unsigned char() const { return (unsigned char)To8bit(m_value
); } 
  76 #if wxWCHAR_T_IS_REAL_TYPE 
  77     operator wchar_t() const { return (wchar_t)m_value
; } 
  79     operator int() const { return (int)m_value
; } 
  80     operator unsigned int() const { return (unsigned int)m_value
; } 
  81     operator long int() const { return (long int)m_value
; } 
  82     operator unsigned long int() const { return (unsigned long)m_value
; } 
  83     operator short int() const { return (short int)m_value
; } 
  84     operator unsigned short int() const { return (unsigned short int)m_value
; } 
  86     // We need this operator for the "*p" part of expressions like "for ( 
  87     // const_iterator p = begin() + nStart; *p; ++p )". In this case, 
  88     // compilation would fail without it because the conversion to bool would 
  89     // be ambiguous (there are all these int types conversions...). (And adding 
  90     // operator unspecified_bool_type() would only make the ambiguity worse.) 
  91     operator bool() const { return m_value 
!= 0; } 
  92     bool operator!() const { return !((bool)*this); } 
  94     // And this one is needed by some (not all, but not using ifdefs makes the 
  95     // code easier) compilers to parse "str[0] && *p" successfully 
  96     bool operator&&(bool v
) const { return (bool)*this && v
; } 
  98     // Assignment operators: 
  99     wxUniChar
& operator=(const wxUniChar
& c
) { if (&c 
!= this) m_value 
= c
.m_value
; return *this; } 
 100     wxUniChar
& operator=(const wxUniCharRef
& c
); 
 101     wxUniChar
& operator=(char c
) { m_value 
= From8bit(c
); return *this; } 
 102     wxUniChar
& operator=(unsigned char c
) { m_value 
= From8bit((char)c
); return *this; } 
 103 #if wxWCHAR_T_IS_REAL_TYPE 
 104     wxUniChar
& operator=(wchar_t c
) { m_value 
= c
; return *this; } 
 106     wxUniChar
& operator=(int c
) { m_value 
= c
; return *this; } 
 107     wxUniChar
& operator=(unsigned int c
) { m_value 
= c
; return *this; } 
 108     wxUniChar
& operator=(long int c
) { m_value 
= c
; return *this; } 
 109     wxUniChar
& operator=(unsigned long int c
) { m_value 
= c
; return *this; } 
 110     wxUniChar
& operator=(short int c
) { m_value 
= c
; return *this; } 
 111     wxUniChar
& operator=(unsigned short int c
) { m_value 
= c
; return *this; } 
 113     // Comparison operators: 
 115     // define the given comparison operator for all the types 
 116 #define wxDEFINE_UNICHAR_OPERATOR(op)                                         \ 
 117     bool operator op(const wxUniChar& c) const { return m_value op c.m_value; }\ 
 118     bool operator op(char c) const { return m_value op From8bit(c); }         \ 
 119     bool operator op(unsigned char c) const { return m_value op From8bit((char)c); } \ 
 120     wxIF_WCHAR_T_TYPE( bool operator op(wchar_t c) const { return m_value op (value_type)c; } )    \ 
 121     bool operator op(int c) const { return m_value op (value_type)c; }        \ 
 122     bool operator op(unsigned int c) const { return m_value op (value_type)c; }        \ 
 123     bool operator op(short int c) const { return m_value op (value_type)c; }  \ 
 124     bool operator op(unsigned short int c) const { return m_value op (value_type)c; }  \ 
 125     bool operator op(long int c) const { return m_value op (value_type)c; }   \ 
 126     bool operator op(unsigned long int c) const { return m_value op (value_type)c; } 
 128     wxFOR_ALL_COMPARISONS(wxDEFINE_UNICHAR_OPERATOR
) 
 130 #undef wxDEFINE_UNICHAR_OPERATOR 
 132     // this is needed for expressions like 'Z'-c 
 133     int operator-(const wxUniChar
& c
) const { return m_value 
- c
.m_value
; } 
 134     int operator-(char c
) const { return m_value 
- From8bit(c
); } 
 135     int operator-(unsigned char c
) const { return m_value 
- From8bit((char)c
); } 
 136     int operator-(wchar_t c
) const { return m_value 
- (value_type
)c
; } 
 140     // notice that we implement these functions inline for 7-bit ASCII 
 141     // characters purely for performance reasons 
 142     static value_type 
From8bit(char c
) 
 145         if ( (unsigned char)c 
< 0x80 ) 
 148         return FromHi8bit(c
); 
 154     static char To8bit(value_type c
) 
 166     // helpers of the functions above called to deal with non-ASCII chars 
 167     static value_type 
FromHi8bit(char c
); 
 168     static char ToHi8bit(value_type c
); 
 175 // Writeable reference to a character in wxString. 
 177 // This class can be used in the same way wxChar is used, except that changing 
 178 // its value updates the underlying string object. 
 179 class WXDLLIMPEXP_BASE wxUniCharRef
 
 182     typedef wxStringImpl::iterator iterator
; 
 184     // create the reference 
 185 #if wxUSE_UNICODE_UTF8 
 186     wxUniCharRef(wxStringIteratorNode
& node
, iterator pos
) : m_node(node
), m_pos(pos
) {} 
 188     wxUniCharRef(iterator pos
) : m_pos(pos
) {} 
 192     // NB: we have to make this public, because we don't have wxString 
 193     //     declaration available here and so can't declare wxString::iterator 
 194     //     as friend; so at least don't use a ctor but a static function 
 195     //     that must be used explicitly (this is more than using 'explicit' 
 196     //     keyword on ctor!): 
 197 #if wxUSE_UNICODE_UTF8 
 198     static wxUniCharRef 
CreateForString(wxStringIteratorNode
& node
, iterator pos
) 
 199         { return wxUniCharRef(node
, pos
); } 
 201     static wxUniCharRef 
CreateForString(iterator pos
) 
 202         { return wxUniCharRef(pos
); } 
 205     wxUniChar::value_type 
GetValue() const { return UniChar().GetValue(); } 
 207 #if wxUSE_UNICODE_UTF8 
 208     wxUniChar::Utf8CharBuffer 
AsUTF8() const { return UniChar().AsUTF8(); } 
 209 #endif // wxUSE_UNICODE_UTF8 
 211     bool IsAscii() const { return UniChar().IsAscii(); } 
 213     // Assignment operators: 
 214 #if wxUSE_UNICODE_UTF8 
 215     wxUniCharRef
& operator=(const wxUniChar
& c
); 
 217     wxUniCharRef
& operator=(const wxUniChar
& c
) { *m_pos 
= c
; return *this; } 
 220     wxUniCharRef
& operator=(const wxUniCharRef
& c
) 
 221         { if (&c 
!= this) *this = c
.UniChar(); return *this; } 
 223     wxUniCharRef
& operator=(char c
) { return *this = wxUniChar(c
); } 
 224     wxUniCharRef
& operator=(unsigned char c
) { return *this = wxUniChar(c
); } 
 225 #if wxWCHAR_T_IS_REAL_TYPE 
 226     wxUniCharRef
& operator=(wchar_t c
) { return *this = wxUniChar(c
); } 
 228     wxUniCharRef
& operator=(int c
) { return *this = wxUniChar(c
); } 
 229     wxUniCharRef
& operator=(unsigned int c
) { return *this = wxUniChar(c
); } 
 230     wxUniCharRef
& operator=(short int c
) { return *this = wxUniChar(c
); } 
 231     wxUniCharRef
& operator=(unsigned short int c
) { return *this = wxUniChar(c
); } 
 232     wxUniCharRef
& operator=(long int c
) { return *this = wxUniChar(c
); } 
 233     wxUniCharRef
& operator=(unsigned long int c
) { return *this = wxUniChar(c
); } 
 235     // Conversions to the same types as wxUniChar is convertible to: 
 236     operator char() const { return UniChar(); } 
 237     operator unsigned char() const { return UniChar(); } 
 238 #if wxWCHAR_T_IS_REAL_TYPE 
 239     operator wchar_t() const { return UniChar(); } 
 241     operator int() const { return UniChar(); } 
 242     operator unsigned int() const { return UniChar(); } 
 243     operator short int() const { return UniChar(); } 
 244     operator unsigned short int() const { return UniChar(); } 
 245     operator long int() const { return UniChar(); } 
 246     operator unsigned long int() const { return UniChar(); } 
 248     // see wxUniChar::operator bool etc. for explanation 
 249     operator bool() const { return (bool)UniChar(); } 
 250     bool operator!() const { return !UniChar(); } 
 251     bool operator&&(bool v
) const { return UniChar() && v
; } 
 253     // Comparison operators: 
 254 #define wxDEFINE_UNICHARREF_OPERATOR(op)                                      \ 
 255     bool operator op(const wxUniCharRef& c) const { return UniChar() op c.UniChar(); }\ 
 256     bool operator op(const wxUniChar& c) const { return UniChar() op c; }     \ 
 257     bool operator op(char c) const { return UniChar() op c; }                 \ 
 258     bool operator op(unsigned char c) const { return UniChar() op c; }        \ 
 259     wxIF_WCHAR_T_TYPE( bool operator op(wchar_t c) const { return UniChar() op c; } ) \ 
 260     bool operator op(int c) const { return UniChar() op c; }                  \ 
 261     bool operator op(unsigned int c) const { return UniChar() op c; }         \ 
 262     bool operator op(short int c) const { return UniChar() op c; }             \ 
 263     bool operator op(unsigned short int c) const { return UniChar() op c; }    \ 
 264     bool operator op(long int c) const { return UniChar() op c; }             \ 
 265     bool operator op(unsigned long int c) const { return UniChar() op c; } 
 267     wxFOR_ALL_COMPARISONS(wxDEFINE_UNICHARREF_OPERATOR
) 
 269 #undef wxDEFINE_UNICHARREF_OPERATOR 
 271     // for expressions like c-'A': 
 272     int operator-(const wxUniCharRef
& c
) const { return UniChar() - c
.UniChar(); } 
 273     int operator-(const wxUniChar
& c
) const { return UniChar() - c
; } 
 274     int operator-(char c
) const { return UniChar() - c
; } 
 275     int operator-(unsigned char c
) const { return UniChar() - c
; } 
 276     int operator-(wchar_t c
) const { return UniChar() - c
; } 
 279 #if wxUSE_UNICODE_UTF8 
 280     wxUniChar 
UniChar() const; 
 282     wxUniChar 
UniChar() const { return *m_pos
; } 
 285     friend class WXDLLIMPEXP_FWD_BASE wxUniChar
; 
 288     // reference to the string and pointer to the character in string 
 289 #if wxUSE_UNICODE_UTF8 
 290     wxStringIteratorNode
& m_node
; 
 295 inline wxUniChar::wxUniChar(const wxUniCharRef
& c
) 
 297     m_value 
= c
.UniChar().m_value
; 
 300 inline wxUniChar
& wxUniChar::operator=(const wxUniCharRef
& c
) 
 302     m_value 
= c
.UniChar().m_value
; 
 306 // Comparison operators for the case when wxUniChar(Ref) is the second operand 
 307 // implemented in terms of member comparison functions 
 309 #define wxCMP_REVERSE(c1, c2, op) c2 op c1 
 311 wxDEFINE_COMPARISONS(char, const wxUniChar
&, wxCMP_REVERSE
) 
 312 wxDEFINE_COMPARISONS(char, const wxUniCharRef
&, wxCMP_REVERSE
) 
 314 wxDEFINE_COMPARISONS(wchar_t, const wxUniChar
&, wxCMP_REVERSE
) 
 315 wxDEFINE_COMPARISONS(wchar_t, const wxUniCharRef
&, wxCMP_REVERSE
) 
 317 wxDEFINE_COMPARISONS(const wxUniChar
&, const wxUniCharRef
&, wxCMP_REVERSE
) 
 321 // for expressions like 'A'-c, where c is a wxUniCharRef: 
 322 inline int operator-(char c1
, const wxUniCharRef
& c2
) { return -(c2 
- c1
); } 
 323 inline int operator-(const wxUniChar
& c1
, const wxUniCharRef
& c2
) { return -(c2 
- c1
); } 
 324 inline int operator-(wchar_t c1
, const wxUniCharRef
& c2
) { return -(c2 
- c1
); } 
 326 #endif /* _WX_UNICHAR_H_ */