]>
git.saurik.com Git - wxWidgets.git/blob - src/common/convauto.cpp
   1 /////////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/common/convauto.cpp 
   3 // Purpose:     implementation of wxConvAuto 
   4 // Author:      Vadim Zeitlin 
   7 // Copyright:   (c) 2006 Vadim Zeitlin <vadim@wxwindows.org> 
   8 // Licence:     wxWindows licence 
   9 /////////////////////////////////////////////////////////////////////////////// 
  11 // ============================================================================ 
  13 // ============================================================================ 
  15 // ---------------------------------------------------------------------------- 
  17 // ---------------------------------------------------------------------------- 
  19 // for compilers that support precompilation, includes "wx.h". 
  20 #include "wx/wxprec.h" 
  31 #include "wx/convauto.h" 
  33 // we use latin1 by default as it seems the least bad choice: the files whose 
  34 // encoding we need to detect don't always come from the user's system (they 
  35 // are often received from other machines), so using wxFONTENCODING_SYSTEM 
  36 // doesn't seem to be a good idea and there is no other reasonable alternative 
  37 wxFontEncoding 
wxConvAuto::ms_defaultMBEncoding 
= wxFONTENCODING_ISO8859_1
; 
  39 // ============================================================================ 
  41 // ============================================================================ 
  44 void wxConvAuto::SetFallbackEncoding(wxFontEncoding enc
) 
  46     wxASSERT_MSG( enc 
!= wxFONTENCODING_DEFAULT
, 
  47                   _T("wxFONTENCODING_DEFAULT doesn't make sense here") ); 
  49     ms_defaultMBEncoding 
= enc
; 
  53 wxConvAuto::BOMType 
wxConvAuto::DetectBOM(const char *src
, size_t srcLen
) 
  57         // the shortest BOM is 2 bytes long, so bail out immediately for shorter 
  58         // input: the code below then needn't check the length in UTF-16 cases 
  62     // examine the buffer for BOM presence 
  64     // see http://www.unicode.org/faq/utf_bom.html#BOM 
  68             // could only be big endian UTF-32 (00 00 FE FF) 
  79             // could only be big endian UTF-16 (FE FF) 
  80             if ( *src
++ == '\xff' ) 
  87             // could be either little endian UTF-16 or UTF-32, both start with FF FE 
  89             if ( *src
++ == '\xfe' ) 
  91                 return srcLen 
>= 4 && src
[0] == '\0' && src
[1] == '\0' 
  98             // is this UTF-8 BOM (EF BB BF)? 
  99             if ( srcLen 
>= 3 && src
[0] == '\xbb' && src
[1] == '\xbf' ) 
 109 void wxConvAuto::InitFromBOM(BOMType bomType
) 
 111     m_consumedBOM 
= false; 
 116             m_conv 
= new wxMBConvUTF32BE
; 
 121             m_conv 
= new wxMBConvUTF32LE
; 
 126             m_conv 
= new wxMBConvUTF16BE
; 
 131             m_conv 
= new wxMBConvUTF16LE
; 
 140             wxFAIL_MSG( _T("unexpected BOM type") ); 
 141             // fall through: still need to create something 
 145             m_consumedBOM 
= true; // as there is nothing to consume 
 149 void wxConvAuto::SkipBOM(const char **src
, size_t *len
) const 
 169             wxFAIL_MSG( _T("unexpected BOM type") ); 
 170             // fall through: still need to create something 
 177     if ( *len 
!= (size_t)-1 ) 
 181 void wxConvAuto::InitFromInput(const char **src
, size_t *len
) 
 183     m_bomType 
= DetectBOM(*src
, *len
); 
 184     InitFromBOM(m_bomType
); 
 189 wxConvAuto::ToWChar(wchar_t *dst
, size_t dstLen
, 
 190                     const char *src
, size_t srcLen
) const 
 192     // we check for the BOM and create the appropriate conversion the first 
 193     // time we're called, but we also need to ensure that the BOM is skipped 
 194     // not only during this initial call but also during the first call with 
 195     // non-NULL dst, as typically we're first called with NULL dst to 
 196     // calculate the needed buffer size 
 197     wxConvAuto 
*self 
= wx_const_cast(wxConvAuto 
*, this); 
 200         self
->InitFromInput(&src
, &srcLen
); 
 202             self
->m_consumedBOM 
= true; 
 205     if ( !m_consumedBOM 
&& dst 
) 
 207         self
->m_consumedBOM 
= true; 
 208         SkipBOM(&src
, &srcLen
); 
 211     // try to convert using the auto-detected encoding 
 212     size_t rc 
= m_conv
->ToWChar(dst
, dstLen
, src
, srcLen
); 
 213     if ( rc 
== wxCONV_FAILED 
&& m_bomType 
== BOM_None 
) 
 215         // if the conversion failed but we didn't really detect anything and 
 216         // simply tried UTF-8 by default, retry it using the fall-back encoding 
 217         if ( m_encDefault 
!= wxFONTENCODING_MAX 
) 
 222             self
->m_conv 
= new wxCSConv(m_encDefault 
== wxFONTENCODING_DEFAULT
 
 223                                             ? GetFallbackEncoding() 
 225             self
->m_ownsConv 
= true; 
 227             rc 
= m_conv
->ToWChar(dst
, dstLen
, src
, srcLen
); 
 235 wxConvAuto::FromWChar(char *dst
, size_t dstLen
, 
 236                       const wchar_t *src
, size_t srcLen
) const 
 240         // default to UTF-8 for the multibyte output 
 241         wx_const_cast(wxConvAuto 
*, this)->InitWithUTF8(); 
 244     return m_conv
->FromWChar(dst
, dstLen
, src
, srcLen
); 
 247 #endif // wxUSE_WCHAR_T