X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/8d94819c437fdf28c45e9f328e6c38fd1c639ddf..41fec01fa9e009d84fe353aa494725328d850af1:/src/common/convauto.cpp?ds=sidebyside diff --git a/src/common/convauto.cpp b/src/common/convauto.cpp index 8d8c24c0a3..3fcccd849c 100644 --- a/src/common/convauto.cpp +++ b/src/common/convauto.cpp @@ -23,10 +23,6 @@ #pragma hdrstop #endif -#ifndef WX_PRECOMP - #include "wx/wx.h" -#endif //WX_PRECOMP - #include "wx/convauto.h" // we use latin1 by default as it seems the least bad choice: the files we need @@ -35,6 +31,17 @@ // seem to be a good idea and there is no other reasonable alternative wxFontEncoding wxConvAuto::ms_defaultMBEncoding = wxFONTENCODING_ISO8859_1; +namespace +{ + +const char BOM_UTF32BE[] = { '\x00', '\x00', '\xFE', '\xFF' }; +const char BOM_UTF32LE[] = { '\xFF', '\xFE', '\x00', '\x00' }; +const char BOM_UTF16BE[] = { '\xFE', '\xFF' }; +const char BOM_UTF16LE[] = { '\xFF', '\xFE' }; +const char BOM_UTF8[] = { '\xEF', '\xBB', '\xBF' }; + +} // anonymous namespace + // ============================================================================ // implementation // ============================================================================ @@ -49,7 +56,29 @@ void wxConvAuto::SetFallbackEncoding(wxFontEncoding enc) } /* static */ -wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen) +const char* wxConvAuto::GetBOMChars(wxBOM bom, size_t* count) +{ + wxCHECK_MSG( count , NULL, wxS("count pointer must be provided") ); + + switch ( bom ) + { + case wxBOM_UTF32BE: *count = WXSIZEOF(BOM_UTF32BE); return BOM_UTF32BE; + case wxBOM_UTF32LE: *count = WXSIZEOF(BOM_UTF32LE); return BOM_UTF32LE; + case wxBOM_UTF16BE: *count = WXSIZEOF(BOM_UTF16BE); return BOM_UTF16BE; + case wxBOM_UTF16LE: *count = WXSIZEOF(BOM_UTF16LE); return BOM_UTF16LE; + case wxBOM_UTF8 : *count = WXSIZEOF(BOM_UTF8 ); return BOM_UTF8; + case wxBOM_Unknown: + case wxBOM_None: + wxFAIL_MSG( wxS("Invalid BOM type") ); + return NULL; + } + + wxFAIL_MSG( wxS("Unknown BOM type") ); + return NULL; +} + +/* static */ +wxBOM wxConvAuto::DetectBOM(const char *src, size_t srcLen) { // examine the buffer for BOM presence // @@ -69,14 +98,14 @@ wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen) switch ( srcLen ) { case 0: - return BOM_Unknown; + return wxBOM_Unknown; case 1: if ( src[0] == '\x00' || src[0] == '\xFF' || src[0] == '\xFE' || src[0] == '\xEF') { // this could be a BOM but we don't know yet - return BOM_Unknown; + return wxBOM_Unknown; } break; @@ -85,22 +114,22 @@ wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen) if ( src[0] == '\xEF' && src[1] == '\xBB' ) { if ( srcLen == 3 ) - return src[2] == '\xBF' ? BOM_UTF8 : BOM_None; + return src[2] == '\xBF' ? wxBOM_UTF8 : wxBOM_None; - return BOM_Unknown; + return wxBOM_Unknown; } if ( src[0] == '\xFE' && src[1] == '\xFF' ) - return BOM_UTF16BE; + return wxBOM_UTF16BE; if ( src[0] == '\xFF' && src[1] == '\xFE' ) { // if the next byte is 0, it could be an UTF-32LE BOM but if it // isn't we can be sure it's UTF-16LE if ( srcLen == 3 && src[2] != '\x00' ) - return BOM_UTF16LE; + return wxBOM_UTF16LE; - return BOM_Unknown; + return wxBOM_Unknown; } if ( src[0] == '\x00' && src[1] == '\x00' ) @@ -108,9 +137,9 @@ wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen) // this could only be UTF-32BE, check that the data we have so // far allows for it if ( srcLen == 3 && src[2] != '\xFE' ) - return BOM_None; + return wxBOM_None; - return BOM_Unknown; + return wxBOM_Unknown; } break; @@ -118,61 +147,61 @@ wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen) // we have at least 4 characters so we may finally decide whether // we have a BOM or not if ( src[0] == '\xEF' && src[1] == '\xBB' && src[2] == '\xBF' ) - return BOM_UTF8; + return wxBOM_UTF8; if ( src[0] == '\x00' && src[1] == '\x00' && src[2] == '\xFE' && src[3] == '\xFF' ) - return BOM_UTF32BE; + return wxBOM_UTF32BE; if ( src[0] == '\xFF' && src[1] == '\xFE' && src[2] == '\x00' && src[3] == '\x00' ) - return BOM_UTF32LE; + return wxBOM_UTF32LE; if ( src[0] == '\xFE' && src[1] == '\xFF' ) - return BOM_UTF16BE; + return wxBOM_UTF16BE; if ( src[0] == '\xFF' && src[1] == '\xFE' ) - return BOM_UTF16LE; + return wxBOM_UTF16LE; } - return BOM_None; + return wxBOM_None; } -void wxConvAuto::InitFromBOM(BOMType bomType) +void wxConvAuto::InitFromBOM(wxBOM bomType) { m_consumedBOM = false; switch ( bomType ) { - case BOM_Unknown: + case wxBOM_Unknown: wxFAIL_MSG( "shouldn't be called for this BOM type" ); break; - case BOM_None: + case wxBOM_None: // use the default break; - case BOM_UTF32BE: + case wxBOM_UTF32BE: m_conv = new wxMBConvUTF32BE; m_ownsConv = true; break; - case BOM_UTF32LE: + case wxBOM_UTF32LE: m_conv = new wxMBConvUTF32LE; m_ownsConv = true; break; - case BOM_UTF16BE: + case wxBOM_UTF16BE: m_conv = new wxMBConvUTF16BE; m_ownsConv = true; break; - case BOM_UTF16LE: + case wxBOM_UTF16LE: m_conv = new wxMBConvUTF16LE; m_ownsConv = true; break; - case BOM_UTF8: + case wxBOM_UTF8: InitWithUTF8(); break; @@ -195,25 +224,25 @@ void wxConvAuto::SkipBOM(const char **src, size_t *len) const int ofs; switch ( m_bomType ) { - case BOM_Unknown: + case wxBOM_Unknown: wxFAIL_MSG( "shouldn't be called for this BOM type" ); return; - case BOM_None: + case wxBOM_None: ofs = 0; break; - case BOM_UTF32BE: - case BOM_UTF32LE: + case wxBOM_UTF32BE: + case wxBOM_UTF32LE: ofs = 4; break; - case BOM_UTF16BE: - case BOM_UTF16LE: + case wxBOM_UTF16BE: + case wxBOM_UTF16LE: ofs = 2; break; - case BOM_UTF8: + case wxBOM_UTF8: ofs = 3; break; @@ -229,8 +258,8 @@ void wxConvAuto::SkipBOM(const char **src, size_t *len) const bool wxConvAuto::InitFromInput(const char *src, size_t len) { - m_bomType = DetectBOM(src, len); - if ( m_bomType == BOM_Unknown ) + m_bomType = DetectBOM(src, len == wxNO_LEN ? strlen(src) : len); + if ( m_bomType == wxBOM_Unknown ) return false; InitFromBOM(m_bomType); @@ -279,7 +308,7 @@ wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen, // try to convert using the auto-detected encoding size_t rc = m_conv->ToWChar(dst, dstLen, src, srcLen); - if ( rc == wxCONV_FAILED && m_bomType == BOM_None ) + if ( rc == wxCONV_FAILED && m_bomType == wxBOM_None ) { // if the conversion failed but we didn't really detect anything and // simply tried UTF-8 by default, retry it using the fall-back