X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/2e2cf78dc1f2db0caa8b982a072e88d13c8303cf..7344108e8a129a3f9b4df5ab0f98a1713db03b89:/include/wx/convauto.h?ds=sidebyside diff --git a/include/wx/convauto.h b/include/wx/convauto.h index 89682525ef..25019a55e5 100644 --- a/include/wx/convauto.h +++ b/include/wx/convauto.h @@ -12,24 +12,63 @@ #define _WX_CONVAUTO_H_ #include "wx/strconv.h" - -#if wxUSE_WCHAR_T +#include "wx/fontenc.h" // ---------------------------------------------------------------------------- // wxConvAuto: uses BOM to automatically detect input encoding // ---------------------------------------------------------------------------- +// All currently recognized BOM values. +enum wxBOM +{ + wxBOM_Unknown = -1, + wxBOM_None, + wxBOM_UTF32BE, + wxBOM_UTF32LE, + wxBOM_UTF16BE, + wxBOM_UTF16LE, + wxBOM_UTF8 +}; + class WXDLLIMPEXP_BASE wxConvAuto : public wxMBConv { public: // default ctor, the real conversion will be created on demand - wxConvAuto() { m_conv = NULL; /* the rest will be initialized later */ } + wxConvAuto(wxFontEncoding enc = wxFONTENCODING_DEFAULT) + { + Init(); + + m_encDefault = enc; + } // copy ctor doesn't initialize anything neither as conversion can only be // deduced on first use - wxConvAuto(const wxConvAuto& WXUNUSED(other)) : wxMBConv() { m_conv = NULL; } + wxConvAuto(const wxConvAuto& other) : wxMBConv() + { + Init(); + + m_encDefault = other.m_encDefault; + } + + virtual ~wxConvAuto() + { + if ( m_ownsConv ) + delete m_conv; + } + + // get/set the fall-back encoding used when the input text doesn't have BOM + // and isn't UTF-8 + // + // special values are wxFONTENCODING_MAX meaning not to use any fall back + // at all (but just fail to convert in this case) and wxFONTENCODING_SYSTEM + // meaning to use the encoding of the system locale + static wxFontEncoding GetFallbackEncoding() { return ms_defaultMBEncoding; } + static void SetFallbackEncoding(wxFontEncoding enc); + static void DisableFallbackEncoding() + { + SetFallbackEncoding(wxFONTENCODING_MAX); + } - virtual ~wxConvAuto() { if ( m_conv && m_ownsConv ) delete m_conv; } // override the base class virtual function(s) to use our m_conv virtual size_t ToWChar(wchar_t *dst, size_t dstLen, @@ -42,46 +81,64 @@ public: virtual wxMBConv *Clone() const { return new wxConvAuto(*this); } -private: - // all currently recognized BOM values - enum BOMType + // return the BOM type of this buffer + static wxBOM DetectBOM(const char *src, size_t srcLen); + + // return the characters composing the given BOM. + static const char* GetBOMChars(wxBOM bomType, size_t* count); + + wxBOM GetBOM() const { - BOM_None, - BOM_UTF32BE, - BOM_UTF32LE, - BOM_UTF16BE, - BOM_UTF16LE, - BOM_UTF8 - }; + return m_bomType; + } - // return the BOM type of this buffer - static BOMType DetectBOM(const char *src, size_t srcLen); +private: + // common part of all ctors + void Init() + { + // We don't initialize m_encDefault here as different ctors do it + // differently. + m_conv = NULL; + m_bomType = wxBOM_Unknown; + m_ownsConv = false; + m_consumedBOM = false; + } - // initialize m_conv with the conversion to use by default (UTF-8) - void InitWithDefault() + // initialize m_conv with the UTF-8 conversion + void InitWithUTF8() { m_conv = &wxConvUTF8; m_ownsConv = false; } // create the correct conversion object for the given BOM type - void InitFromBOM(BOMType bomType); + void InitFromBOM(wxBOM bomType); // create the correct conversion object for the BOM present in the - // beginning of the buffer; adjust the buffer to skip the BOM if found - void InitFromInput(const char **src, size_t *len); + // beginning of the buffer + // + // return false if the buffer is too short to allow us to determine if we + // have BOM or not + bool InitFromInput(const char *src, size_t len); // adjust src and len to skip over the BOM (identified by m_bomType) at the // start of the buffer void SkipBOM(const char **src, size_t *len) const; + // fall-back multibyte encoding to use, may be wxFONTENCODING_SYSTEM or + // wxFONTENCODING_MAX but not wxFONTENCODING_DEFAULT + static wxFontEncoding ms_defaultMBEncoding; + // conversion object which we really use, NULL until the first call to // either ToWChar() or FromWChar() wxMBConv *m_conv; + // the multibyte encoding to use by default if input isn't Unicode + wxFontEncoding m_encDefault; + // our BOM type - BOMType m_bomType; + wxBOM m_bomType; // true if we allocated m_conv ourselves, false if we just use an existing // global conversion @@ -92,10 +149,8 @@ private: bool m_consumedBOM; - DECLARE_NO_ASSIGN_CLASS(wxConvAuto); + wxDECLARE_NO_ASSIGN_CLASS(wxConvAuto); }; -#endif // wxUSE_WCHAR_T - #endif // _WX_CONVAUTO_H_