1. changed all "wxMBConv& conv" parameters to "const wxMBConv&"

author Vadim Zeitlin <vadim@wxwidgets.org>

Wed, 5 Apr 2006 14:37:47 +0000 (14:37 +0000)

committer Vadim Zeitlin <vadim@wxwidgets.org>

Wed, 5 Apr 2006 14:37:47 +0000 (14:37 +0000)
author Vadim Zeitlin <vadim@wxwidgets.org>
Wed, 5 Apr 2006 14:37:47 +0000 (14:37 +0000)
committer Vadim Zeitlin <vadim@wxwidgets.org>
Wed, 5 Apr 2006 14:37:47 +0000 (14:37 +0000)
diff --git a/build/bakefiles/files.bkl b/build/bakefiles/files.bkl

index 0c01325b7ffe2c604aabad59d3837819f2a38317..6257d1f853d61141e688cbf3ce7a07ac1728fb67 100644 (file)
--- a/build/bakefiles/files.bkl
+++ b/build/bakefiles/files.bkl
@@ -290,6 +290,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file!
      src/common/clntdata.cpp
      src/common/cmdline.cpp
      src/common/config.cpp
+    src/common/convauto.cpp
      src/common/datetime.cpp
      src/common/datstrm.cpp
      src/common/dircmn.cpp
@@ -362,6 +363,7 @@ IMPORTANT: please read docs/tech/tn0016.txt before modifying this file!
      wx/cmdline.h
      wx/confbase.h
      wx/config.h
+    wx/convauto.h
      wx/containr.h
      wx/datetime.h
      wx/datstrm.h
diff --git a/docs/changes.txt b/docs/changes.txt

index cc5d1829fb4e27e60f4dec95e9a8d9362ca6993e..e24ae287c864fbc388f1b056c3f3db8496c329c0 100644 (file)
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -27,6 +27,7 @@ Deprecated methods since 2.6.x and their replacements
  
  All:
  
+- wx(F)File, wxTextFile and wxInputStreams recognize Unicode BOM now
  - wxLaunchDefaultBrowser() now supports wxBROWSER_NEW_WINDOW flag.
  - Added wxStandardPaths::GetResourcesDir() and GetLocalizedResourcesDir()
  - Added wxStringTokenizer::GetLastDelimiter(); improved documentation.
diff --git a/include/wx/convauto.h b/include/wx/convauto.h

new file mode 100644 (file)

index 0000000..ec32b93
--- /dev/null
+++ b/include/wx/convauto.h
@@ -0,0 +1,99 @@
+///////////////////////////////////////////////////////////////////////////////
+// Name:        wx/convauto.h
+// Purpose:     wxConvAuto class declaration
+// Author:      Vadim Zeitlin
+// Created:     2006-04-03
+// RCS-ID:      $Id$
+// Copyright:   (c) 2006 Vadim Zeitlin
+// Licence:     wxWindows licence
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _WX_CONVAUTO_H_
+#define _WX_CONVAUTO_H_
+
+#include "wx/strconv.h"
+
+#if wxUSE_WCHAR_T
+
+// ----------------------------------------------------------------------------
+// wxConvAuto: uses BOM to automatically detect input encoding
+// ----------------------------------------------------------------------------
+
+class WXDLLIMPEXP_BASE wxConvAuto : public wxMBConv
+{
+public:
+    // default ctor: only resets m_conv, the real conversion is created on demand
+    wxConvAuto() { m_conv = NULL; /* the rest will be initialized later */ }
+
+    // copy ctor doesn't copy anything from other: the conversion can only be
+    // deduced on the first use of the new object
+    wxConvAuto(const wxConvAuto& WXUNUSED(other)) { m_conv = NULL; }
+
+    virtual ~wxConvAuto() { if ( m_conv && m_ownsConv ) delete m_conv; }
+
+    // base class overrides using m_conv; NB: GetMBNulLen() doesn't NULL-check it
+    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
+                           const char *src, size_t srcLen = -1) const;
+
+    virtual size_t FromWChar(char *dst, size_t dstLen,
+                             const wchar_t *src, size_t srcLen = -1) const;
+
+    virtual size_t GetMBNulLen() const { return m_conv->GetMBNulLen(); }
+
+private:
+    // all currently recognized BOM values
+    enum BOMType
+    {
+        BOM_None,
+        BOM_UTF32BE,
+        BOM_UTF32LE,
+        BOM_UTF16BE,
+        BOM_UTF16LE,
+        BOM_UTF8
+    };
+
+    // return the type of the BOM at the start of src, BOM_None if there is none
+    static BOMType DetectBOM(const char *src, size_t srcLen);
+
+    // initialize m_conv with the conversion to use by default (UTF-8)
+    void InitWithDefault()
+    {
+        m_conv = &wxConvUTF8;
+        m_ownsConv = false;
+    }
+
+    // create the correct conversion object for the given BOM type
+    void InitFromBOM(BOMType bomType);
+
+    // create the correct conversion object for the BOM present in the
+    // beginning of the buffer; adjust the buffer to skip the BOM if found
+    void InitFromInput(const char **src, size_t *len);
+
+    // adjust src and len to skip over the BOM (identified by m_bomType) at the
+    // start of the buffer
+    void SkipBOM(const char **src, size_t *len) const;
+
+
+    // conversion object which we really use, NULL until the first call to
+    // either ToWChar() or FromWChar()
+    wxMBConv *m_conv;
+
+    // BOM type detected in the input, set by InitFromInput()
+    BOMType m_bomType;
+
+    // true if we allocated m_conv ourselves, false if we just use an existing
+    // global conversion
+    bool m_ownsConv;
+
+    // true if we already skipped BOM when converting (and not just calculating
+    // the size)
+    bool m_consumedBOM;
+
+
+    DECLARE_NO_ASSIGN_CLASS(wxConvAuto);
+};
+
+#endif // wxUSE_WCHAR_T
+
+#endif // _WX_CONVAUTO_H_
+
diff --git a/include/wx/datstrm.h b/include/wx/datstrm.h

index f49620a6aef16bf72355c8022dd7e634784cfe68..80fa7e285e1e02e7de4f48f98d5942e715cc8b99 100644 (file)
--- a/include/wx/datstrm.h
+++ b/include/wx/datstrm.h
@@ -14,7 +14,7 @@
  
  #include "wx/stream.h"
  #include "wx/longlong.h"
-#include "wx/strconv.h"
+#include "wx/convauto.h"
  
  #if wxUSE_STREAMS
  
@@ -22,7 +22,7 @@ class WXDLLIMPEXP_BASE wxDataInputStream
  {
  public:
  #if wxUSE_UNICODE
-    wxDataInputStream(wxInputStream& s, wxMBConv& conv = wxConvUTF8);
+    wxDataInputStream(wxInputStream& s, const wxMBConv& conv = wxConvAuto());
  #else
      wxDataInputStream(wxInputStream& s);
  #endif
@@ -83,7 +83,7 @@ protected:
      wxInputStream *m_input;
      bool m_be_order;
  #if wxUSE_UNICODE
-    wxMBConv& m_conv;
+    wxMBConv m_conv;
  #endif
  
      DECLARE_NO_COPY_CLASS(wxDataInputStream)
@@ -93,7 +93,7 @@ class WXDLLIMPEXP_BASE wxDataOutputStream
  {
  public:
  #if wxUSE_UNICODE
-    wxDataOutputStream(wxOutputStream& s, wxMBConv& conv = wxConvUTF8);
+    wxDataOutputStream(wxOutputStream& s, const wxMBConv& conv = wxConvAuto());
  #else
      wxDataOutputStream(wxOutputStream& s);
  #endif
@@ -157,7 +157,7 @@ protected:
      wxOutputStream *m_output;
      bool m_be_order;
  #if wxUSE_UNICODE
-    wxMBConv& m_conv;
+    wxMBConv m_conv;
  #endif
  
      DECLARE_NO_COPY_CLASS(wxDataOutputStream)
diff --git a/include/wx/ffile.h b/include/wx/ffile.h

index c254ed04ed9fcc27b5fe3a6f2a856cae319df8f1..58aca44eafa46d6341b6ebb0387aed30fa2130f5 100644 (file)
--- a/include/wx/ffile.h
+++ b/include/wx/ffile.h
@@ -18,6 +18,7 @@
  
  #include  "wx/string.h"
  #include  "wx/filefn.h"
+#include  "wx/convauto.h"
  
  #include <stdio.h>
  
@@ -54,14 +55,14 @@ public:
  
    // read/write (unbuffered)
      // read all data from the file into a string (useful for text files)
-  bool ReadAll(wxString *str, wxMBConv& conv = wxConvUTF8);
+  bool ReadAll(wxString *str, const wxMBConv& conv = wxConvAuto());
      // returns number of bytes read - use Eof() and Error() to see if an error
      // occurred or not
    size_t Read(void *pBuf, size_t nCount);
      // returns the number of bytes written
    size_t Write(const void *pBuf, size_t nCount);
      // returns true on success
-  bool Write(const wxString& s, wxMBConv& conv = wxConvUTF8)
+  bool Write(const wxString& s, const wxMBConv& conv = wxConvAuto())
    {
        const wxWX2MBbuf buf = s.mb_str(conv);
        size_t size = strlen(buf);
diff --git a/include/wx/file.h b/include/wx/file.h

index 243d2b83f83a63815c2819d80f8a9d1002a5f948..7f3f0e8184add500a6d9a4ecfae377a305c37676 100644 (file)
--- a/include/wx/file.h
+++ b/include/wx/file.h
@@ -97,7 +97,7 @@ public:
      // returns the number of bytes written
    size_t Write(const void *pBuf, size_t nCount);
      // returns true on success
-  bool Write(const wxString& s, wxMBConv& conv = wxConvUTF8)
+  bool Write(const wxString& s, const wxMBConv& conv = wxConvUTF8)
    {
        const wxWX2MBbuf buf = s.mb_str(conv);
        size_t size = strlen(buf);
@@ -172,7 +172,7 @@ public:
  
    // I/O (both functions return true on success, false on failure)
    bool Write(const void *p, size_t n) { return m_file.Write(p, n) == n; }
-  bool Write(const wxString& str, wxMBConv& conv = wxConvUTF8)
+  bool Write(const wxString& str, const wxMBConv& conv = wxConvUTF8)
      { return m_file.Write(str, conv); }
  
    // different ways to close the file
diff --git a/include/wx/fileconf.h b/include/wx/fileconf.h

index 79828da710737c55c6f529d4225444edb7dc073e..ebb4ab5b29a2cf3f364b62617ef3395955acc4a1 100644 (file)
--- a/include/wx/fileconf.h
+++ b/include/wx/fileconf.h
@@ -122,11 +122,11 @@ public:
                 const wxString& localFilename = wxEmptyString,
                 const wxString& globalFilename = wxEmptyString,
                 long style = wxCONFIG_USE_LOCAL_FILE | wxCONFIG_USE_GLOBAL_FILE,
-               wxMBConv& conv = wxConvUTF8);
+               const wxMBConv& conv = wxConvAuto());
  
  #if wxUSE_STREAMS
      // ctor that takes an input stream.
-  wxFileConfig(wxInputStream &inStream, wxMBConv& conv = wxConvUTF8);
+  wxFileConfig(wxInputStream &inStream, const wxMBConv& conv = wxConvAuto());
  #endif // wxUSE_STREAMS
  
      // dtor will save unsaved data
@@ -169,7 +169,7 @@ public:
    // save the entire config file text to the given stream, note that the text
    // won't be saved again in dtor when Flush() is called if you use this method
    // as it won't be "changed" any more
-  virtual bool Save(wxOutputStream& os, wxMBConv& conv = wxConvUTF8);
+  virtual bool Save(wxOutputStream& os, const wxMBConv& conv = wxConvAuto());
  #endif // wxUSE_STREAMS
  
  public:
@@ -227,7 +227,7 @@ private:
    wxFileConfigGroup *m_pRootGroup,      // the top (unnamed) group
                      *m_pCurrentGroup;   // the current group
  
-  wxMBConv   &m_conv;
+  wxMBConv    m_conv;
  
  #ifdef __UNIX__
    int m_umask;                          // the umask to use for file creation
diff --git a/include/wx/memtext.h b/include/wx/memtext.h

index aa2df7b63756336a77163e70144ff756524dc6e1..a093f40b040c5e305248e62e57d11b0344dfbba0 100644 (file)
--- a/include/wx/memtext.h
+++ b/include/wx/memtext.h
@@ -37,11 +37,11 @@ protected:
      virtual bool OnClose()
          { return true; }
  
-    virtual bool OnRead(wxMBConv& WXUNUSED(conv))
+    virtual bool OnRead(const wxMBConv& WXUNUSED(conv))
          { return true; }
  
      virtual bool OnWrite(wxTextFileType WXUNUSED(typeNew),
-                         wxMBConv& WXUNUSED(conv) = wxConvUTF8)
+                         const wxMBConv& WXUNUSED(conv) = wxConvUTF8)
          { return true; }
  
  private:
diff --git a/include/wx/string.h b/include/wx/string.h

index 5f9bdf042c988826ec8523211f5dc96bd2fc15b8..6cf19b5022d671cff06cd31e1fc885309af21541 100644 (file)
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -325,7 +325,9 @@ public:
        { InitWith(psz, 0, npos); }
    wxStringBase(const wxChar *psz, size_t nLength)
        { InitWith(psz, 0, nLength); }
-  wxStringBase(const wxChar *psz, wxMBConv& WXUNUSED(conv), size_t nLength = npos)
+  wxStringBase(const wxChar *psz,
+               const wxMBConv& WXUNUSED(conv),
+               size_t nLength = npos)
        { InitWith(psz, 0, nLength); }
      // take nLen chars starting at nPos
    wxStringBase(const wxStringBase& str, size_t nPos, size_t nLen)
@@ -650,7 +652,9 @@ public:
        : wxStringBase(psz ? psz : wxT("")) { }
    wxString(const wxChar *psz, size_t nLength)
        : wxStringBase(psz, nLength) { }
-  wxString(const wxChar *psz, wxMBConv& WXUNUSED(conv), size_t nLength = npos)
+  wxString(const wxChar *psz,
+           const wxMBConv& WXUNUSED(conv),
+           size_t nLength = npos)
        : wxStringBase(psz, nLength == npos ? wxStrlen(psz) : nLength) { }
  
    // even if we're not built with wxUSE_STL == 1 it is very convenient to allow
@@ -666,7 +670,7 @@ public:
  
  #if wxUSE_UNICODE
      // from multibyte string
-  wxString(const char *psz, wxMBConv& conv, size_t nLength = npos);
+  wxString(const char *psz, const wxMBConv& conv, size_t nLength = npos);
      // from wxWCharBuffer (i.e. return from wxGetString)
    wxString(const wxWCharBuffer& psz) : wxStringBase(psz.data()) { }
  #else // ANSI
@@ -679,7 +683,9 @@ public:
  
  #if wxUSE_WCHAR_T
      // from wide (Unicode) string
-  wxString(const wchar_t *pwz, wxMBConv& conv = wxConvLibc, size_t nLength = npos);
+  wxString(const wchar_t *pwz,
+           const wxMBConv& conv = wxConvLibc,
+           size_t nLength = npos);
  #endif // !wxUSE_WCHAR_T
  
      // from wxCharBuffer
@@ -809,14 +815,14 @@ public:
      // type differs because a function may either return pointer to the buffer
      // directly or have to use intermediate buffer for translation.
  #if wxUSE_UNICODE
-    const wxCharBuffer mb_str(wxMBConv& conv = wxConvLibc) const;
+    const wxCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const;
  
      const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); }
  
      const wxChar* wc_str() const { return c_str(); }
  
      // for compatibility with !wxUSE_UNICODE version
-    const wxChar* wc_str(wxMBConv& WXUNUSED(conv)) const { return c_str(); }
+    const wxChar* wc_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); }
  
  #if wxMBFILES
      const wxCharBuffer fn_str() const { return mb_str(wxConvFile); }
@@ -827,12 +833,12 @@ public:
      const wxChar* mb_str() const { return c_str(); }
  
      // for compatibility with wxUSE_UNICODE version
-    const wxChar* mb_str(wxMBConv& WXUNUSED(conv)) const { return c_str(); }
+    const wxChar* mb_str(const wxMBConv& WXUNUSED(conv)) const { return c_str(); }
  
      const wxWX2MBbuf mbc_str() const { return mb_str(); }
  
  #if wxUSE_WCHAR_T
-    const wxWCharBuffer wc_str(wxMBConv& conv) const;
+    const wxWCharBuffer wc_str(const wxMBConv& conv) const;
  #endif // wxUSE_WCHAR_T
  #ifdef __WXOSX__
      const wxCharBuffer fn_str() const { return wxConvFile.cWC2WX( wc_str( wxConvLocal ) ); }
diff --git a/include/wx/textbuf.h b/include/wx/textbuf.h

index 5749dd4134f483da81658e354f69e83cadf5c8f8..f768a5632471fbc2f11fd508fbc9fa5b6b49f0c5 100644 (file)
--- a/include/wx/textbuf.h
+++ b/include/wx/textbuf.h
@@ -14,6 +14,7 @@
  
  #include "wx/defs.h"
  #include "wx/arrstr.h"
+#include "wx/convauto.h"
  
  // ----------------------------------------------------------------------------
  // constants
@@ -80,10 +81,10 @@ public:
      bool Create(const wxString& strBufferName);
  
      // Open() also loads buffer in memory on success
-    bool Open(wxMBConv& conv = wxConvUTF8);
+    bool Open(const wxMBConv& conv = wxConvAuto());
  
      // same as Open() but with (another) buffer name
-    bool Open(const wxString& strBufferName, wxMBConv& conv = wxConvUTF8);
+    bool Open(const wxString& strBufferName, const wxMBConv& conv = wxConvAuto());
  
      // closes the buffer and frees memory, losing all changes
      bool Close();
@@ -161,7 +162,7 @@ public:
      // change the buffer (default argument means "don't change type")
      // possibly in another format
      bool Write(wxTextFileType typeNew = wxTextFileType_None,
-               wxMBConv& conv = wxConvUTF8);
+               const wxMBConv& conv = wxConvAuto());
  
      // dtor
      virtual ~wxTextBuffer();
@@ -183,8 +184,8 @@ protected:
      virtual bool OnOpen(const wxString &strBufferName,
                          wxTextBufferOpenMode openmode) = 0;
      virtual bool OnClose() = 0;
-    virtual bool OnRead(wxMBConv& conv) = 0;
-    virtual bool OnWrite(wxTextFileType typeNew, wxMBConv& conv) = 0;
+    virtual bool OnRead(const wxMBConv& conv) = 0;
+    virtual bool OnWrite(wxTextFileType typeNew, const wxMBConv& conv) = 0;
  
      static wxString ms_eof;     // dummy string returned at EOF
      wxString m_strBufferName;   // name of the buffer
diff --git a/include/wx/textfile.h b/include/wx/textfile.h

index f5ceaedb26c3b88fe65b7369c69623e6208bddee..588aedd24ae40a1d732c9f40f0454e277a5d167f 100644 (file)
--- a/include/wx/textfile.h
+++ b/include/wx/textfile.h
@@ -39,8 +39,8 @@ protected:
      virtual bool OnOpen(const wxString &strBufferName,
                          wxTextBufferOpenMode OpenMode);
      virtual bool OnClose();
-    virtual bool OnRead(wxMBConv& conv);
-    virtual bool OnWrite(wxTextFileType typeNew, wxMBConv& conv);
+    virtual bool OnRead(const wxMBConv& conv);
+    virtual bool OnWrite(wxTextFileType typeNew, const wxMBConv& conv);
  
  private:
  
diff --git a/include/wx/txtstrm.h b/include/wx/txtstrm.h

index 6344c8ff6233934d0a8b218aef112a749fe6fe11..d8011a8e499cd3ede1f2a54e65b35e36a6365b20 100644 (file)
--- a/include/wx/txtstrm.h
+++ b/include/wx/txtstrm.h
@@ -13,6 +13,7 @@
  #define _WX_TXTSTREAM_H_
  
  #include "wx/stream.h"
+#include "wx/convauto.h"
  
  #if wxUSE_STREAMS
  
@@ -36,9 +37,11 @@ class WXDLLIMPEXP_BASE wxTextInputStream
  {
  public:
  #if wxUSE_UNICODE
-    wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t"), wxMBConv& conv = wxConvUTF8 );
+    wxTextInputStream(wxInputStream& s,
+                      const wxString &sep=wxT(" \t"),
+                      const wxMBConv& conv = wxConvAuto());
  #else
-    wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t") );
+    wxTextInputStream(wxInputStream& s, const wxString &sep=wxT(" \t"));
  #endif
      ~wxTextInputStream();
  
@@ -78,7 +81,7 @@ protected:
      char m_lastBytes[10]; // stores the bytes that were read for the last character
  
  #if wxUSE_UNICODE
-    wxMBConv &m_conv;
+    wxMBConv m_conv;
  #endif
  
      bool   EatEOL(const wxChar &c);
@@ -102,9 +105,11 @@ class WXDLLIMPEXP_BASE wxTextOutputStream
  {
  public:
  #if wxUSE_UNICODE
-    wxTextOutputStream( wxOutputStream& s, wxEOL mode = wxEOL_NATIVE, wxMBConv& conv = wxConvUTF8  );
+    wxTextOutputStream(wxOutputStream& s,
+                       wxEOL mode = wxEOL_NATIVE,
+                       const wxMBConv& conv = wxConvAuto());
  #else
-    wxTextOutputStream( wxOutputStream& s, wxEOL mode = wxEOL_NATIVE );
+    wxTextOutputStream(wxOutputStream& s, wxEOL mode = wxEOL_NATIVE);
  #endif
      virtual ~wxTextOutputStream();
  
@@ -139,7 +144,7 @@ protected:
      wxEOL           m_mode;
  
  #if wxUSE_UNICODE
-    wxMBConv &m_conv;
+    wxMBConv m_conv;
  #endif
  
      DECLARE_NO_COPY_CLASS(wxTextOutputStream)
diff --git a/src/common/convauto.cpp b/src/common/convauto.cpp

new file mode 100644 (file)

index 0000000..d43bb6d
--- /dev/null
+++ b/src/common/convauto.cpp
@@ -0,0 +1,214 @@
+///////////////////////////////////////////////////////////////////////////////
+// Name:        src/common/convauto.cpp
+// Purpose:     implementation of wxConvAuto
+// Author:      Vadim Zeitlin
+// Created:     2006-04-04
+// RCS-ID:      $Id$
+// Copyright:   (c) 2006 Vadim Zeitlin <vadim@wxwindows.org>
+// Licence:     wxWindows licence
+///////////////////////////////////////////////////////////////////////////////
+
+// ============================================================================
+// declarations
+// ============================================================================
+
+// ----------------------------------------------------------------------------
+// headers
+// ----------------------------------------------------------------------------
+
+// for compilers that support precompilation, includes "wx.h".
+#include "wx/wxprec.h"
+
+#ifdef __BORLANDC__
+    #pragma hdrstop
+#endif
+
+#if wxUSE_WCHAR_T
+
+#ifndef WX_PRECOMP
+#endif //WX_PRECOMP
+
+#include "wx/convauto.h"
+
+// ============================================================================
+// implementation
+// ============================================================================
+
+/* static */
+wxConvAuto::BOMType wxConvAuto::DetectBOM(const char *src, size_t srcLen)
+{
+    if ( srcLen < 2 )
+    {
+        // the shortest BOM is 2 bytes long so bail out immediately: this lets
+        // the code below skip the length check in the UTF-16 cases
+        return BOM_None;
+    }
+
+    // examine the buffer for BOM presence, dispatching on its first byte
+    //
+    // see http://www.unicode.org/faq/utf_bom.html#BOM
+    switch ( *src++ )
+    {
+        case '\0':
+            // could only be big endian UTF-32 (00 00 FE FF)
+            if ( srcLen >= 4 &&
+                    src[0] == '\0' &&
+                        src[1] == '\xfe' &&
+                            src[2] == '\xff' )
+            {
+                return BOM_UTF32BE;
+            }
+            break;
+
+        case '\xfe':
+            // could only be big endian UTF-16 (FE FF)
+            if ( *src++ == '\xff' )
+            {
+                return BOM_UTF16BE;
+            }
+            break;
+
+        case '\xff':
+            // could be either little endian UTF-16 or UTF-32, both start
+            // with FF FE; it is UTF-32 only if two zero bytes follow
+            if ( *src++ == '\xfe' )
+            {
+                return srcLen >= 4 && src[0] == '\0' && src[1] == '\0'
+                            ? BOM_UTF32LE
+                            : BOM_UTF16LE;
+            }
+            break;
+
+        case '\xef':
+            // is this UTF-8 BOM (EF BB BF)?
+            if ( srcLen >= 3 && src[0] == '\xbb' && src[1] == '\xbf' )
+            {
+                return BOM_UTF8;
+            }
+            break;
+    }
+
+    return BOM_None;
+}
+
+void wxConvAuto::InitFromBOM(BOMType bomType)
+{
+    m_consumedBOM = false;
+
+    switch ( bomType )
+    {
+        case BOM_UTF32BE:
+            m_conv = new wxMBConvUTF32BE;
+            m_ownsConv = true;
+            break;
+
+        case BOM_UTF32LE:
+            m_conv = new wxMBConvUTF32LE;
+            m_ownsConv = true;
+            break;
+
+        case BOM_UTF16BE:
+            m_conv = new wxMBConvUTF16BE;
+            m_ownsConv = true;
+            break;
+
+        case BOM_UTF16LE:
+            m_conv = new wxMBConvUTF16LE;
+            m_ownsConv = true;
+            break;
+
+        case BOM_UTF8:
+            m_conv = &wxConvUTF8;
+            m_ownsConv = false;
+            break;
+
+        default:
+            wxFAIL_MSG( _T("unexpected BOM type") );
+            // fall through: m_conv must still be set, so use the default one
+
+        case BOM_None:
+            InitWithDefault();
+            m_consumedBOM = true; // no BOM, hence nothing left to consume
+    }
+}
+
+void wxConvAuto::SkipBOM(const char **src, size_t *len) const
+{
+    int ofs;
+    switch ( m_bomType )
+    {
+        case BOM_UTF32BE:
+        case BOM_UTF32LE:
+            ofs = 4;
+            break;
+
+        case BOM_UTF16BE:
+        case BOM_UTF16LE:
+            ofs = 2;
+            break;
+
+        case BOM_UTF8:
+            ofs = 3;
+            break;
+
+        default:
+            wxFAIL_MSG( _T("unexpected BOM type") );
+            // fall through: treat it as if there were no BOM, i.e. skip nothing
+
+        case BOM_None:
+            ofs = 0;
+    }
+
+    *src += ofs;
+    if ( *len != (size_t)-1 ) // (size_t)-1 length is left as is, not decremented
+        *len -= ofs;
+}
+
+void wxConvAuto::InitFromInput(const char **src, size_t *len)
+{
+    m_bomType = DetectBOM(*src, *len);  // remembered for later SkipBOM() calls
+    InitFromBOM(m_bomType);             // chooses m_conv for this BOM type
+    SkipBOM(src, len);                  // advance caller's src/len past the BOM
+}
+
+size_t
+wxConvAuto::ToWChar(wchar_t *dst, size_t dstLen,
+                    const char *src, size_t srcLen) const
+{
+    // the BOM is checked and the matching conversion created the first time
+    // we're called; however the BOM must be skipped not only during that
+    // initial call but also during the first call with non-NULL dst, because
+    // typically we're first called with NULL dst just to calculate the needed
+    // buffer size
+    wxConvAuto *self = wx_const_cast(wxConvAuto *, this);
+    if ( !m_conv )
+    {
+        self->InitFromInput(&src, &srcLen);
+        if ( dst )
+            self->m_consumedBOM = true;
+    }
+
+    if ( !m_consumedBOM && dst )
+    {
+        self->m_consumedBOM = true;
+        SkipBOM(&src, &srcLen);
+    }
+
+    return m_conv->ToWChar(dst, dstLen, src, srcLen);
+}
+
+size_t
+wxConvAuto::FromWChar(char *dst, size_t dstLen,
+                      const wchar_t *src, size_t srcLen) const
+{
+    if ( !m_conv )
+    {
+        // no conversion chosen yet: default to UTF-8 for the multibyte output
+        wx_const_cast(wxConvAuto *, this)->InitWithDefault();
+    }
+
+    return m_conv->FromWChar(dst, dstLen, src, srcLen);
+}
+
+#endif // wxUSE_WCHAR_T
+
diff --git a/src/common/datstrm.cpp b/src/common/datstrm.cpp

index a1961c3f021af8ce810f9b1f1efd78bb08574006..67a2e458050716b9e217faccfdafc981671df2b2 100644 (file)
--- a/src/common/datstrm.cpp
+++ b/src/common/datstrm.cpp
@@ -26,7 +26,7 @@
  // ---------------------------------------------------------------------------
  
  #if wxUSE_UNICODE
-wxDataInputStream::wxDataInputStream(wxInputStream& s, wxMBConv& conv)
+wxDataInputStream::wxDataInputStream(wxInputStream& s, const wxMBConv& conv)
    : m_input(&s), m_be_order(false), m_conv(conv)
  #else
  wxDataInputStream::wxDataInputStream(wxInputStream& s)
@@ -445,7 +445,7 @@ wxDataInputStream& wxDataInputStream::operator>>(float& f)
  // ---------------------------------------------------------------------------
  
  #if wxUSE_UNICODE
-wxDataOutputStream::wxDataOutputStream(wxOutputStream& s, wxMBConv& conv)
+wxDataOutputStream::wxDataOutputStream(wxOutputStream& s, const wxMBConv& conv)
    : m_output(&s), m_be_order(false), m_conv(conv)
  #else
  wxDataOutputStream::wxDataOutputStream(wxOutputStream& s)
diff --git a/src/common/ffile.cpp b/src/common/ffile.cpp

index d34756376a59073b83fac9034a7afde8c07d7aba..997bf03d2b56a5bb7a3fbb2cde74a9a5d7b72865 100644 (file)
--- a/src/common/ffile.cpp
+++ b/src/common/ffile.cpp
@@ -103,7 +103,7 @@ bool wxFFile::Close()
  // read/write
  // ----------------------------------------------------------------------------
  
-bool wxFFile::ReadAll(wxString *str, wxMBConv& conv)
+bool wxFFile::ReadAll(wxString *str, const wxMBConv& conv)
  {
      wxCHECK_MSG( str, false, wxT("invalid parameter") );
      wxCHECK_MSG( IsOpened(), false, wxT("can't read from closed file") );
diff --git a/src/common/fileconf.cpp b/src/common/fileconf.cpp

index d492e6466d0baee4b1ab34db01063b91d0b69f37..5e84cf2c415d4d1a310e47690fc9ffec73ff919e 100644 (file)
--- a/src/common/fileconf.cpp
+++ b/src/common/fileconf.cpp
@@ -426,7 +426,8 @@ void wxFileConfig::Init()
  // constructor supports creation of wxFileConfig objects of any type
  wxFileConfig::wxFileConfig(const wxString& appName, const wxString& vendorName,
                             const wxString& strLocal, const wxString& strGlobal,
-                           long style, wxMBConv& conv)
+                           long style,
+                           const wxMBConv& conv)
              : wxConfigBase(::GetAppName(appName), vendorName,
                             strLocal, strGlobal,
                             style),
@@ -474,7 +475,7 @@ wxFileConfig::wxFileConfig(const wxString& appName, const wxString& vendorName,
  
  #if wxUSE_STREAMS
  
-wxFileConfig::wxFileConfig(wxInputStream &inStream, wxMBConv& conv)
+wxFileConfig::wxFileConfig(wxInputStream &inStream, const wxMBConv& conv)
              : m_conv(conv)
  {
      // always local_file when this constructor is called (?)
@@ -1036,7 +1037,7 @@ bool wxFileConfig::Flush(bool /* bCurrentOnly */)
  
  #if wxUSE_STREAMS
  
-bool wxFileConfig::Save(wxOutputStream& os, wxMBConv& conv)
+bool wxFileConfig::Save(wxOutputStream& os, const wxMBConv& conv)
  {
      // save unconditionally, even if not dirty
      for ( wxFileConfigLineList *p = m_linesHead; p != NULL; p = p->Next() )
diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp

index 01e0dc358c37bbf375ff837743ef4df34cc1da92..3670582926b24045b9157a5bcfcd301db44944fb 100644 (file)
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -203,21 +203,16 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
          size_t lenChunk = MB2WC(NULL, src, 0);
          if ( lenChunk == 0 )
          {
-            // nothing left in the input string, conversion succeeded
+            // nothing left in the input string, conversion succeeded; but
+            // still account for the trailing NUL
+            dstWritten++;
              break;
          }
  
          if ( lenChunk == wxCONV_FAILED )
              return wxCONV_FAILED;
  
-        // if we already have a previous chunk, leave the NUL separating it
-        // from this one
-        if ( dstWritten )
-        {
-            dstWritten++;
-            if ( dst )
-                dst++;
-        }
+        lenChunk++; // for trailing NUL
  
          dstWritten += lenChunk;
  
@@ -226,8 +221,7 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
              if ( dstWritten > dstLen )
                  return wxCONV_FAILED;
  
-            lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
-            if ( lenChunk == wxCONV_FAILED )
+            if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
                  return wxCONV_FAILED;
  
              dst += lenChunk;
@@ -390,11 +384,11 @@ wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
      const size_t dstLen = ToWChar(NULL, 0, in, inLen);
      if ( dstLen != wxCONV_FAILED )
      {
-        wxWCharBuffer wbuf(dstLen);
+        wxWCharBuffer wbuf(dstLen - 1);
          if ( ToWChar(wbuf.data(), dstLen, in, inLen) )
          {
              if ( outLen )
-                *outLen = dstLen;
+                *outLen = dstLen - 1;
              return wbuf;
          }
      }
@@ -411,11 +405,11 @@ wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
      const size_t dstLen = FromWChar(NULL, 0, in, inLen);
      if ( dstLen != wxCONV_FAILED )
      {
-        wxCharBuffer buf(dstLen);
+        wxCharBuffer buf(dstLen - 1);
          if ( FromWChar(buf.data(), dstLen, in, inLen) )
          {
              if ( outLen )
-                *outLen = dstLen;
+                *outLen = dstLen - 1;
              return buf;
          }
      }
@@ -1825,34 +1819,26 @@ public:
          // wouldn't work if reading an incomplete MB char didn't result in an
          // error
          //
-        // note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
-        // an error (tested under Windows Server 2003) and apparently it is
-        // done on purpose, i.e. the function accepts any input in this case
-        // and although I'd prefer to return error on ill-formed output, our
-        // own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
-        // explicitly ill-formed according to RFC 2152) neither so we don't
-        // even have any fallback here...
-        //
          // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
-        // Win XP or newer and if it is specified on older versions, conversion
-        // from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
-        // fails. So we can only use the flag on newer Windows versions.
-        // Additionally, the flag is not supported by UTF7, symbol and CJK
-        // encodings. See here:
+        // Win XP or newer and it is not supported for UTF-[78] so we always
+        // use our own conversions in this case. See
          //     http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
          //     http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
-        int flags = 0;
-        if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
-             m_CodePage < 50000 &&
-             IsAtLeastWin2kSP4() )
+        if ( m_CodePage == CP_UTF8 )
          {
-            flags = MB_ERR_INVALID_CHARS;
+            return wxConvUTF8.MB2WC(buf, psz, n);
          }
-        else if ( m_CodePage == CP_UTF8 )
+
+        if ( m_CodePage == CP_UTF7 )
+        {
+            return wxConvUTF7.MB2WC(buf, psz, n);
+        }
+
+        int flags = 0;
+        if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
+                IsAtLeastWin2kSP4() )
          {
-            // Avoid round-trip in the special case of UTF-8 by using our
-            // own UTF-8 conversion code:
-            return wxMBConvUTF8().MB2WC(buf, psz, n);
+            flags = MB_ERR_INVALID_CHARS;
          }
  
          const size_t len = ::MultiByteToWideChar
diff --git a/src/common/string.cpp b/src/common/string.cpp

index 4e128a124f2393e451e3f7e74e1490fa1a94d94b..de7500d0b222f7ce0460f1826950355ab708a580 100644 (file)
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -1006,7 +1006,7 @@ int STRINGCLASS::compare(size_t nStart, size_t nLen,
  #if wxUSE_UNICODE
  
  // from multibyte string
-wxString::wxString(const char *psz, wxMBConv& conv, size_t nLength)
+wxString::wxString(const char *psz, const wxMBConv& conv, size_t nLength)
  {
      // anything to do?
      if ( psz && nLength != 0 )
@@ -1031,7 +1031,7 @@ wxString::wxString(const char *psz, wxMBConv& conv, size_t nLength)
  }
  
  //Convert wxString in Unicode mode to a multi-byte string
-const wxCharBuffer wxString::mb_str(wxMBConv& conv) const
+const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
  {
      return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL);
  }
@@ -1041,7 +1041,7 @@ const wxCharBuffer wxString::mb_str(wxMBConv& conv) const
  #if wxUSE_WCHAR_T
  
  // from wide string
-wxString::wxString(const wchar_t *pwz, wxMBConv& conv, size_t nLength)
+wxString::wxString(const wchar_t *pwz, const wxMBConv& conv, size_t nLength)
  {
      // anything to do?
      if ( pwz && nLength != 0 )
@@ -1067,7 +1067,7 @@ wxString::wxString(const wchar_t *pwz, wxMBConv& conv, size_t nLength)
  
  //Converts this string to a wide character string if unicode
  //mode is not enabled and wxUSE_WCHAR_T is enabled
-const wxWCharBuffer wxString::wc_str(wxMBConv& conv) const
+const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
  {
      return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL);
  }
diff --git a/src/common/textbuf.cpp b/src/common/textbuf.cpp

index b1950d24bc4590902a1c1b84fa9c90b1fdf8efbe..2aa823d9b3511deef6ce03dca404c0f0826a266b 100644 (file)
--- a/src/common/textbuf.cpp
+++ b/src/common/textbuf.cpp
@@ -181,14 +181,14 @@ bool wxTextBuffer::Create()
      return true;
  }
  
-bool wxTextBuffer::Open(const wxString& strBufferName, wxMBConv& conv)
+bool wxTextBuffer::Open(const wxString& strBufferName, const wxMBConv& conv)
  {
      m_strBufferName = strBufferName;
  
      return Open(conv);
  }
  
-bool wxTextBuffer::Open(wxMBConv& conv)
+bool wxTextBuffer::Open(const wxMBConv& conv)
  {
      // buffer name must be either given in ctor or in Open(const wxString&)
      wxASSERT( !m_strBufferName.empty() );
@@ -276,7 +276,7 @@ bool wxTextBuffer::Close()
      return true;
  }
  
-bool wxTextBuffer::Write(wxTextFileType typeNew, wxMBConv& conv)
+bool wxTextBuffer::Write(wxTextFileType typeNew, const wxMBConv& conv)
  {
      return OnWrite(typeNew, conv);
  }
diff --git a/src/common/textfile.cpp b/src/common/textfile.cpp

index 40f5d4b44d10a3edb7ba7e9df6c13dc22043402d..3334375b44ccc6027877a82873e99fab621035e6 100644 (file)
--- a/src/common/textfile.cpp
+++ b/src/common/textfile.cpp
@@ -86,7 +86,7 @@ bool wxTextFile::OnClose()
  }
  
  
-bool wxTextFile::OnRead(wxMBConv& conv)
+bool wxTextFile::OnRead(const wxMBConv& conv)
  {
      // file should be opened and we must be in it's beginning
      wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 );
@@ -114,18 +114,8 @@ bool wxTextFile::OnRead(wxMBConv& conv)
              return false;
          }
  
-        eof = nRead == 0;
-        if ( eof )
-        {
-            // append 4 trailing NUL bytes: this is needed to ensure that the
-            // string is going to be NUL-terminated, whatever is the encoding
-            // used (even UTF-32)
-            block[0] =
-            block[1] =
-            block[2] =
-            block[3] = '\0';
-            nRead = 4;
-        }
+        if ( nRead == 0 )
+            break;
  
          // this shouldn't happen but don't overwrite the buffer if it does
          wxCHECK_MSG( bufPos + nRead <= bufSize, false,
@@ -136,7 +126,7 @@ bool wxTextFile::OnRead(wxMBConv& conv)
          bufPos += nRead;
      }
  
-    const wxString str(buf, conv);
+    const wxString str(buf, conv, bufPos);
  
      // this doesn't risk to happen in ANSI build
  #if wxUSE_UNICODE
@@ -211,7 +201,7 @@ bool wxTextFile::OnRead(wxMBConv& conv)
  }
  
  
-bool wxTextFile::OnWrite(wxTextFileType typeNew, wxMBConv& conv)
+bool wxTextFile::OnWrite(wxTextFileType typeNew, const wxMBConv& conv)
  {
      wxFileName fn = m_strBufferName;
  
diff --git a/src/common/txtstrm.cpp b/src/common/txtstrm.cpp

index 61260824e26937e3aca5ce46cad411d63c430f89..884c4b84c002fec379fc1e10fe8cfbd98211d521 100644 (file)
--- a/src/common/txtstrm.cpp
+++ b/src/common/txtstrm.cpp
@@ -35,7 +35,9 @@
  // ----------------------------------------------------------------------------
  
  #if wxUSE_UNICODE
-wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep, wxMBConv& conv)
+wxTextInputStream::wxTextInputStream(wxInputStream &s,
+                                     const wxString &sep,
+                                     const wxMBConv& conv)
    : m_input(s), m_separators(sep), m_conv(conv)
  {
      memset((void*)m_lastBytes, 0, 10);
@@ -298,7 +300,9 @@ wxTextInputStream& wxTextInputStream::operator>>(float& f)
  
  
  #if wxUSE_UNICODE
-wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, wxEOL mode, wxMBConv& conv)
+wxTextOutputStream::wxTextOutputStream(wxOutputStream& s,
+                                       wxEOL mode,
+                                       const wxMBConv& conv)
    : m_output(s), m_conv(conv)
  #else
  wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, wxEOL mode)
author	Vadim Zeitlin <vadim@wxwidgets.org>
	Wed, 5 Apr 2006 14:37:47 +0000 (14:37 +0000)
committer	Vadim Zeitlin <vadim@wxwidgets.org>
	Wed, 5 Apr 2006 14:37:47 +0000 (14:37 +0000)
build/bakefiles/files.bkl		patch \| blob \| blame \| history
docs/changes.txt		patch \| blob \| blame \| history
include/wx/convauto.h	[new file with mode: 0644]	patch \| blob
include/wx/datstrm.h		patch \| blob \| blame \| history
include/wx/ffile.h		patch \| blob \| blame \| history
include/wx/file.h		patch \| blob \| blame \| history
include/wx/fileconf.h		patch \| blob \| blame \| history
include/wx/memtext.h		patch \| blob \| blame \| history
include/wx/string.h		patch \| blob \| blame \| history
include/wx/textbuf.h		patch \| blob \| blame \| history
include/wx/textfile.h		patch \| blob \| blame \| history
include/wx/txtstrm.h		patch \| blob \| blame \| history
src/common/convauto.cpp	[new file with mode: 0644]	patch \| blob
src/common/datstrm.cpp		patch \| blob \| blame \| history
src/common/ffile.cpp		patch \| blob \| blame \| history
src/common/fileconf.cpp		patch \| blob \| blame \| history
src/common/strconv.cpp		patch \| blob \| blame \| history
src/common/string.cpp		patch \| blob \| blame \| history
src/common/textbuf.cpp		patch \| blob \| blame \| history
src/common/textfile.cpp		patch \| blob \| blame \| history
src/common/txtstrm.cpp		patch \| blob \| blame \| history