From: Václav Slavík <vslavik@fastmail.fm>
Date: Mon, 21 Apr 2008 10:46:30 +0000 (+0000)
Subject: preserve TAB characters when copying HTML <pre> content to clipboard
X-Git-Url: https://git.saurik.com/wxWidgets.git/commitdiff_plain/6a603a10e77f719458939d117e46f7d8ed0b372b

preserve TAB characters when copying HTML <pre> content to clipboard

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@53282 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
---

diff --git a/include/wx/html/htmlcell.h b/include/wx/html/htmlcell.h
index 4ec02db0b0..df6dfa709a 100644
--- a/include/wx/html/htmlcell.h
+++ b/include/wx/html/htmlcell.h
@@ -375,12 +375,17 @@ public:
     void Draw(wxDC& dc, int x, int y, int view_y1, int view_y2,
               wxHtmlRenderingInfo& info);
     virtual wxCursor GetMouseCursor(wxHtmlWindowInterface *window) const;
-    wxString ConvertToText(wxHtmlSelection *sel) const;
+    virtual wxString ConvertToText(wxHtmlSelection *sel) const;
     bool IsLinebreakAllowed() const { return m_allowLinebreak; }
 
     void SetPreviousWord(wxHtmlWordCell *cell);
 
 protected:
+    virtual wxString GetAllAsText() const
+        { return m_Word; }
+    virtual wxString GetPartAsText(int begin, int end) const
+        { return m_Word.Mid(begin, end - begin); }
+
     void SetSelectionPrivPos(const wxDC& dc, wxHtmlSelection *s) const;
     void Split(const wxDC& dc,
                const wxPoint& selFrom, const wxPoint& selTo,
@@ -394,7 +399,28 @@ protected:
 };
 
 
+// wxHtmlWordCell specialization for text fragments with embedded '\t's:
+// 'word' (TABs expanded to spaces) is displayed, while 'wordOrig' (TABs
+// kept) is what gets copied to clipboard; 'linepos' is the start column
+class WXDLLIMPEXP_HTML wxHtmlWordWithTabsCell : public wxHtmlWordCell
+{
+public:
+    wxHtmlWordWithTabsCell(const wxString& word,
+                           const wxString& wordOrig,
+                           size_t linepos,
+                           const wxDC& dc)
+        : wxHtmlWordCell(word, dc),
+          m_wordOrig(wordOrig),
+          m_linepos(linepos)
+    {}
+
+protected:
+    virtual wxString GetAllAsText() const;
+    virtual wxString GetPartAsText(int begin, int end) const;
 
+    wxString m_wordOrig; // text as given by the caller, TABs not expanded
+    size_t   m_linepos;  // column on the line where this fragment starts
+};
 
 
 // Container contains other cells, thus forming tree structure of rendering
diff --git a/include/wx/html/winpars.h b/include/wx/html/winpars.h
index 8f554ce8c2..5835338889 100644
--- a/include/wx/html/winpars.h
+++ b/include/wx/html/winpars.h
@@ -145,11 +145,25 @@ public:
     // creates font depending on m_Font* members.
     virtual wxFont* CreateCurrentFont();
 
+    enum WhitespaceMode
+    {
+        Whitespace_Normal,  // normal mode, collapse whitespace
+        Whitespace_Pre      // inside <pre>, keep whitespace as-is
+    };
+
+    // set or query the current whitespace handling mode
+    void SetWhitespaceMode(WhitespaceMode mode) { m_whitespaceMode = mode; }
+    WhitespaceMode GetWhitespaceMode() const { return m_whitespaceMode; }
+
 protected:
     virtual void AddText(const wxString& txt);
 
 private:
-    void DoAddText(wxChar *temp, int& templen);
+    void FlushWordBuf(wxChar *temp, int& len);
+    void AddWord(wxHtmlWordCell *word);
+    void AddWord(const wxString& word)
+        { AddWord(new wxHtmlWordCell(word, *(GetDC()))); }
+    void AddPreBlock(const wxString& text);
 
     bool m_tmpLastWasSpace;
     wxChar *m_tmpStrBuf;
@@ -207,8 +221,15 @@ private:
     wxEncodingConverter *m_EncConv;
 #endif
 
+    // current whitespace handling mode
+    WhitespaceMode m_whitespaceMode;
+
     wxHtmlWordCell *m_lastWordCell;
 
+    // current position on line, in num. of characters; used to properly
+    // expand TABs; only updated while inside <pre>
+    int m_posColumn;
+
     DECLARE_NO_COPY_CLASS(wxHtmlWinParser)
 };
 
diff --git a/src/html/htmlcell.cpp b/src/html/htmlcell.cpp
index 25099e75c8..01d92107bd 100644
--- a/src/html/htmlcell.cpp
+++ b/src/html/htmlcell.cpp
@@ -464,6 +464,8 @@ void wxHtmlWordCell::Split(const wxDC& dc,
 
     pos1 = i;
     pos2 = j;
+
+    wxASSERT( pos2 >= pos1 );
 }
 
 void wxHtmlWordCell::SetSelectionPrivPos(const wxDC& dc, wxHtmlSelection *s) const
@@ -617,6 +619,17 @@ void wxHtmlWordCell::Draw(wxDC& dc, int x, int y,
     }
 }
 
+wxCursor wxHtmlWordCell::GetMouseCursor(wxHtmlWindowInterface *window) const
+{
+    if ( !GetLink() )
+    {
+        return window->GetHTMLCursor(wxHtmlWindowInterface::HTMLCursor_Text);
+    }
+    else
+    {
+        return wxHtmlCell::GetMouseCursor(window);
+    }
+}
 
 wxString wxHtmlWordCell::ConvertToText(wxHtmlSelection *s) const
 {
@@ -634,29 +647,77 @@ wxString wxHtmlWordCell::ConvertToText(wxHtmlSelection *s) const
         // TODO: but this really needs to be fixed in some better way later...
         if ( priv != wxDefaultPosition )
         {
-            int part1 = priv.x;
-            int part2 = priv.y;
-            return m_Word.Mid(part1, part2-part1);
+            const int part1 = priv.x;
+            const int part2 = priv.y;
+            if ( part1 == part2 )
+                return wxEmptyString;
+            return GetPartAsText(part1, part2);
         }
         //else: return the whole word below
     }
 
-    return m_Word;
+    return GetAllAsText();
 }
 
-wxCursor wxHtmlWordCell::GetMouseCursor(wxHtmlWindowInterface *window) const
+wxString wxHtmlWordWithTabsCell::GetAllAsText() const
 {
-    if ( !GetLink() )
+    return m_wordOrig;
+}
+
+wxString wxHtmlWordWithTabsCell::GetPartAsText(int begin, int end) const
+{
+    // NB: The 'begin' and 'end' positions are in the _displayed_ text
+    //     (stored in m_Word) and not in the text with tabs that should
+    //     be copied to clipboard (m_wordOrig).
+    //
+    // NB: Because selection is performed on displayed text, 'begin' and
+    //     'end' may fall inside the ' 's a TAB was expanded into. A TAB
+    //     that is only partially selected is copied to clipboard once; a
+    //     TAB whose expansion ends at or before 'begin' is not copied.
+
+    wxASSERT( begin < end );
+
+    const unsigned SPACES_PER_TAB = 8;
+
+    wxString sel;
+
+    int pos = 0;
+    wxString::const_iterator i = m_wordOrig.begin();
+
+    // find the beginning of text to copy (never run past the string):
+    for ( ; pos < begin && i != m_wordOrig.end(); ++i )
     {
-        return window->GetHTMLCursor(wxHtmlWindowInterface::HTMLCursor_Text);
+        if ( *i == '\t' )
+        {
+            pos += SPACES_PER_TAB - (m_linepos + pos) % SPACES_PER_TAB;
+            if ( pos > begin ) // TAB expansion reaches into the selection
+            {
+                sel += '\t';
+            }
+        }
+        else
+        {
+            ++pos;
+        }
     }
-    else
+
+    // copy the content until we reach 'end' (or the end of the string):
+    for ( ; pos < end && i != m_wordOrig.end(); ++i )
     {
-        return wxHtmlCell::GetMouseCursor(window);
+        const wxChar c = *i;
+        sel += c;
+
+        if ( c == '\t' )
+            pos += SPACES_PER_TAB - (m_linepos + pos) % SPACES_PER_TAB;
+        else
+            ++pos;
     }
+
+    return sel;
 }
 
 
+
 //-----------------------------------------------------------------------------
 // wxHtmlContainerCell
 //-----------------------------------------------------------------------------
diff --git a/src/html/m_pre.cpp b/src/html/m_pre.cpp
index 026ae9c018..b2a5cb89e5 100644
--- a/src/html/m_pre.cpp
+++ b/src/html/m_pre.cpp
@@ -28,40 +28,28 @@
 FORCE_LINK_ME(m_pre)
 
-// replaces '\t', ' ' and '\n' with HTML markup:
+// replaces '\n' with <br>; HTML tags are copied through unchanged:
-static wxString LINKAGEMODE HtmlizeWhitespaces(const wxString& str)
+static wxString LINKAGEMODE HtmlizeLinebreaks(const wxString& str)
 {
     wxString out;
+    out.reserve(str.length()); // we'll certainly need at least that
+
     size_t len = str.Len();
-    size_t linepos = 0;
     for (size_t i = 0; i < len; i++)
     {
         switch ( str[i].GetValue() )
         {
-            case wxT('<'):
-                while (i < len && str[i] != wxT('>'))
+            case '<':
+                while (i < len && str[i] != '>')
                 {
                     out << str[i++];
                 }
-                out << wxT('>');
-                break;
-            case wxT(' '):
-                out << wxT("&nbsp;");
-                linepos++;
-                break;
-            case wxT('\n'):
-                out << wxT("<br>");
-                linepos = 0;
+                out << '>';
                 break;
-            case wxT('\t'):
-                {
-                    for (size_t j = 8 - linepos % 8; j > 0; j--)
-                        out << wxT("&nbsp;");
-                    linepos += 8 - linepos % 8;
-                }
+            case '\n':
+                out << "<br>";
                 break;
             default:
                 out << str[i];
-                linepos++;
                 break;
         }
     }
@@ -81,13 +69,16 @@ TAG_HANDLER_BEGIN(PRE, "PRE")
     {
         wxHtmlContainerCell *c;
 
-        int fixed = m_WParser->GetFontFixed(),
-            italic = m_WParser->GetFontItalic(),
-            underlined = m_WParser->GetFontUnderlined(),
-            bold = m_WParser->GetFontBold(),
-            fsize = m_WParser->GetFontSize();
+        const int fixed = m_WParser->GetFontFixed();
+        const int italic = m_WParser->GetFontItalic();
+        const int underlined = m_WParser->GetFontUnderlined();
+        const int bold = m_WParser->GetFontBold();
+        const int fsize = m_WParser->GetFontSize();
+        const wxHtmlWinParser::WhitespaceMode whitespace =
+            m_WParser->GetWhitespaceMode();
 
         c = m_WParser->GetContainer();
+        m_WParser->SetWhitespaceMode(wxHtmlWinParser::Whitespace_Pre);
         m_WParser->SetFontUnderlined(false);
         m_WParser->SetFontBold(false);
         m_WParser->SetFontItalic(false);
@@ -103,12 +94,17 @@ TAG_HANDLER_BEGIN(PRE, "PRE")
         c->SetIndent(m_WParser->GetCharHeight(), wxHTML_INDENT_TOP);
 
         wxString srcMid = m_WParser->GetInnerSource(tag);
-        ParseInnerSource(HtmlizeWhitespaces(srcMid));
+
+        // setting Whitespace_Pre mode takes care of spaces and TABs, but
+        // not linebreaks, so we have to translate them into <br> by
+        // calling HtmlizeLinebreaks() here
+        ParseInnerSource(HtmlizeLinebreaks(srcMid));
 
         m_WParser->CloseContainer();
         m_WParser->CloseContainer();
         c = m_WParser->OpenContainer();
 
+        m_WParser->SetWhitespaceMode(whitespace);
         m_WParser->SetFontUnderlined(underlined);
         m_WParser->SetFontBold(bold);
         m_WParser->SetFontItalic(italic);
diff --git a/src/html/winpars.cpp b/src/html/winpars.cpp
index 3a1aff2aab..9b5adbdfb1 100644
--- a/src/html/winpars.cpp
+++ b/src/html/winpars.cpp
@@ -52,7 +52,9 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindowInterface *wndIface)
     m_InputEnc = wxFONTENCODING_ISO8859_1;
     m_OutputEnc = wxFONTENCODING_DEFAULT;
 #endif
+    m_whitespaceMode = Whitespace_Normal;
     m_lastWordCell = NULL;
+    m_posColumn = 0;
 
     {
         int i, j, k, l, m;
@@ -345,103 +347,176 @@ wxFSFile *wxHtmlWinParser::OpenURL(wxHtmlURLType type,
 
 void wxHtmlWinParser::AddText(const wxString& txt)
 {
-    register wxChar d;
-    int templen = 0;
+    #define NBSP_UNICODE_VALUE  (wxChar(160))
+#if !wxUSE_UNICODE
+    if ( m_nbsp == 0 )
+        m_nbsp = GetEntitiesParser()->GetCharForCode(NBSP_UNICODE_VALUE);
+    #define CUR_NBSP_VALUE m_nbsp
+#else
+    #define CUR_NBSP_VALUE NBSP_UNICODE_VALUE
+#endif
 
-    size_t lng = txt.length();
-    if (lng+1 > m_tmpStrBufSize)
+    if ( m_whitespaceMode == Whitespace_Normal )
     {
-        delete[] m_tmpStrBuf;
-        m_tmpStrBuf = new wxChar[lng+1];
-        m_tmpStrBufSize = lng+1;
-    }
-    wxChar *temp = m_tmpStrBuf;
-
-    wxString::const_iterator i = txt.begin();
-    wxString::const_iterator end = txt.end();
+        int templen = 0;
 
-    if (m_tmpLastWasSpace)
-    {
-        while ( (i < end) &&
-                (*i == wxT('\n') || *i == wxT('\r') || *i == wxT(' ') ||
-                 *i == wxT('\t')) )
+        size_t lng = txt.length();
+        if (lng+1 > m_tmpStrBufSize)
         {
-            ++i;
+            delete[] m_tmpStrBuf;
+            m_tmpStrBuf = new wxChar[lng+1];
+            m_tmpStrBufSize = lng+1;
         }
-    }
+        wxChar *temp = m_tmpStrBuf;
 
-    while (i < end)
-    {
-        size_t x = 0;
-        d = temp[templen++] = *i;
-        if ((d == wxT('\n')) || (d == wxT('\r')) || (d == wxT(' ')) || (d == wxT('\t')))
+        wxString::const_iterator i = txt.begin();
+        const wxString::const_iterator end = txt.end();
+
+        if (m_tmpLastWasSpace)
         {
-            ++i, ++x;
             while ( (i < end) &&
-                    (*i == wxT('\n') || *i == wxT('\r') ||
-                     *i == wxT(' ') || *i == wxT('\t')) )
+                    (*i == wxT('\n') || *i == wxT('\r') || *i == wxT(' ') ||
+                     *i == wxT('\t')) )
             {
                 ++i;
-                ++x;
             }
         }
-        else
-            ++i;
 
-        if (x)
+        while (i < end)
+        {
+            size_t x = 0;
+            wxChar d = *i;
+            if ((d == wxT('\n')) || (d == wxT('\r')) || (d == wxT(' ')) || (d == wxT('\t')))
+            {
+                ++i, ++x;
+                while ( (i < end) &&
+                        (*i == wxT('\n') || *i == wxT('\r') ||
+                         *i == wxT(' ') || *i == wxT('\t')) )
+                {
+                    ++i;
+                    ++x;
+                }
+            }
+            else
+            {
+                ++i;
+            }
+
+            if (d == CUR_NBSP_VALUE)
+                d = ' ';
+
+            temp[templen++] = d;
+
+            if (x)
+            {
+                temp[templen-1] = wxT(' ');
+                FlushWordBuf(temp, templen);
+                m_tmpLastWasSpace = true;
+            }
+        }
+
+        if (templen && (templen > 1 || temp[0] != wxT(' ')))
         {
-            temp[templen-1] = wxT(' ');
-            DoAddText(temp, templen);
-            m_tmpLastWasSpace = true;
+            FlushWordBuf(temp, templen);
+            m_tmpLastWasSpace = false;
         }
     }
-
-    if (templen && (templen > 1 || temp[0] != wxT(' ')))
+    else // m_whitespaceMode == Whitespace_Pre
     {
-        DoAddText(temp, templen);
+        if ( txt.find(CUR_NBSP_VALUE) != wxString::npos )
+        {
+            // we need to substitute spaces for &nbsp; here just like we
+            // did in the Whitespace_Normal branch above
+            wxString txt2(txt);
+            txt2.Replace(CUR_NBSP_VALUE, ' ');
+            AddPreBlock(txt2);
+        }
+        else
+        {
+            AddPreBlock(txt);
+        }
+
+        // don't eat any whitespace in <pre> block
         m_tmpLastWasSpace = false;
     }
 }
 
-void wxHtmlWinParser::DoAddText(wxChar *temp, int& templen)
+void wxHtmlWinParser::FlushWordBuf(wxChar *buf, int& len)
 {
-    #define NBSP_UNICODE_VALUE 160
-#if !wxUSE_UNICODE
-    if ( m_nbsp == 0 )
-        m_nbsp = GetEntitiesParser()->GetCharForCode(NBSP_UNICODE_VALUE);
-    #define CUR_NBSP_VALUE m_nbsp
-#else
-    #define CUR_NBSP_VALUE NBSP_UNICODE_VALUE
-#endif
+    buf[len] = 0;
 
-    temp[templen] = 0;
-    templen = 0;
 #if !wxUSE_UNICODE
     if (m_EncConv)
-        m_EncConv->Convert(temp);
+        m_EncConv->Convert(buf);
 #endif
-    size_t len = wxStrlen(temp);
-    for (size_t j = 0; j < len; j++)
-    {
-        if (temp[j] == CUR_NBSP_VALUE)
-            temp[j] = wxT(' ');
-    }
 
-    wxHtmlWordCell *c = new wxHtmlWordCell(temp, *(GetDC()));
+    AddWord(wxString(buf, len));
+
+    len = 0;
+}
 
-    ApplyStateToCell(c);
+void wxHtmlWinParser::AddWord(wxHtmlWordCell *word)
+{
+    ApplyStateToCell(word);
 
-    m_Container->InsertCell(c);
-    c->SetPreviousWord(m_lastWordCell);
-    m_lastWordCell = c;
+    m_Container->InsertCell(word);
+    word->SetPreviousWord(m_lastWordCell);
+    m_lastWordCell = word;
 }
 
+void wxHtmlWinParser::AddPreBlock(const wxString& text)
+{
+    // Adds 'text' (a run of <pre> content) as a single word cell and
+    // advances m_posColumn; TABs are expanded to spaces for display only.
+    if ( text.find('\t') != wxString::npos )
+    {
+        wxString text2;
+        text2.reserve(text.length());
+
+        const wxString::const_iterator end = text.end();
+        wxString::const_iterator copyFrom = text.begin();
+        int posColumn = m_posColumn;
+        for ( wxString::const_iterator i = copyFrom; i != end; ++i )
+        {
+            if ( *i == '\t' )
+            {
+                if ( copyFrom != i )
+                    text2.append(copyFrom, i);
+
+                const unsigned SPACES_PER_TAB = 8;
+                const size_t expandTo = SPACES_PER_TAB - posColumn % SPACES_PER_TAB;
+                text2.append(expandTo, ' ');
+
+                posColumn += expandTo;
+                copyFrom = i + 1;
+            }
+            else
+            {
+                ++posColumn;
+            }
+        }
+        // append whatever follows the last TAB
+        if ( copyFrom != end )
+            text2.append(copyFrom, end);
+
+        AddWord(new wxHtmlWordWithTabsCell(text2, text, m_posColumn, *(GetDC())));
+
+        m_posColumn = posColumn;
+    }
+    else
+    {
+        // no special formatting needed
+        AddWord(text);
+        m_posColumn += text.length();
+    }
+}
 
 
 wxHtmlContainerCell* wxHtmlWinParser::OpenContainer()
 {
     m_Container = new wxHtmlContainerCell(m_Container);
     m_Container->SetAlignHor(m_Align);
+    m_posColumn = 0;
     m_tmpLastWasSpace = true;
         /* to avoid space being first character in paragraph */
     return m_Container;