From: Václav Slavík
Date: Mon, 21 Apr 2008 10:46:30 +0000 (+0000)
Subject: preserve TAB characters when copying HTML content to clipboard
X-Git-Url: https://git.saurik.com/wxWidgets.git/commitdiff_plain/6a603a10e77f719458939d117e46f7d8ed0b372b?ds=inline

preserve TAB characters when copying HTML 
 content to clipboard

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@53282 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
---

diff --git a/include/wx/html/htmlcell.h b/include/wx/html/htmlcell.h
index 4ec02db0b0..df6dfa709a 100644
--- a/include/wx/html/htmlcell.h
+++ b/include/wx/html/htmlcell.h
@@ -375,12 +375,17 @@ public:
     void Draw(wxDC& dc, int x, int y, int view_y1, int view_y2,
               wxHtmlRenderingInfo& info);
     virtual wxCursor GetMouseCursor(wxHtmlWindowInterface *window) const;
-    wxString ConvertToText(wxHtmlSelection *sel) const;
+    virtual wxString ConvertToText(wxHtmlSelection *sel) const;
     bool IsLinebreakAllowed() const { return m_allowLinebreak; }
 
     void SetPreviousWord(wxHtmlWordCell *cell);
 
 protected:
+    virtual wxString GetAllAsText() const
+        { return m_Word; }
+    virtual wxString GetPartAsText(int begin, int end) const
+        { return m_Word.Mid(begin, end - begin); }
+
     void SetSelectionPrivPos(const wxDC& dc, wxHtmlSelection *s) const;
     void Split(const wxDC& dc,
                const wxPoint& selFrom, const wxPoint& selTo,
@@ -394,7 +399,28 @@ protected:
 };
 
 
+// wxHtmlWordCell specialization for storing text fragments with embedded
+// '\t's; these differ from normal words in that the displayed text is
+// different from the text copied to clipboard
+class WXDLLIMPEXP_HTML wxHtmlWordWithTabsCell : public wxHtmlWordCell
+{
+public:
+    wxHtmlWordWithTabsCell(const wxString& word,
+                           const wxString& wordOrig,
+                           size_t linepos,
+                           const wxDC& dc)
+        : wxHtmlWordCell(word, dc),
+          m_wordOrig(wordOrig),
+          m_linepos(linepos)
+    {}
+
+protected:
+    virtual wxString GetAllAsText() const;
+    virtual wxString GetPartAsText(int begin, int end) const;
 
+    wxString m_wordOrig;
+    size_t   m_linepos;
+};
 
 
 // Container contains other cells, thus forming tree structure of rendering
diff --git a/include/wx/html/winpars.h b/include/wx/html/winpars.h
index 8f554ce8c2..5835338889 100644
--- a/include/wx/html/winpars.h
+++ b/include/wx/html/winpars.h
@@ -145,11 +145,25 @@ public:
     // creates font depending on m_Font* members.
     virtual wxFont* CreateCurrentFont();
 
+    enum WhitespaceMode
+    {
+        Whitespace_Normal,  // normal mode, collapse whitespace
+        Whitespace_Pre      // inside <pre>, keep whitespace as-is
+    };
+
+    // change the current whitespace handling mode
+    void SetWhitespaceMode(WhitespaceMode mode) { m_whitespaceMode = mode; }
+    WhitespaceMode GetWhitespaceMode() const { return m_whitespaceMode; }
+
 protected:
     virtual void AddText(const wxString& txt);
 
 private:
-    void DoAddText(wxChar *temp, int& templen);
+    void FlushWordBuf(wxChar *temp, int& len);
+    void AddWord(wxHtmlWordCell *word);
+    void AddWord(const wxString& word)
+        { AddWord(new wxHtmlWordCell(word, *(GetDC()))); }
+    void AddPreBlock(const wxString& text);
 
     bool m_tmpLastWasSpace;
     wxChar *m_tmpStrBuf;
@@ -207,8 +221,15 @@ private:
     wxEncodingConverter *m_EncConv;
 #endif
 
+    // current whitespace handling mode
+    WhitespaceMode m_whitespaceMode;
+
     wxHtmlWordCell *m_lastWordCell;
 
+    // current position on line, in num. of characters; used to properly
+    // expand TABs; only updated while inside <pre>
+    int m_posColumn;
+
     DECLARE_NO_COPY_CLASS(wxHtmlWinParser)
 };
 
diff --git a/src/html/htmlcell.cpp b/src/html/htmlcell.cpp
index 25099e75c8..01d92107bd 100644
--- a/src/html/htmlcell.cpp
+++ b/src/html/htmlcell.cpp
@@ -464,6 +464,8 @@ void wxHtmlWordCell::Split(const wxDC& dc,
 
     pos1 = i;
     pos2 = j;
+
+    wxASSERT( pos2 >= pos1 );
 }
 
 void wxHtmlWordCell::SetSelectionPrivPos(const wxDC& dc, wxHtmlSelection *s) const
@@ -617,6 +619,17 @@ void wxHtmlWordCell::Draw(wxDC& dc, int x, int y,
     }
 }
 
+wxCursor wxHtmlWordCell::GetMouseCursor(wxHtmlWindowInterface *window) const
+{
+    if ( !GetLink() )
+    {
+        return window->GetHTMLCursor(wxHtmlWindowInterface::HTMLCursor_Text);
+    }
+    else
+    {
+        return wxHtmlCell::GetMouseCursor(window);
+    }
+}
 
 wxString wxHtmlWordCell::ConvertToText(wxHtmlSelection *s) const
 {
@@ -634,29 +647,77 @@ wxString wxHtmlWordCell::ConvertToText(wxHtmlSelection *s) const
         // TODO: but this really needs to be fixed in some better way later...
         if ( priv != wxDefaultPosition )
         {
-            int part1 = priv.x;
-            int part2 = priv.y;
-            return m_Word.Mid(part1, part2-part1);
+            const int part1 = priv.x;
+            const int part2 = priv.y;
+            if ( part1 == part2 )
+                return wxEmptyString;
+            return GetPartAsText(part1, part2);
         }
         //else: return the whole word below
     }
 
-    return m_Word;
+    return GetAllAsText();
 }
 
-wxCursor wxHtmlWordCell::GetMouseCursor(wxHtmlWindowInterface *window) const
+wxString wxHtmlWordWithTabsCell::GetAllAsText() const
 {
-    if ( !GetLink() )
+    return m_wordOrig;
+}
+
+wxString wxHtmlWordWithTabsCell::GetPartAsText(int begin, int end) const
+{
+    // NB: The 'begin' and 'end' positions are in the _displayed_ text
+    //     (stored in m_Word) and not in the text with tabs that should
+    //     be copied to clipboard (m_wordOrig).
+    //
+    // NB: Because selection is performed on displayed text, it's possible
+    //     to select e.g. "half of TAB character" -- IOW, 'begin' and 'end'
+    //     may be in the middle of TAB character expansion into ' 's. In this
+    //     case, we copy the TAB character to clipboard once.
+
+    wxASSERT( begin < end );
+
+    const unsigned SPACES_PER_TAB = 8;
+
+    wxString sel;
+
+    int pos = 0;
+    wxString::const_iterator i = m_wordOrig.begin();
+
+    // find the beginning of text to copy:
+    for ( ; pos < begin; ++i )
     {
-        return window->GetHTMLCursor(wxHtmlWindowInterface::HTMLCursor_Text);
+        if ( *i == '\t' )
+        {
+            pos += 8 - (m_linepos + pos) % SPACES_PER_TAB;
+            if ( pos >= begin )
+            {
+                sel += '\t';
+            }
+        }
+        else
+        {
+            ++pos;
+        }
     }
-    else
+
+    // copy the content until we reach 'end':
+    for ( ; pos < end; ++i )
     {
-        return wxHtmlCell::GetMouseCursor(window);
+        const wxChar c = *i;
+        sel += c;
+
+        if ( c == '\t' )
+            pos += 8 - (m_linepos + pos) % SPACES_PER_TAB;
+        else
+            ++pos;
     }
+
+    return sel;
 }
 
 
+
 //-----------------------------------------------------------------------------
 // wxHtmlContainerCell
 //-----------------------------------------------------------------------------
diff --git a/src/html/m_pre.cpp b/src/html/m_pre.cpp
index 026ae9c018..b2a5cb89e5 100644
--- a/src/html/m_pre.cpp
+++ b/src/html/m_pre.cpp
@@ -28,40 +28,28 @@
 FORCE_LINK_ME(m_pre)
 
 // replaces '\t', ' ' and '\n' with HTML markup:
-static wxString LINKAGEMODE HtmlizeWhitespaces(const wxString& str)
+static wxString LINKAGEMODE HtmlizeLinebreaks(const wxString& str)
 {
     wxString out;
+    out.reserve(str.length()); // we'll certainly need at least that
+
     size_t len = str.Len();
-    size_t linepos = 0;
     for (size_t i = 0; i < len; i++)
     {
         switch ( str[i].GetValue() )
         {
-            case wxT('<'):
-                while (i < len && str[i] != wxT('>'))
+            case '<':
+                while (i < len && str[i] != '>')
                 {
                     out << str[i++];
                 }
-                out << wxT('>');
-                break;
-            case wxT(' '):
-                out << wxT("&nbsp;");
-                linepos++;
-                break;
-            case wxT('\n'):
-                out << wxT("<br>");
-                linepos = 0;
+                out << '>';
                 break;
-            case wxT('\t'):
-                {
-                    for (size_t j = 8 - linepos % 8; j > 0; j--)
-                        out << wxT("&nbsp;");
-                    linepos += 8 - linepos % 8;
-                }
+            case '\n':
+                out << "<br>";
                 break;
             default:
                 out << str[i];
-                linepos++;
                 break;
         }
     }
@@ -81,13 +69,16 @@ TAG_HANDLER_BEGIN(PRE, "PRE")
     {
         wxHtmlContainerCell *c;
 
-        int fixed = m_WParser->GetFontFixed(),
-            italic = m_WParser->GetFontItalic(),
-            underlined = m_WParser->GetFontUnderlined(),
-            bold = m_WParser->GetFontBold(),
-            fsize = m_WParser->GetFontSize();
+        const int fixed = m_WParser->GetFontFixed();
+        const int italic = m_WParser->GetFontItalic();
+        const int underlined = m_WParser->GetFontUnderlined();
+        const int bold = m_WParser->GetFontBold();
+        const int fsize = m_WParser->GetFontSize();
+        const wxHtmlWinParser::WhitespaceMode whitespace =
+            m_WParser->GetWhitespaceMode();
 
         c = m_WParser->GetContainer();
+        m_WParser->SetWhitespaceMode(wxHtmlWinParser::Whitespace_Pre);
         m_WParser->SetFontUnderlined(false);
         m_WParser->SetFontBold(false);
         m_WParser->SetFontItalic(false);
@@ -103,12 +94,17 @@ TAG_HANDLER_BEGIN(PRE, "PRE")
         c->SetIndent(m_WParser->GetCharHeight(), wxHTML_INDENT_TOP);
 
         wxString srcMid = m_WParser->GetInnerSource(tag);
-        ParseInnerSource(HtmlizeWhitespaces(srcMid));
+
+        // setting Whitespace_Pre mode takes care of spaces and TABs, but
+        // not linebreaks, so we have to translate them into <br> by
+        // calling HtmlizeLinebreaks() here
+        ParseInnerSource(HtmlizeLinebreaks(srcMid));
 
         m_WParser->CloseContainer();
         m_WParser->CloseContainer();
         c = m_WParser->OpenContainer();
 
+        m_WParser->SetWhitespaceMode(whitespace);
         m_WParser->SetFontUnderlined(underlined);
         m_WParser->SetFontBold(bold);
         m_WParser->SetFontItalic(italic);
diff --git a/src/html/winpars.cpp b/src/html/winpars.cpp
index 3a1aff2aab..9b5adbdfb1 100644
--- a/src/html/winpars.cpp
+++ b/src/html/winpars.cpp
@@ -52,7 +52,9 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindowInterface *wndIface)
     m_InputEnc = wxFONTENCODING_ISO8859_1;
     m_OutputEnc = wxFONTENCODING_DEFAULT;
 #endif
+    m_whitespaceMode = Whitespace_Normal;
     m_lastWordCell = NULL;
+    m_posColumn = 0;
 
     {
         int i, j, k, l, m;
@@ -345,103 +347,176 @@ wxFSFile *wxHtmlWinParser::OpenURL(wxHtmlURLType type,
 
 void wxHtmlWinParser::AddText(const wxString& txt)
 {
-    register wxChar d;
-    int templen = 0;
+    #define NBSP_UNICODE_VALUE (wxChar(160))
+#if !wxUSE_UNICODE
+    if ( m_nbsp == 0 )
+        m_nbsp = GetEntitiesParser()->GetCharForCode(NBSP_UNICODE_VALUE);
+    #define CUR_NBSP_VALUE m_nbsp
+#else
+    #define CUR_NBSP_VALUE NBSP_UNICODE_VALUE
+#endif
 
-    size_t lng = txt.length();
-    if (lng+1 > m_tmpStrBufSize)
+    if ( m_whitespaceMode == Whitespace_Normal )
     {
-        delete[] m_tmpStrBuf;
-        m_tmpStrBuf = new wxChar[lng+1];
-        m_tmpStrBufSize = lng+1;
-    }
-    wxChar *temp = m_tmpStrBuf;
-
-    wxString::const_iterator i = txt.begin();
-    wxString::const_iterator end = txt.end();
+        int templen = 0;
 
-    if (m_tmpLastWasSpace)
-    {
-        while ( (i < end) &&
-                (*i == wxT('\n') || *i == wxT('\r') || *i == wxT(' ') ||
-                 *i == wxT('\t')) )
+        size_t lng = txt.length();
+        if (lng+1 > m_tmpStrBufSize)
         {
-            ++i;
+            delete[] m_tmpStrBuf;
+            m_tmpStrBuf = new wxChar[lng+1];
+            m_tmpStrBufSize = lng+1;
         }
-    }
+        wxChar *temp = m_tmpStrBuf;
 
-    while (i < end)
-    {
-        size_t x = 0;
-        d = temp[templen++] = *i;
-        if ((d == wxT('\n')) || (d == wxT('\r')) || (d == wxT(' ')) || (d == wxT('\t')))
+        wxString::const_iterator i = txt.begin();
+        const wxString::const_iterator end = txt.end();
+
+        if (m_tmpLastWasSpace)
         {
-            ++i, ++x;
             while ( (i < end) &&
-                    (*i == wxT('\n') || *i == wxT('\r') ||
-                     *i == wxT(' ') || *i == wxT('\t')) )
+                    (*i == wxT('\n') || *i == wxT('\r') || *i == wxT(' ') ||
+                     *i == wxT('\t')) )
             {
                 ++i;
-                ++x;
             }
         }
-        else
-            ++i;
 
-        if (x)
+        while (i < end)
+        {
+            size_t x = 0;
+            wxChar d = *i;
+            if ((d == wxT('\n')) || (d == wxT('\r')) || (d == wxT(' ')) || (d == wxT('\t')))
+            {
+                ++i, ++x;
+                while ( (i < end) &&
+                        (*i == wxT('\n') || *i == wxT('\r') ||
+                         *i == wxT(' ') || *i == wxT('\t')) )
+                {
+                    ++i;
+                    ++x;
+                }
+            }
+            else
+            {
+                ++i;
+            }
+
+            if (d == CUR_NBSP_VALUE)
+                d = ' ';
+
+            temp[templen++] = d;
+
+            if (x)
+            {
+                temp[templen-1] = wxT(' ');
+                FlushWordBuf(temp, templen);
+                m_tmpLastWasSpace = true;
+            }
+        }
+
+        if (templen && (templen > 1 || temp[0] != wxT(' ')))
         {
-            temp[templen-1] = wxT(' ');
-            DoAddText(temp, templen);
-            m_tmpLastWasSpace = true;
+            FlushWordBuf(temp, templen);
+            m_tmpLastWasSpace = false;
         }
     }
-
-    if (templen && (templen > 1 || temp[0] != wxT(' ')))
+    else // m_whitespaceMode == Whitespace_Pre
     {
-        DoAddText(temp, templen);
+        if ( txt.find(CUR_NBSP_VALUE) != wxString::npos )
+        {
+            // we need to substitute spaces for &nbsp; here just like we
+            // did in the Whitespace_Normal branch above
+            wxString txt2(txt);
+            txt2.Replace(CUR_NBSP_VALUE, ' ');
+            AddPreBlock(txt2);
+        }
+        else
+        {
+            AddPreBlock(txt);
+        }
+
+        // don't eat any whitespace in <pre> block
         m_tmpLastWasSpace = false;
     }
 }
 
-void wxHtmlWinParser::DoAddText(wxChar *temp, int& templen)
+void wxHtmlWinParser::FlushWordBuf(wxChar *buf, int& len)
 {
-    #define NBSP_UNICODE_VALUE 160
-#if !wxUSE_UNICODE
-    if ( m_nbsp == 0 )
-        m_nbsp = GetEntitiesParser()->GetCharForCode(NBSP_UNICODE_VALUE);
-    #define CUR_NBSP_VALUE m_nbsp
-#else
-    #define CUR_NBSP_VALUE NBSP_UNICODE_VALUE
-#endif
+    buf[len] = 0;
 
-    temp[templen] = 0;
-    templen = 0;
 #if !wxUSE_UNICODE
     if (m_EncConv)
-        m_EncConv->Convert(temp);
+        m_EncConv->Convert(buf);
 #endif
-    size_t len = wxStrlen(temp);
-    for (size_t j = 0; j < len; j++)
-    {
-        if (temp[j] == CUR_NBSP_VALUE)
-            temp[j] = wxT(' ');
-    }
 
-    wxHtmlWordCell *c = new wxHtmlWordCell(temp, *(GetDC()));
+    AddWord(wxString(buf, len));
+
+    len = 0;
+}
 
-    ApplyStateToCell(c);
+void wxHtmlWinParser::AddWord(wxHtmlWordCell *word)
+{
+    ApplyStateToCell(word);
 
-    m_Container->InsertCell(c);
-    c->SetPreviousWord(m_lastWordCell);
-    m_lastWordCell = c;
+    m_Container->InsertCell(word);
+    word->SetPreviousWord(m_lastWordCell);
+    m_lastWordCell = word;
 }
 
+void wxHtmlWinParser::AddPreBlock(const wxString& text)
+{
+    if ( text.find('\t') != wxString::npos )
+    {
+        wxString text2;
+        text2.reserve(text.length());
+
+        const wxString::const_iterator end = text.end();
+        wxString::const_iterator copyFrom = text.begin();
+        size_t posFrom = 0;
+        size_t pos = 0;
+        int posColumn = m_posColumn;
+        for ( wxString::const_iterator i = copyFrom; i != end; ++i, ++pos )
+        {
+            if ( *i == '\t' )
+            {
+                if ( copyFrom != i )
+                    text2.append(copyFrom, i);
+
+                const unsigned SPACES_PER_TAB = 8;
+                const size_t expandTo = SPACES_PER_TAB - posColumn % SPACES_PER_TAB;
+                text2.append(expandTo, ' ');
+
+                posColumn += expandTo;
+                copyFrom = i + 1;
+                posFrom = pos + 1;
+            }
+            else
+            {
+                ++posColumn;
+            }
+        }
+        if ( copyFrom != text.end() )
+            text2.append(copyFrom, text.end());
+
+        AddWord(new wxHtmlWordWithTabsCell(text2, text, m_posColumn, *(GetDC())));
+
+        m_posColumn = posColumn;
+    }
+    else
+    {
+        // no special formatting needed
+        AddWord(text);
+        m_posColumn += text.length();
+    }
+}
 
 
 wxHtmlContainerCell* wxHtmlWinParser::OpenContainer()
 {
     m_Container = new wxHtmlContainerCell(m_Container);
     m_Container->SetAlignHor(m_Align);
+    m_posColumn = 0;
     m_tmpLastWasSpace = true;
         /* to avoid space being first character in paragraph */
     return m_Container;