preserve TAB characters when copying HTML <pre> content to clipboard

author Václav Slavík <vslavik@fastmail.fm>

Mon, 21 Apr 2008 10:46:30 +0000 (10:46 +0000)

committer Václav Slavík <vslavik@fastmail.fm>

Mon, 21 Apr 2008 10:46:30 +0000 (10:46 +0000)
author Václav Slavík <vslavik@fastmail.fm>
Mon, 21 Apr 2008 10:46:30 +0000 (10:46 +0000)
committer Václav Slavík <vslavik@fastmail.fm>
Mon, 21 Apr 2008 10:46:30 +0000 (10:46 +0000)
diff --git a/include/wx/html/htmlcell.h b/include/wx/html/htmlcell.h

index 4ec02db0b066bb879fa88ab87f1d849e1cc51f60..df6dfa709aa3adc77a0a0659bd27bc954b3a7e63 100644 (file)
--- a/include/wx/html/htmlcell.h
+++ b/include/wx/html/htmlcell.h
@@ -375,12 +375,17 @@ public:
      void Draw(wxDC& dc, int x, int y, int view_y1, int view_y2,
                wxHtmlRenderingInfo& info);
      virtual wxCursor GetMouseCursor(wxHtmlWindowInterface *window) const;
-    wxString ConvertToText(wxHtmlSelection *sel) const;
+    virtual wxString ConvertToText(wxHtmlSelection *sel) const;
      bool IsLinebreakAllowed() const { return m_allowLinebreak; }
  
      void SetPreviousWord(wxHtmlWordCell *cell);
  
  protected:
+    virtual wxString GetAllAsText() const
+        { return m_Word; }
+    virtual wxString GetPartAsText(int begin, int end) const
+        { return m_Word.Mid(begin, end - begin); }
+
      void SetSelectionPrivPos(const wxDC& dc, wxHtmlSelection *s) const;
      void Split(const wxDC& dc,
                 const wxPoint& selFrom, const wxPoint& selTo,
@@ -394,7 +399,28 @@ protected:
  };
  
  
+// wxHtmlWordCell specialization for storing text fragments with embedded
+// '\t's; these differ from normal words in that the displayed text is
+// different from the text copied to clipboard
+class WXDLLIMPEXP_HTML wxHtmlWordWithTabsCell : public wxHtmlWordCell
+{
+public:
+    wxHtmlWordWithTabsCell(const wxString& word,      // text as displayed
+                           const wxString& wordOrig,  // text with '\t's kept
+                           size_t linepos,            // column where word starts
+                           const wxDC& dc)
+        : wxHtmlWordCell(word, dc),
+          m_wordOrig(wordOrig),
+          m_linepos(linepos)
+    {}
+
+protected:
+    virtual wxString GetAllAsText() const;  // returns m_wordOrig
+    virtual wxString GetPartAsText(int begin, int end) const;
  
+    wxString m_wordOrig;  // original text, returned when converting to text
+    size_t   m_linepos;   // start column on the line, used for TAB expansion
+};
  
  
  // Container contains other cells, thus forming tree structure of rendering
diff --git a/include/wx/html/winpars.h b/include/wx/html/winpars.h

index 8f554ce8c22baefe6e672ee67e4234d79e0a20b4..5835338889ef28f8d0cf8e20bb3f0f23c087752c 100644 (file)
--- a/include/wx/html/winpars.h
+++ b/include/wx/html/winpars.h
@@ -145,11 +145,25 @@ public:
      // creates font depending on m_Font* members.
      virtual wxFont* CreateCurrentFont();
  
+    enum WhitespaceMode
+    {
+        Whitespace_Normal,  // normal mode, collapse whitespace
+        Whitespace_Pre      // inside <pre>, keep whitespace as-is
+    };
+
+    // set/get the whitespace handling mode used for text added from now on
+    void SetWhitespaceMode(WhitespaceMode mode) { m_whitespaceMode = mode; }
+    WhitespaceMode GetWhitespaceMode() const { return m_whitespaceMode; }
+
  protected:
      virtual void AddText(const wxString& txt);
  
  private:
-    void DoAddText(wxChar *temp, int& templen);
+    void FlushWordBuf(wxChar *temp, int& len);
+    void AddWord(wxHtmlWordCell *word);
+    void AddWord(const wxString& word)
+        { AddWord(new wxHtmlWordCell(word, *(GetDC()))); }
+    void AddPreBlock(const wxString& text);
  
      bool m_tmpLastWasSpace;
      wxChar *m_tmpStrBuf;
@@ -207,8 +221,15 @@ private:
      wxEncodingConverter *m_EncConv;
  #endif
  
+    // current whitespace handling mode
+    WhitespaceMode m_whitespaceMode;
+
      wxHtmlWordCell *m_lastWordCell;
  
+    // current position on line, in num. of characters; used to properly
+    // expand TABs; only updated while inside <pre>
+    int m_posColumn;
+
      DECLARE_NO_COPY_CLASS(wxHtmlWinParser)
  };
  
diff --git a/src/html/htmlcell.cpp b/src/html/htmlcell.cpp

index 25099e75c8b430ee49760149569cf6994a626212..01d92107bd956baa5bb30964eaf99dbe7f321006 100644 (file)
--- a/src/html/htmlcell.cpp
+++ b/src/html/htmlcell.cpp
@@ -464,6 +464,8 @@ void wxHtmlWordCell::Split(const wxDC& dc,
  
      pos1 = i;
      pos2 = j;
+
+    wxASSERT( pos2 >= pos1 );
  }
  
  void wxHtmlWordCell::SetSelectionPrivPos(const wxDC& dc, wxHtmlSelection *s) const
@@ -617,6 +619,17 @@ void wxHtmlWordCell::Draw(wxDC& dc, int x, int y,
      }
  }
  
+// Text that is not part of a link gets the text cursor; linked text defers
+// to wxHtmlCell::GetMouseCursor().
+wxCursor wxHtmlWordCell::GetMouseCursor(wxHtmlWindowInterface *window) const
+{
+    if ( GetLink() )
+    {
+        return wxHtmlCell::GetMouseCursor(window);
+    }
+
+    return window->GetHTMLCursor(wxHtmlWindowInterface::HTMLCursor_Text);
+}
  
  wxString wxHtmlWordCell::ConvertToText(wxHtmlSelection *s) const
  {
@@ -634,29 +647,77 @@ wxString wxHtmlWordCell::ConvertToText(wxHtmlSelection *s) const
          // TODO: but this really needs to be fixed in some better way later...
          if ( priv != wxDefaultPosition )
          {
-            int part1 = priv.x;
-            int part2 = priv.y;
-            return m_Word.Mid(part1, part2-part1);
+            const int part1 = priv.x;
+            const int part2 = priv.y;
+            if ( part1 == part2 )
+                return wxEmptyString;
+            return GetPartAsText(part1, part2);
          }
          //else: return the whole word below
      }
  
-    return m_Word;
+    return GetAllAsText();
  }
  
-wxCursor wxHtmlWordCell::GetMouseCursor(wxHtmlWindowInterface *window) const
+wxString wxHtmlWordWithTabsCell::GetAllAsText() const
  {
-    if ( !GetLink() )
+    return m_wordOrig; // the original text, with TABs not expanded to spaces
+}
+
+wxString wxHtmlWordWithTabsCell::GetPartAsText(int begin, int end) const
+{
+    // Returns the part of m_wordOrig shown at displayed positions [begin, end).
+    //
+    // NB: The 'begin' and 'end' positions are in the _displayed_ text
+    //     (stored in m_Word) and not in the text with tabs that should
+    //     be copied to clipboard (m_wordOrig).
+    //
+    // NB: Because selection is performed on displayed text, it's possible
+    //     to select e.g. "half of TAB character" -- IOW, 'begin' and 'end'
+    //     may be in the middle of TAB character expansion into ' 's. In this
+    //     case, we copy the TAB character to clipboard once.
+
+    wxASSERT( begin < end );
+
+    const unsigned SPACES_PER_TAB = 8;
+    wxString sel;
+    int pos = 0; // position in the displayed (TAB-expanded) text
+    wxString::const_iterator i = m_wordOrig.begin();
+
+    // find the beginning of text to copy (never running past m_wordOrig):
+    for ( ; pos < begin && i != m_wordOrig.end(); ++i )
      {
-        return window->GetHTMLCursor(wxHtmlWindowInterface::HTMLCursor_Text);
+        if ( *i == '\t' )
+        {
+            pos += SPACES_PER_TAB - (m_linepos + pos) % SPACES_PER_TAB;
+            if ( pos >= begin )
+            {
+                sel += '\t';
+            }
+        }
+        else
+        {
+            ++pos;
+        }
      }
-    else
+
+    // copy the content until we reach 'end' (or run out of original text):
+    for ( ; pos < end && i != m_wordOrig.end(); ++i )
      {
-        return wxHtmlCell::GetMouseCursor(window);
+        const wxChar c = *i;
+        sel += c;
+
+        if ( c == '\t' )
+            pos += SPACES_PER_TAB - (m_linepos + pos) % SPACES_PER_TAB;
+        else
+            ++pos;
      }
+
+    return sel;
  }
  
  
+
  //-----------------------------------------------------------------------------
  // wxHtmlContainerCell
  //-----------------------------------------------------------------------------
diff --git a/src/html/m_pre.cpp b/src/html/m_pre.cpp

index 026ae9c01860ac398c8de459eea650f14df03cd7..b2a5cb89e555bf48a42f0dbd536554bdb6e071d2 100644 (file)
--- a/src/html/m_pre.cpp
+++ b/src/html/m_pre.cpp
@@ -28,40 +28,28 @@
  FORCE_LINK_ME(m_pre)
  
  // replaces '\t', ' ' and '\n' with HTML markup:
-static wxString LINKAGEMODE HtmlizeWhitespaces(const wxString& str)
+static wxString LINKAGEMODE HtmlizeLinebreaks(const wxString& str)
  {
      wxString out;
+    out.reserve(str.length()); // we'll certainly need at least that
+
      size_t len = str.Len();
-    size_t linepos = 0;
      for (size_t i = 0; i < len; i++)
      {
          switch ( str[i].GetValue() )
          {
-            case wxT('<'):
-                while (i < len && str[i] != wxT('>'))
+            case '<':
+                while (i < len && str[i] != '>')
                  {
                      out << str[i++];
                  }
-                out << wxT('>');
-                break;
-            case wxT(' '):
-                out << wxT("&nbsp;");
-                linepos++;
-                break;
-            case wxT('\n'):
-                out << wxT("<br>");
-                linepos = 0;
+                out << '>';
                  break;
-            case wxT('\t'):
-                {
-                    for (size_t j = 8 - linepos % 8; j > 0; j--)
-                        out << wxT("&nbsp;");
-                    linepos += 8 - linepos % 8;
-                }
+            case '\n':
+                out << "<br>";
                  break;
              default:
                  out << str[i];
-                linepos++;
                  break;
          }
      }
@@ -81,13 +69,16 @@ TAG_HANDLER_BEGIN(PRE, "PRE")
      {
          wxHtmlContainerCell *c;
  
-        int fixed = m_WParser->GetFontFixed(),
-            italic = m_WParser->GetFontItalic(),
-            underlined = m_WParser->GetFontUnderlined(),
-            bold = m_WParser->GetFontBold(),
-            fsize = m_WParser->GetFontSize();
+        const int fixed = m_WParser->GetFontFixed();
+        const int italic = m_WParser->GetFontItalic();
+        const int underlined = m_WParser->GetFontUnderlined();
+        const int bold = m_WParser->GetFontBold();
+        const int fsize = m_WParser->GetFontSize();
+        const wxHtmlWinParser::WhitespaceMode whitespace =
+            m_WParser->GetWhitespaceMode();
  
          c = m_WParser->GetContainer();
+        m_WParser->SetWhitespaceMode(wxHtmlWinParser::Whitespace_Pre);
          m_WParser->SetFontUnderlined(false);
          m_WParser->SetFontBold(false);
          m_WParser->SetFontItalic(false);
@@ -103,12 +94,17 @@ TAG_HANDLER_BEGIN(PRE, "PRE")
          c->SetIndent(m_WParser->GetCharHeight(), wxHTML_INDENT_TOP);
  
          wxString srcMid = m_WParser->GetInnerSource(tag);
-        ParseInnerSource(HtmlizeWhitespaces(srcMid));
+
+        // setting Whitespace_Pre mode takes care of spaces and TABs, but
+        // not linebreaks, so we have to translate them into <br> by
+        // calling HtmlizeLinebreaks() here
+        ParseInnerSource(HtmlizeLinebreaks(srcMid));
  
          m_WParser->CloseContainer();
          m_WParser->CloseContainer();
          c = m_WParser->OpenContainer();
  
+        m_WParser->SetWhitespaceMode(whitespace);
          m_WParser->SetFontUnderlined(underlined);
          m_WParser->SetFontBold(bold);
          m_WParser->SetFontItalic(italic);
diff --git a/src/html/winpars.cpp b/src/html/winpars.cpp

index 3a1aff2aab33f86669f8634c6fa0c5a6d2afd369..9b5adbdfb17dfff7824b924963a9e6d35c253ffb 100644 (file)
--- a/src/html/winpars.cpp
+++ b/src/html/winpars.cpp
@@ -52,7 +52,9 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindowInterface *wndIface)
      m_InputEnc = wxFONTENCODING_ISO8859_1;
      m_OutputEnc = wxFONTENCODING_DEFAULT;
  #endif
+    m_whitespaceMode = Whitespace_Normal;
      m_lastWordCell = NULL;
+    m_posColumn = 0;
  
      {
          int i, j, k, l, m;
@@ -345,103 +347,176 @@ wxFSFile *wxHtmlWinParser::OpenURL(wxHtmlURLType type,
  
  void wxHtmlWinParser::AddText(const wxString& txt)
  {
-    register wxChar d;
-    int templen = 0;
+    #define NBSP_UNICODE_VALUE  (wxChar(160))
+#if !wxUSE_UNICODE
+    if ( m_nbsp == 0 )
+        m_nbsp = GetEntitiesParser()->GetCharForCode(NBSP_UNICODE_VALUE);
+    #define CUR_NBSP_VALUE m_nbsp
+#else
+    #define CUR_NBSP_VALUE NBSP_UNICODE_VALUE
+#endif
  
-    size_t lng = txt.length();
-    if (lng+1 > m_tmpStrBufSize)
+    if ( m_whitespaceMode == Whitespace_Normal ) // collapse whitespace runs
      {
-        delete[] m_tmpStrBuf;
-        m_tmpStrBuf = new wxChar[lng+1];
-        m_tmpStrBufSize = lng+1;
-    }
-    wxChar *temp = m_tmpStrBuf;
-
-    wxString::const_iterator i = txt.begin();
-    wxString::const_iterator end = txt.end();
+        int templen = 0;
  
-    if (m_tmpLastWasSpace)
-    {
-        while ( (i < end) &&
-                (*i == wxT('\n') || *i == wxT('\r') || *i == wxT(' ') ||
-                 *i == wxT('\t')) )
+        size_t lng = txt.length();
+        if (lng+1 > m_tmpStrBufSize)
          {
-            ++i;
+            delete[] m_tmpStrBuf;
+            m_tmpStrBuf = new wxChar[lng+1];
+            m_tmpStrBufSize = lng+1;
          }
-    }
+        wxChar *temp = m_tmpStrBuf;
  
-    while (i < end)
-    {
-        size_t x = 0;
-        d = temp[templen++] = *i;
-        if ((d == wxT('\n')) || (d == wxT('\r')) || (d == wxT(' ')) || (d == wxT('\t')))
+        wxString::const_iterator i = txt.begin();
+        const wxString::const_iterator end = txt.end();
+
+        if (m_tmpLastWasSpace) // previous text ended in a space: skip leading whitespace
          {
-            ++i, ++x;
              while ( (i < end) &&
-                    (*i == wxT('\n') || *i == wxT('\r') ||
-                     *i == wxT(' ') || *i == wxT('\t')) )
+                    (*i == wxT('\n') || *i == wxT('\r') || *i == wxT(' ') ||
+                     *i == wxT('\t')) )
              {
                  ++i;
-                ++x;
              }
          }
-        else
-            ++i;
  
-        if (x)
+        while (i < end)
+        {
+            size_t x = 0; // number of consecutive whitespace characters consumed
+            wxChar d = *i;
+            if ((d == wxT('\n')) || (d == wxT('\r')) || (d == wxT(' ')) || (d == wxT('\t')))
+            {
+                ++i, ++x;
+                while ( (i < end) &&
+                        (*i == wxT('\n') || *i == wxT('\r') ||
+                         *i == wxT(' ') || *i == wxT('\t')) )
+                {
+                    ++i;
+                    ++x;
+                }
+            }
+            else
+            {
+                ++i;
+            }
+
+            if (d == CUR_NBSP_VALUE) // non-breaking space is stored as a plain space
+                d = ' ';
+
+            temp[templen++] = d;
+
+            if (x) // a whitespace run ends the current word
+            {
+                temp[templen-1] = wxT(' ');
+                FlushWordBuf(temp, templen);
+                m_tmpLastWasSpace = true;
+            }
+        }
+
+        if (templen && (templen > 1 || temp[0] != wxT(' ')))
          {
-            temp[templen-1] = wxT(' ');
-            DoAddText(temp, templen);
-            m_tmpLastWasSpace = true;
+            FlushWordBuf(temp, templen);
+            m_tmpLastWasSpace = false;
          }
      }
-
-    if (templen && (templen > 1 || temp[0] != wxT(' ')))
+    else // m_whitespaceMode == Whitespace_Pre
      {
-        DoAddText(temp, templen);
+        if ( txt.find(CUR_NBSP_VALUE) != wxString::npos )
+        {
+            // we need to substitute spaces for &nbsp; here just like we
+            // did in the Whitespace_Normal branch above
+            wxString txt2(txt);
+            txt2.Replace(CUR_NBSP_VALUE, ' ');
+            AddPreBlock(txt2);
+        }
+        else
+        {
+            AddPreBlock(txt);
+        }
+
+        // don't eat any whitespace in <pre> block
          m_tmpLastWasSpace = false;
      }
  }
  
-void wxHtmlWinParser::DoAddText(wxChar *temp, int& templen)
+void wxHtmlWinParser::FlushWordBuf(wxChar *buf, int& len)
  {
-    #define NBSP_UNICODE_VALUE 160
-#if !wxUSE_UNICODE
-    if ( m_nbsp == 0 )
-        m_nbsp = GetEntitiesParser()->GetCharForCode(NBSP_UNICODE_VALUE);
-    #define CUR_NBSP_VALUE m_nbsp
-#else
-    #define CUR_NBSP_VALUE NBSP_UNICODE_VALUE
-#endif
+    buf[len] = 0; // NUL-terminate the first 'len' characters
  
-    temp[templen] = 0;
-    templen = 0;
  #if !wxUSE_UNICODE
      if (m_EncConv)
-        m_EncConv->Convert(temp);
+        m_EncConv->Convert(buf);
  #endif
-    size_t len = wxStrlen(temp);
-    for (size_t j = 0; j < len; j++)
-    {
-        if (temp[j] == CUR_NBSP_VALUE)
-            temp[j] = wxT(' ');
-    }
  
-    wxHtmlWordCell *c = new wxHtmlWordCell(temp, *(GetDC()));
+    AddWord(wxString(buf, len)); // emit the buffered characters as one word cell
+
+    len = 0; // buffer is empty again for the caller's next word
+}
  
-    ApplyStateToCell(c);
+void wxHtmlWinParser::AddWord(wxHtmlWordCell *word)
+{
+    ApplyStateToCell(word);
  
-    m_Container->InsertCell(c);
-    c->SetPreviousWord(m_lastWordCell);
-    m_lastWordCell = c;
+    m_Container->InsertCell(word);
+    word->SetPreviousWord(m_lastWordCell); // chain word cells in insertion order
+    m_lastWordCell = word; // remembered as predecessor of the next word added
  }
  
+void wxHtmlWinParser::AddPreBlock(const wxString& text)
+{
+    // Adds 'text' as a single word cell; if it contains TABs, they are expanded
+    // to spaces for display and the original text is kept for copying.
+    if ( text.find('\t') != wxString::npos )
+    {
+        const unsigned SPACES_PER_TAB = 8;
+        wxString text2; // 'text' with TABs expanded, i.e. as displayed
+        text2.reserve(text.length());
+
+        const wxString::const_iterator end = text.end();
+        wxString::const_iterator copyFrom = text.begin();
+        int posColumn = m_posColumn;
+        for ( wxString::const_iterator i = copyFrom; i != end; ++i )
+        {
+            if ( *i == '\t' )
+            {
+                // flush the TAB-free run preceding this TAB
+                if ( copyFrom != i )
+                    text2.append(copyFrom, i);
+
+                // pad with spaces up to the next multiple of SPACES_PER_TAB
+                const size_t expandTo = SPACES_PER_TAB - posColumn % SPACES_PER_TAB;
+                text2.append(expandTo, ' ');
+                posColumn += expandTo;
+                copyFrom = i + 1;
+            }
+            else
+            {
+                ++posColumn;
+            }
+        }
+        if ( copyFrom != end )
+            text2.append(copyFrom, end);
+
+        AddWord(new wxHtmlWordWithTabsCell(text2, text, m_posColumn, *(GetDC())));
+
+        m_posColumn = posColumn;
+    }
+    else
+    {
+        // no special formatting needed
+        AddWord(text);
+        m_posColumn += text.length();
+    }
+}
  
  
  wxHtmlContainerCell* wxHtmlWinParser::OpenContainer()
  {
      m_Container = new wxHtmlContainerCell(m_Container);
      m_Container->SetAlignHor(m_Align);
+    m_posColumn = 0;
      m_tmpLastWasSpace = true;
          /* to avoid space being first character in paragraph */
      return m_Container;
author	Václav Slavík <vslavik@fastmail.fm>
	Mon, 21 Apr 2008 10:46:30 +0000 (10:46 +0000)
committer	Václav Slavík <vslavik@fastmail.fm>
	Mon, 21 Apr 2008 10:46:30 +0000 (10:46 +0000)
include/wx/html/htmlcell.h		patch \| blob \| blame \| history
include/wx/html/winpars.h		patch \| blob \| blame \| history
src/html/htmlcell.cpp		patch \| blob \| blame \| history
src/html/m_pre.cpp		patch \| blob \| blame \| history
src/html/winpars.cpp		patch \| blob \| blame \| history