2 * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef TextIterator_h
27 #define TextIterator_h
29 #include "InlineTextBox.h"
31 #include <wtf/Vector.h>
35 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
36 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
37 inline bool isCollapsibleWhitespace(UChar c
)
// Declaration only: takes a const Range* and returns a String by value.
// NOTE(review): presumably the plain-text content of the range — confirm against the definition.
48 String
plainText(const Range
*);
// Declaration only: returns a UChar* and takes bufferLength by non-const reference,
// so the callee can write a length back to the caller.
// NOTE(review): the name suggests the returned buffer is malloc-allocated and owned by
// the caller — confirm against the definition. The effect of isDisplayString is not
// visible from this declaration.
49 UChar
* plainTextToMallocAllocatedBuffer(const Range
*, unsigned& bufferLength
, bool isDisplayString
);
// Declaration only: takes a range, a String and two flags, and returns a PassRefPtr<Range>.
// NOTE(review): presumably searches the range for the string, with `forward` selecting the
// search direction and `caseSensitive` the comparison mode — confirm against the definition,
// including what is returned when nothing matches.
50 PassRefPtr
<Range
> findPlainText(const Range
*, const String
&, bool forward
, bool caseSensitive
);
52 // Iterates through the DOM range, returning all the text, and 0-length boundaries
53 // at points where replaced elements break up the text flow. The text comes back in
54 // chunks so as to optimize for performance of the iteration.
// Constructs an iterator from a range. Both flags default to false, so the constructor is
// callable with a single argument; `explicit` prevents implicit conversion from const Range*.
59 explicit TextIterator(const Range
*, bool emitCharactersBetweenAllVisiblePositions
= false, bool enterTextControls
= false);
// Returns true once m_positionNode is null.
61 bool atEnd() const { return !m_positionNode
; }
// Returns m_textLength.
// NOTE(review): presumably the number of UChars available at characters() — confirm.
64 int length() const { return m_textLength
; }
// Returns m_textCharacters, a pointer to const UChar; callers cannot modify the text through it.
65 const UChar
* characters() const { return m_textCharacters
; }
// Const accessor returning a PassRefPtr<Range>; defined out of line.
// NOTE(review): presumably the range covering the text currently returned — confirm.
67 PassRefPtr
<Range
> range() const;
// Static helper: takes a range and returns an int. spacesForReplacedElements defaults to false.
// NOTE(review): presumably the character length of the range as seen by this iterator — confirm.
70 static int rangeLength(const Range
*, bool spacesForReplacedElements
= false);
// Static helper: returns a PassRefPtr<Range> from a scope element, an int location and an
// int length. spacesForReplacedElements defaults to false.
// NOTE(review): presumably the location and length are character offsets within `scope`,
// using the same counting as rangeLength() — confirm.
71 static PassRefPtr
<Range
> rangeFromLocationAndLength(Element
* scope
, int rangeLocation
, int rangeLength
, bool spacesForReplacedElements
= false);
// Static helper: returns a PassRefPtr<Range> from a (non-const) Range*, an int offset and an
// int count.
// NOTE(review): presumably the sub-range of entireRange starting characterOffset characters in
// and spanning characterCount characters — confirm.
72 static PassRefPtr
<Range
> subrange(Range
* entireRange
, int characterOffset
, int characterCount
);
76 bool shouldRepresentNodeOffsetZero();
77 bool shouldEmitSpaceBeforeAndAfterNode(Node
*);
78 void representNodeOffsetZero();
79 bool handleTextNode();
80 bool handleReplacedElement();
81 bool handleNonTextNode();
83 void emitCharacter(UChar
, Node
*textNode
, Node
*offsetBaseNode
, int textStartOffset
, int textEndOffset
);
84 void emitText(Node
*textNode
, int textStartOffset
, int textEndOffset
);
86 // Current position, not necessarily of the text being returned, but position
87 // as we walk through the DOM tree.
91 bool m_handledChildren
;
92 bool m_inShadowContent
;
95 Node
*m_startContainer
;
101 // The current text and its position, in the form to be returned from the iterator.
102 Node
*m_positionNode
;
103 mutable Node
*m_positionOffsetBaseNode
;
104 mutable int m_positionStartOffset
;
105 mutable int m_positionEndOffset
;
106 const UChar
* m_textCharacters
;
109 // Used when there is still some pending text from the current node; when these
110 // are false and 0, we go back to normal iterating.
111 bool m_needAnotherNewline
;
112 InlineTextBox
*m_textBox
;
114 // Used to do the whitespace collapsing logic.
115 Node
*m_lastTextNode
;
116 bool m_lastTextNodeEndedWithCollapsedSpace
;
117 UChar m_lastCharacter
;
119 // Used for whitespace characters that aren't in the DOM, so we can point at them.
120 UChar m_singleCharacterBuffer
;
122 // Used when text boxes are out of order (Hebrew/Arabic w/ embedded LTR text)
123 Vector
<InlineTextBox
*> m_sortedTextBoxes
;
124 size_t m_sortedTextBoxesPosition
;
126 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
129 // Used by selection preservation code. There should be one character emitted between every VisiblePosition
130 // in the Range used to create the TextIterator.
131 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
132 // moveParagraphs to not clone/destroy moved content.
133 bool m_emitCharactersBetweenAllVisiblePositions
;
134 bool m_enterTextControls
;
137 // Iterates through the DOM range, returning all the text, and 0-length boundaries
138 // at points where replaced elements break up the text flow. The text comes back in
139 // chunks so as to optimize for performance of the iteration.
140 class SimplifiedBackwardsTextIterator
{
// Default constructor; defined out of line.
142 SimplifiedBackwardsTextIterator();
// Constructs an iterator from a range; `explicit` prevents implicit conversion from const Range*.
143 explicit SimplifiedBackwardsTextIterator(const Range
*);
// Returns true once m_positionNode is null.
145 bool atEnd() const { return !m_positionNode
; }
// Returns m_textLength.
// NOTE(review): presumably the number of UChars available at characters() — confirm.
148 int length() const { return m_textLength
; }
// Returns m_textCharacters, a pointer to const UChar.
149 const UChar
* characters() const { return m_textCharacters
; }
// Const accessor returning a PassRefPtr<Range>; defined out of line.
151 PassRefPtr
<Range
> range() const;
// Per-node-kind handlers; each returns bool.
// NOTE(review): the meaning of the bool result is not visible here — confirm in the definitions.
155 bool handleTextNode();
156 bool handleReplacedElement();
157 bool handleNonTextNode();
// NOTE(review): the second parameter is named `Node`, the same as its type. This is legal
// (the name only takes effect after its declarator) but easy to misread; consider renaming
// it to `node` in the declaration.
158 void emitCharacter(UChar
, Node
*Node
, int startOffset
, int endOffset
);
160 // Current position, not necessarily of the text being returned, but position
161 // as we walk through the DOM tree.
165 bool m_handledChildren
;
170 // Start of the range.
174 // The current text and its position, in the form to be returned from the iterator.
// atEnd() reports true when this pointer is null.
175 Node
* m_positionNode
;
176 int m_positionStartOffset
;
177 int m_positionEndOffset
;
// Pointer returned by characters().
178 const UChar
* m_textCharacters
;
181 // Used to do the whitespace logic.
182 Node
* m_lastTextNode
;
183 UChar m_lastCharacter
;
185 // Used for whitespace characters that aren't in the DOM, so we can point at them.
186 UChar m_singleCharacterBuffer
;
188 // The node after the last node this iterator should process.
189 Node
* m_pastStartNode
;
192 // Builds on the text iterator, adding a character position so we can walk one
193 // character at a time, or faster, as needed. Useful for searching.
194 class CharacterIterator
{
// Constructs an iterator from a range. Both flags default to false, so the constructor is
// callable with a single argument; `explicit` prevents implicit conversion from const Range*.
// NOTE(review): the flag names match TextIterator's constructor, so they are presumably
// forwarded to m_textIterator — confirm in the definition.
197 explicit CharacterIterator(const Range
* r
, bool emitCharactersBetweenAllVisiblePositions
= false, bool enterTextControls
= false);
// Advances by numCharacters; defined out of line.
199 void advance(int numCharacters
);
// Returns m_atBreak.
201 bool atBreak() const { return m_atBreak
; }
// Delegates to the wrapped TextIterator's atEnd().
202 bool atEnd() const { return m_textIterator
.atEnd(); }
// Length of the wrapped iterator's current text minus m_runOffset.
204 int length() const { return m_textIterator
.length() - m_runOffset
; }
// The wrapped iterator's characters() pointer advanced by m_runOffset, so length() and
// characters() describe the same remaining span.
205 const UChar
* characters() const { return m_textIterator
.characters() + m_runOffset
; }
// Returns a String; defined out of line.
// NOTE(review): presumably collects the next numChars characters — confirm, including
// whether the iterator is advanced as a side effect.
206 String
string(int numChars
);
// Returns m_offset.
208 int characterOffset() const { return m_offset
; }
// Const accessor returning a PassRefPtr<Range>; defined out of line.
209 PassRefPtr
<Range
> range() const;
// Underlying iterator, held by value.
216 TextIterator m_textIterator
;
219 class BackwardsCharacterIterator
{
// Default constructor; defined out of line.
221 BackwardsCharacterIterator();
// Constructs an iterator from a range; `explicit` prevents implicit conversion from const Range*.
222 explicit BackwardsCharacterIterator(const Range
*);
// Delegates to the wrapped SimplifiedBackwardsTextIterator's atEnd().
226 bool atEnd() const { return m_textIterator
.atEnd(); }
// Const accessor returning a PassRefPtr<Range>; defined out of line.
228 PassRefPtr
<Range
> range() const;
// Underlying backwards iterator, held by value.
235 SimplifiedBackwardsTextIterator m_textIterator
;
238 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
239 // meaning they never split up a word. This is useful for spellcheck or (perhaps one day) searching.
240 class WordAwareIterator
{
// Constructs an iterator from a range; `explicit` prevents implicit conversion from const Range*.
243 explicit WordAwareIterator(const Range
*r
);
// At the end only when m_didLookAhead is false AND the wrapped TextIterator is at its end;
// while m_didLookAhead is set this keeps returning false.
245 bool atEnd() const { return !m_didLookAhead
&& m_textIterator
.atEnd(); }
// Returns a pointer to const UChar; defined out of line.
249 const UChar
* characters() const;
251 // Range of the text we're currently returning
// Returns m_range (a RefPtr<Range>) converted to PassRefPtr<Range>.
252 PassRefPtr
<Range
> range() const { return m_range
; }
255 // text from the previous chunk from the textIterator
256 const UChar
* m_previousText
;
257 int m_previousLength
;
259 // many chunks from textIterator concatenated
260 Vector
<UChar
> m_buffer
;
262 // Did we have to look ahead in the textIterator to confirm the current chunk?
// Range handed out by range().
265 RefPtr
<Range
> m_range
;
// Underlying iterator, held by value; atEnd() consults it.
267 TextIterator m_textIterator
;