src/stc/scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <ctype.h>
  12 #include <assert.h>
  13
  14 #include <string>
  15 #include <vector>
  16
  17 #include "Platform.h"
  18
  19 #include "ILexer.h"
  20 #include "Scintilla.h"
  21
  22 #include "SplitVector.h"
  23 #include "Partitioning.h"
  24 #include "RunStyles.h"
  25 #include "CellBuffer.h"
  26 #include "PerLine.h"
  27 #include "CharClassify.h"
  28 #include "CharacterSet.h"
  29 #include "Decoration.h"
  30 #include "Document.h"
  31 #include "RESearch.h"
  32 #include "UniConversion.h"
  33
  34 #ifdef SCI_NAMESPACE
  35 using namespace Scintilla;
  36 #endif
  37
  38 static inline bool IsPunctuation(char ch) {
  39         return isascii(ch) && ispunct(ch);
  40 }
  41
  42 void LexInterface::Colourise(int start, int end) {
  43         if (pdoc && instance && !performingStyle) {
  44                 // Protect against reentrance, which may occur, for example, when
  45                 // fold points are discovered while performing styling and the folding
  46                 // code looks for child lines which may trigger styling.
  47                 performingStyle = true;
  48
  49                 int lengthDoc = pdoc->Length();
  50                 if (end == -1)
  51                         end = lengthDoc;
  52                 int len = end - start;
  53
  54                 PLATFORM_ASSERT(len >= 0);
  55                 PLATFORM_ASSERT(start + len <= lengthDoc);
  56
  57                 int styleStart = 0;
  58                 if (start > 0)
  59                         styleStart = pdoc->StyleAt(start - 1) & pdoc->stylingBitsMask;
  60
  61                 if (len > 0) {
  62                         instance->Lex(start, len, styleStart, pdoc);
  63                         instance->Fold(start, len, styleStart, pdoc);
  64                 }
  65
  66                 performingStyle = false;
  67         }
  68 }
  69
  70 Document::Document() {
  71         refCount = 0;
  72 #ifdef _WIN32
  73         eolMode = SC_EOL_CRLF;
  74 #else
  75         eolMode = SC_EOL_LF;
  76 #endif
  77         dbcsCodePage = 0;
  78         stylingBits = 5;
  79         stylingBitsMask = 0x1F;
  80         stylingMask = 0;
  81         endStyled = 0;
  82         styleClock = 0;
  83         enteredModification = 0;
  84         enteredStyling = 0;
  85         enteredReadOnlyCount = 0;
  86         tabInChars = 8;
  87         indentInChars = 0;
  88         actualIndentInChars = 8;
  89         useTabs = true;
  90         tabIndents = true;
  91         backspaceUnindents = false;
  92         watchers = 0;
  93         lenWatchers = 0;
  94
  95         matchesValid = false;
  96         regex = 0;
  97
  98         UTF8BytesOfLeadInitialise();
  99
 100         perLineData[ldMarkers] = new LineMarkers();
 101         perLineData[ldLevels] = new LineLevels();
 102         perLineData[ldState] = new LineState();
 103         perLineData[ldMargin] = new LineAnnotation();
 104         perLineData[ldAnnotation] = new LineAnnotation();
 105
 106         cb.SetPerLine(this);
 107
 108         pli = 0;
 109 }
 110
 111 Document::~Document() {
 112         for (int i = 0; i < lenWatchers; i++) {
 113                 watchers[i].watcher->NotifyDeleted(this, watchers[i].userData);
 114         }
 115         delete []watchers;
 116         for (int j=0; j<ldSize; j++) {
 117                 delete perLineData[j];
 118                 perLineData[j] = 0;
 119         }
 120         watchers = 0;
 121         lenWatchers = 0;
 122         delete regex;
 123         regex = 0;
 124         delete pli;
 125         pli = 0;
 126 }
 127
 128 void Document::Init() {
 129         for (int j=0; j<ldSize; j++) {
 130                 if (perLineData[j])
 131                         perLineData[j]->Init();
 132         }
 133 }
 134
 135 void Document::InsertLine(int line) {
 136         for (int j=0; j<ldSize; j++) {
 137                 if (perLineData[j])
 138                         perLineData[j]->InsertLine(line);
 139         }
 140 }
 141
 142 void Document::RemoveLine(int line) {
 143         for (int j=0; j<ldSize; j++) {
 144                 if (perLineData[j])
 145                         perLineData[j]->RemoveLine(line);
 146         }
 147 }
 148
 149 // Increase reference count and return its previous value.
 150 int Document::AddRef() {
 151         return refCount++;
 152 }
 153
 154 // Decrease reference count and return its previous value.
 155 // Delete the document if reference count reaches zero.
 156 int SCI_METHOD Document::Release() {
 157         int curRefCount = --refCount;
 158         if (curRefCount == 0)
 159                 delete this;
 160         return curRefCount;
 161 }
 162
 163 void Document::SetSavePoint() {
 164         cb.SetSavePoint();
 165         NotifySavePoint(true);
 166 }
 167
 168 int Document::GetMark(int line) {
 169         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
 170 }
 171
 172 int Document::MarkerNext(int lineStart, int mask) const {
 173         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
 174 }
 175
 176 int Document::AddMark(int line, int markerNum) {
 177         if (line >= 0 && line <= LinesTotal()) {
 178                 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
 179                         AddMark(line, markerNum, LinesTotal());
 180                 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 181                 NotifyModified(mh);
 182                 return prev;
 183         } else {
 184                 return 0;
 185         }
 186 }
 187
 188 void Document::AddMarkSet(int line, int valueSet) {
 189         if (line < 0 || line > LinesTotal()) {
 190                 return;
 191         }
 192         unsigned int m = valueSet;
 193         for (int i = 0; m; i++, m >>= 1)
 194                 if (m & 1)
 195                         static_cast<LineMarkers *>(perLineData[ldMarkers])->
 196                                 AddMark(line, i, LinesTotal());
 197         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 198         NotifyModified(mh);
 199 }
 200
 201 void Document::DeleteMark(int line, int markerNum) {
 202         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
 203         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 204         NotifyModified(mh);
 205 }
 206
 207 void Document::DeleteMarkFromHandle(int markerHandle) {
 208         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
 209         DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 210         mh.line = -1;
 211         NotifyModified(mh);
 212 }
 213
 214 void Document::DeleteAllMarks(int markerNum) {
 215         bool someChanges = false;
 216         for (int line = 0; line < LinesTotal(); line++) {
 217                 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
 218                         someChanges = true;
 219         }
 220         if (someChanges) {
 221                 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 222                 mh.line = -1;
 223                 NotifyModified(mh);
 224         }
 225 }
 226
 227 int Document::LineFromHandle(int markerHandle) {
 228         return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
 229 }
 230
 231 int SCI_METHOD Document::LineStart(int line) const {
 232         return cb.LineStart(line);
 233 }
 234
 235 int Document::LineEnd(int line) const {
 236         if (line == LinesTotal() - 1) {
 237                 return LineStart(line + 1);
 238         } else {
 239                 int position = LineStart(line + 1) - 1;
 240                 // When line terminator is CR+LF, may need to go back one more
 241                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 242                         position--;
 243                 }
 244                 return position;
 245         }
 246 }
 247
 248 void SCI_METHOD Document::SetErrorStatus(int status) {
 249         // Tell the watchers the lexer has changed.
 250         for (int i = 0; i < lenWatchers; i++) {
 251                 watchers[i].watcher->NotifyErrorOccurred(this, watchers[i].userData, status);
 252         }
 253 }
 254
 255 int SCI_METHOD Document::LineFromPosition(int pos) const {
 256         return cb.LineFromPosition(pos);
 257 }
 258
 259 int Document::LineEndPosition(int position) const {
 260         return LineEnd(LineFromPosition(position));
 261 }
 262
 263 bool Document::IsLineEndPosition(int position) const {
 264         return LineEnd(LineFromPosition(position)) == position;
 265 }
 266
 267 int Document::VCHomePosition(int position) const {
 268         int line = LineFromPosition(position);
 269         int startPosition = LineStart(line);
 270         int endLine = LineEnd(line);
 271         int startText = startPosition;
 272         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 273                 startText++;
 274         if (position == startText)
 275                 return startPosition;
 276         else
 277                 return startText;
 278 }
 279
 280 int SCI_METHOD Document::SetLevel(int line, int level) {
 281         int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
 282         if (prev != level) {
 283                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 284                                    LineStart(line), 0, 0, 0, line);
 285                 mh.foldLevelNow = level;
 286                 mh.foldLevelPrev = prev;
 287                 NotifyModified(mh);
 288         }
 289         return prev;
 290 }
 291
 292 int SCI_METHOD Document::GetLevel(int line) const {
 293         return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
 294 }
 295
 296 void Document::ClearLevels() {
 297         static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
 298 }
 299
 300 static bool IsSubordinate(int levelStart, int levelTry) {
 301         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 302                 return true;
 303         else
 304                 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
 305 }
 306
 307 int Document::GetLastChild(int lineParent, int level, int lastLine) {
 308         if (level == -1)
 309                 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
 310         int maxLine = LinesTotal();
 311         int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
 312         int lineMaxSubord = lineParent;
 313         while (lineMaxSubord < maxLine - 1) {
 314                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 315                 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
 316                         break;
 317                 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
 318                         break;
 319                 lineMaxSubord++;
 320         }
 321         if (lineMaxSubord > lineParent) {
 322                 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
 323                         // Have chewed up some whitespace that belongs to a parent so seek back
 324                         if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
 325                                 lineMaxSubord--;
 326                         }
 327                 }
 328         }
 329         return lineMaxSubord;
 330 }
 331
 332 int Document::GetFoldParent(int line) {
 333         int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
 334         int lineLook = line - 1;
 335         while ((lineLook > 0) && (
 336                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 337                     ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
 338               ) {
 339                 lineLook--;
 340         }
 341         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 342                 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
 343                 return lineLook;
 344         } else {
 345                 return -1;
 346         }
 347 }
 348
 349 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
 350         int level = GetLevel(line);
 351         int lookLastLine = Platform::Maximum(line, lastLine) + 1;
 352
 353         int lookLine = line;
 354         int lookLineLevel = level;
 355         int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 356         while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
 357                 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
 358                 lookLineLevel = GetLevel(--lookLine);
 359                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 360         }
 361
 362         int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
 363         if (beginFoldBlock == -1) {
 364                 highlightDelimiter.Clear();
 365                 return;
 366         }
 367
 368         int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
 369         int firstChangeableLineBefore = -1;
 370         if (endFoldBlock < line) {
 371                 lookLine = beginFoldBlock - 1;
 372                 lookLineLevel = GetLevel(lookLine);
 373                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 374                 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
 375                         if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
 376                                 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
 377                                         beginFoldBlock = lookLine;
 378                                         endFoldBlock = line;
 379                                         firstChangeableLineBefore = line - 1;
 380                                 }
 381                         }
 382                         if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
 383                                 break;
 384                         lookLineLevel = GetLevel(--lookLine);
 385                         lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 386                 }
 387         }
 388         if (firstChangeableLineBefore == -1) {
 389                 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 390                         lookLine >= beginFoldBlock;
 391                         lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 392                         if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
 393                                 firstChangeableLineBefore = lookLine;
 394                                 break;
 395                         }
 396                 }
 397         }
 398         if (firstChangeableLineBefore == -1)
 399                 firstChangeableLineBefore = beginFoldBlock - 1;
 400
 401         int firstChangeableLineAfter = -1;
 402         for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 403                 lookLine <= endFoldBlock;
 404                 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 405                 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
 406                         firstChangeableLineAfter = lookLine;
 407                         break;
 408                 }
 409         }
 410         if (firstChangeableLineAfter == -1)
 411                 firstChangeableLineAfter = endFoldBlock + 1;
 412
 413         highlightDelimiter.beginFoldBlock = beginFoldBlock;
 414         highlightDelimiter.endFoldBlock = endFoldBlock;
 415         highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
 416         highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
 417 }
 418
 419 int Document::ClampPositionIntoDocument(int pos) {
 420         return Platform::Clamp(pos, 0, Length());
 421 }
 422
 423 bool Document::IsCrLf(int pos) {
 424         if (pos < 0)
 425                 return false;
 426         if (pos >= (Length() - 1))
 427                 return false;
 428         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 429 }
 430
 431 int Document::LenChar(int pos) {
 432         if (pos < 0) {
 433                 return 1;
 434         } else if (IsCrLf(pos)) {
 435                 return 2;
 436         } else if (SC_CP_UTF8 == dbcsCodePage) {
 437                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 438                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 439                 int lengthDoc = Length();
 440                 if ((pos + widthCharBytes) > lengthDoc)
 441                         return lengthDoc - pos;
 442                 else
 443                         return widthCharBytes;
 444         } else if (dbcsCodePage) {
 445                 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 446         } else {
 447                 return 1;
 448         }
 449 }
 450
 451 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
 452         int trail = pos;
 453         while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
 454                 trail--;
 455         start = (trail > 0) ? trail-1 : trail;
 456
 457         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
 458         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 459         if (widthCharBytes == 1) {
 460                 return false;
 461         } else {
 462                 int trailBytes = widthCharBytes - 1;
 463                 int len = pos - start;
 464                 if (len > trailBytes)
 465                         // pos too far from lead
 466                         return false;
 467                 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 468                 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
 469                         charBytes[b] = cb.CharAt(static_cast<int>(start+b));
 470                 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 471                 if (utf8status & UTF8MaskInvalid)
 472                         return false;
 473                 end = start + widthCharBytes;
 474                 return true;
 475         }
 476 }
 477
 478 // Normalise a position so that it is not halfway through a two byte character.
 479 // This can occur in two situations -
 480 // When lines are terminated with \r\n pairs which should be treated as one character.
 481 // When displaying DBCS text such as Japanese.
 482 // If moving, move the position in the indicated direction.
 483 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
 484         //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
 485         // If out of range, just return minimum/maximum value.
 486         if (pos <= 0)
 487                 return 0;
 488         if (pos >= Length())
 489                 return Length();
 490
 491         // PLATFORM_ASSERT(pos > 0 && pos < Length());
 492         if (checkLineEnd && IsCrLf(pos - 1)) {
 493                 if (moveDir > 0)
 494                         return pos + 1;
 495                 else
 496                         return pos - 1;
 497         }
 498
 499         if (dbcsCodePage) {
 500                 if (SC_CP_UTF8 == dbcsCodePage) {
 501                         unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 502                         // If ch is not a trail byte then pos is valid intercharacter position
 503                         if (UTF8IsTrailByte(ch)) {
 504                                 int startUTF = pos;
 505                                 int endUTF = pos;
 506                                 if (InGoodUTF8(pos, startUTF, endUTF)) {
 507                                         // ch is a trail byte within a UTF-8 character
 508                                         if (moveDir > 0)
 509                                                 pos = endUTF;
 510                                         else
 511                                                 pos = startUTF;
 512                                 }
 513                                 // Else invalid UTF-8 so return position of isolated trail byte
 514                         }
 515                 } else {
 516                         // Anchor DBCS calculations at start of line because start of line can
 517                         // not be a DBCS trail byte.
 518                         int posStartLine = LineStart(LineFromPosition(pos));
 519                         if (pos == posStartLine)
 520                                 return pos;
 521
 522                         // Step back until a non-lead-byte is found.
 523                         int posCheck = pos;
 524                         while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
 525                                 posCheck--;
 526
 527                         // Check from known start of character.
 528                         while (posCheck < pos) {
 529                                 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
 530                                 if (posCheck + mbsize == pos) {
 531                                         return pos;
 532                                 } else if (posCheck + mbsize > pos) {
 533                                         if (moveDir > 0) {
 534                                                 return posCheck + mbsize;
 535                                         } else {
 536                                                 return posCheck;
 537                                         }
 538                                 }
 539                                 posCheck += mbsize;
 540                         }
 541                 }
 542         }
 543
 544         return pos;
 545 }
 546
 547 // NextPosition moves between valid positions - it can not handle a position in the middle of a
 548 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
 549 // A \r\n pair is treated as two characters.
 550 int Document::NextPosition(int pos, int moveDir) const {
 551         // If out of range, just return minimum/maximum value.
 552         int increment = (moveDir > 0) ? 1 : -1;
 553         if (pos + increment <= 0)
 554                 return 0;
 555         if (pos + increment >= Length())
 556                 return Length();
 557
 558         if (dbcsCodePage) {
 559                 if (SC_CP_UTF8 == dbcsCodePage) {
 560                         if (increment == 1) {
 561                                 // Simple forward movement case so can avoid some checks
 562                                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 563                                 if (UTF8IsAscii(leadByte)) {
 564                                         // Single byte character or invalid
 565                                         pos++;
 566                                 } else {
 567                                         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 568                                         char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 569                                         for (int b=1; b<widthCharBytes; b++)
 570                                                 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
 571                                         int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 572                                         if (utf8status & UTF8MaskInvalid)
 573                                                 pos++;
 574                                         else
 575                                                 pos += utf8status & UTF8MaskWidth;
 576                                 }
 577                         } else {
 578                                 // Examine byte before position
 579                                 pos--;
 580                                 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 581                                 // If ch is not a trail byte then pos is valid intercharacter position
 582                                 if (UTF8IsTrailByte(ch)) {
 583                                         // If ch is a trail byte in a valid UTF-8 character then return start of character
 584                                         int startUTF = pos;
 585                                         int endUTF = pos;
 586                                         if (InGoodUTF8(pos, startUTF, endUTF)) {
 587                                                 pos = startUTF;
 588                                         }
 589                                         // Else invalid UTF-8 so return position of isolated trail byte
 590                                 }
 591                         }
 592                 } else {
 593                         if (moveDir > 0) {
 594                                 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 595                                 pos += mbsize;
 596                                 if (pos > Length())
 597                                         pos = Length();
 598                         } else {
 599                                 // Anchor DBCS calculations at start of line because start of line can
 600                                 // not be a DBCS trail byte.
 601                                 int posStartLine = LineStart(LineFromPosition(pos));
 602                                 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
 603                                 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
 604                                 if ((pos - 1) <= posStartLine) {
 605                                         return pos - 1;
 606                                 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
 607                                         // Must actually be trail byte
 608                                         return pos - 2;
 609                                 } else {
 610                                         // Otherwise, step back until a non-lead-byte is found.
 611                                         int posTemp = pos - 1;
 612                                         while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
 613                                                 ;
 614                                         // Now posTemp+1 must point to the beginning of a character,
 615                                         // so figure out whether we went back an even or an odd
 616                                         // number of bytes and go back 1 or 2 bytes, respectively.
 617                                         return (pos - 1 - ((pos - posTemp) & 1));
 618                                 }
 619                         }
 620                 }
 621         } else {
 622                 pos += increment;
 623         }
 624
 625         return pos;
 626 }
 627
 628 bool Document::NextCharacter(int &pos, int moveDir) {
 629         // Returns true if pos changed
 630         int posNext = NextPosition(pos, moveDir);
 631         if (posNext == pos) {
 632                 return false;
 633         } else {
 634                 pos = posNext;
 635                 return true;
 636         }
 637 }
 638
 639 int SCI_METHOD Document::CodePage() const {
 640         return dbcsCodePage;
 641 }
 642
 643 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 644         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 645         unsigned char uch = static_cast<unsigned char>(ch);
 646         switch (dbcsCodePage) {
 647                 case 932:
 648                         // Shift_jis
 649                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 650                                 ((uch >= 0xE0) && (uch <= 0xFC));
 651                                 // Lead bytes F0 to FC may be a Microsoft addition.
 652                 case 936:
 653                         // GBK
 654                         return (uch >= 0x81) && (uch <= 0xFE);
 655                 case 949:
 656                         // Korean Wansung KS C-5601-1987
 657                         return (uch >= 0x81) && (uch <= 0xFE);
 658                 case 950:
 659                         // Big5
 660                         return (uch >= 0x81) && (uch <= 0xFE);
 661                 case 1361:
 662                         // Korean Johab KS C-5601-1992
 663                         return
 664                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
 665                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
 666                                 ((uch >= 0xE0) && (uch <= 0xF9));
 667         }
 668         return false;
 669 }
 670
 671 static inline bool IsSpaceOrTab(int ch) {
 672         return ch == ' ' || ch == '\t';
 673 }
 674
 675 // Need to break text into segments near lengthSegment but taking into
 676 // account the encoding to not break inside a UTF-8 or DBCS character
 677 // and also trying to avoid breaking inside a pair of combining characters.
 678 // The segment length must always be long enough (more than 4 bytes)
 679 // so that there will be at least one whole character to make a segment.
 680 // For UTF-8, text must consist only of valid whole characters.
 681 // In preference order from best to worst:
 682 //   1) Break after space
 683 //   2) Break before punctuation
 684 //   3) Break after whole character
 685
 686 int Document::SafeSegment(const char *text, int length, int lengthSegment) {
 687         if (length <= lengthSegment)
 688                 return length;
 689         int lastSpaceBreak = -1;
 690         int lastPunctuationBreak = -1;
 691         int lastEncodingAllowedBreak = -1;
 692         for (int j=0; j < lengthSegment;) {
 693                 unsigned char ch = static_cast<unsigned char>(text[j]);
 694                 if (j > 0) {
 695                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
 696                                 lastSpaceBreak = j;
 697                         }
 698                         if (ch < 'A') {
 699                                 lastPunctuationBreak = j;
 700                         }
 701                 }
 702                 lastEncodingAllowedBreak = j;
 703
 704                 if (dbcsCodePage == SC_CP_UTF8) {
 705                         j += UTF8BytesOfLead[ch];
 706                 } else if (dbcsCodePage) {
 707                         j += IsDBCSLeadByte(ch) ? 2 : 1;
 708                 } else {
 709                         j++;
 710                 }
 711         }
 712         if (lastSpaceBreak >= 0) {
 713                 return lastSpaceBreak;
 714         } else if (lastPunctuationBreak >= 0) {
 715                 return lastPunctuationBreak;
 716         }
 717         return lastEncodingAllowedBreak;
 718 }
 719
 720 void Document::ModifiedAt(int pos) {
 721         if (endStyled > pos)
 722                 endStyled = pos;
 723 }
 724
 725 void Document::CheckReadOnly() {
 726         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
 727                 enteredReadOnlyCount++;
 728                 NotifyModifyAttempt();
 729                 enteredReadOnlyCount--;
 730         }
 731 }
 732
 733 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
 734 // SetStyleAt does not change the persistent state of a document
 735
 736 bool Document::DeleteChars(int pos, int len) {
 737         if (len <= 0)
 738                 return false;
 739         if ((pos + len) > Length())
 740                 return false;
 741         CheckReadOnly();
 742         if (enteredModification != 0) {
 743                 return false;
 744         } else {
 745                 enteredModification++;
 746                 if (!cb.IsReadOnly()) {
 747                         NotifyModified(
 748                             DocModification(
 749                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
 750                                 pos, len,
 751                                 0, 0));
 752                         int prevLinesTotal = LinesTotal();
 753                         bool startSavePoint = cb.IsSavePoint();
 754                         bool startSequence = false;
 755                         const char *text = cb.DeleteChars(pos, len, startSequence);
 756                         if (startSavePoint && cb.IsCollectingUndo())
 757                                 NotifySavePoint(!startSavePoint);
 758                         if ((pos < Length()) || (pos == 0))
 759                                 ModifiedAt(pos);
 760                         else
 761                                 ModifiedAt(pos-1);
 762                         NotifyModified(
 763                             DocModification(
 764                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 765                                 pos, len,
 766                                 LinesTotal() - prevLinesTotal, text));
 767                 }
 768                 enteredModification--;
 769         }
 770         return !cb.IsReadOnly();
 771 }
 772
 773 /**
 774  * Insert a string with a length.
 775  */
 776 bool Document::InsertString(int position, const char *s, int insertLength) {
 777         if (insertLength <= 0) {
 778                 return false;
 779         }
 780         CheckReadOnly();
 781         if (enteredModification != 0) {
 782                 return false;
 783         } else {
 784                 enteredModification++;
 785                 if (!cb.IsReadOnly()) {
 786                         NotifyModified(
 787                             DocModification(
 788                                 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
 789                                 position, insertLength,
 790                                 0, s));
 791                         int prevLinesTotal = LinesTotal();
 792                         bool startSavePoint = cb.IsSavePoint();
 793                         bool startSequence = false;
 794                         const char *text = cb.InsertString(position, s, insertLength, startSequence);
 795                         if (startSavePoint && cb.IsCollectingUndo())
 796                                 NotifySavePoint(!startSavePoint);
 797                         ModifiedAt(position);
 798                         NotifyModified(
 799                             DocModification(
 800                                 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 801                                 position, insertLength,
 802                                 LinesTotal() - prevLinesTotal, text));
 803                 }
 804                 enteredModification--;
 805         }
 806         return !cb.IsReadOnly();
 807 }
 808
 809 int SCI_METHOD Document::AddData(char *data, int length) {
 810         try {
 811                 int position = Length();
 812                 InsertString(position,data, length);
 813         } catch (std::bad_alloc &) {
 814                 return SC_STATUS_BADALLOC;
 815         } catch (...) {
 816                 return SC_STATUS_FAILURE;
 817         }
 818         return 0;
 819 }
 820
 821 void * SCI_METHOD Document::ConvertToDocument() {
 822         return this;
 823 }
 824
 825 int Document::Undo() {
 826         int newPos = -1;
 827         CheckReadOnly();
 828         if (enteredModification == 0) {
 829                 enteredModification++;
 830                 if (!cb.IsReadOnly()) {
 831                         bool startSavePoint = cb.IsSavePoint();
 832                         bool multiLine = false;
 833                         int steps = cb.StartUndo();
 834                         //Platform::DebugPrintf("Steps=%d\n", steps);
 835                         int coalescedRemovePos = -1;
 836                         int coalescedRemoveLen = 0;
 837                         int prevRemoveActionPos = -1;
 838                         int prevRemoveActionLen = 0;
 839                         for (int step = 0; step < steps; step++) {
 840                                 const int prevLinesTotal = LinesTotal();
 841                                 const Action &action = cb.GetUndoStep();
 842                                 if (action.at == removeAction) {
 843                                         NotifyModified(DocModification(
 844                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
 845                                 } else if (action.at == containerAction) {
 846                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
 847                                         dm.token = action.position;
 848                                         NotifyModified(dm);
 849                                         if (!action.mayCoalesce) {
 850                                                 coalescedRemovePos = -1;
 851                                                 coalescedRemoveLen = 0;
 852                                                 prevRemoveActionPos = -1;
 853                                                 prevRemoveActionLen = 0;
 854                                         }
 855                                 } else {
 856                                         NotifyModified(DocModification(
 857                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
 858                                 }
 859                                 cb.PerformUndoStep();
 860                                 if (action.at != containerAction) {
 861                                         ModifiedAt(action.position);
 862                                         newPos = action.position;
 863                                 }
 864
 865                                 int modFlags = SC_PERFORMED_UNDO;
 866                                 // With undo, an insertion action becomes a deletion notification
 867                                 if (action.at == removeAction) {
 868                                         newPos += action.lenData;
 869                                         modFlags |= SC_MOD_INSERTTEXT;
 870                                         if ((coalescedRemoveLen > 0) &&
 871                                                 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
 872                                                 coalescedRemoveLen += action.lenData;
 873                                                 newPos = coalescedRemovePos + coalescedRemoveLen;
 874                                         } else {
 875                                                 coalescedRemovePos = action.position;
 876                                                 coalescedRemoveLen = action.lenData;
 877                                         }
 878                                         prevRemoveActionPos = action.position;
 879                                         prevRemoveActionLen = action.lenData;
 880                                 } else if (action.at == insertAction) {
 881                                         modFlags |= SC_MOD_DELETETEXT;
 882                                         coalescedRemovePos = -1;
 883                                         coalescedRemoveLen = 0;
 884                                         prevRemoveActionPos = -1;
 885                                         prevRemoveActionLen = 0;
 886                                 }
 887                                 if (steps > 1)
 888                                         modFlags |= SC_MULTISTEPUNDOREDO;
 889                                 const int linesAdded = LinesTotal() - prevLinesTotal;
 890                                 if (linesAdded != 0)
 891                                         multiLine = true;
 892                                 if (step == steps - 1) {
 893                                         modFlags |= SC_LASTSTEPINUNDOREDO;
 894                                         if (multiLine)
 895                                                 modFlags |= SC_MULTILINEUNDOREDO;
 896                                 }
 897                                 NotifyModified(DocModification(modFlags, action.position, action.lenData,
 898                                                                                            linesAdded, action.data));
 899                         }
 900
 901                         bool endSavePoint = cb.IsSavePoint();
 902                         if (startSavePoint != endSavePoint)
 903                                 NotifySavePoint(endSavePoint);
 904                 }
 905                 enteredModification--;
 906         }
 907         return newPos;
 908 }
 909
 910 int Document::Redo() {
 911         int newPos = -1;
 912         CheckReadOnly();
 913         if (enteredModification == 0) {
 914                 enteredModification++;
 915                 if (!cb.IsReadOnly()) {
 916                         bool startSavePoint = cb.IsSavePoint();
 917                         bool multiLine = false;
 918                         int steps = cb.StartRedo();
 919                         for (int step = 0; step < steps; step++) {
 920                                 const int prevLinesTotal = LinesTotal();
 921                                 const Action &action = cb.GetRedoStep();
 922                                 if (action.at == insertAction) {
 923                                         NotifyModified(DocModification(
 924                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
 925                                 } else if (action.at == containerAction) {
 926                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
 927                                         dm.token = action.position;
 928                                         NotifyModified(dm);
 929                                 } else {
 930                                         NotifyModified(DocModification(
 931                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
 932                                 }
 933                                 cb.PerformRedoStep();
 934                                 if (action.at != containerAction) {
 935                                         ModifiedAt(action.position);
 936                                         newPos = action.position;
 937                                 }
 938
 939                                 int modFlags = SC_PERFORMED_REDO;
 940                                 if (action.at == insertAction) {
 941                                         newPos += action.lenData;
 942                                         modFlags |= SC_MOD_INSERTTEXT;
 943                                 } else if (action.at == removeAction) {
 944                                         modFlags |= SC_MOD_DELETETEXT;
 945                                 }
 946                                 if (steps > 1)
 947                                         modFlags |= SC_MULTISTEPUNDOREDO;
 948                                 const int linesAdded = LinesTotal() - prevLinesTotal;
 949                                 if (linesAdded != 0)
 950                                         multiLine = true;
 951                                 if (step == steps - 1) {
 952                                         modFlags |= SC_LASTSTEPINUNDOREDO;
 953                                         if (multiLine)
 954                                                 modFlags |= SC_MULTILINEUNDOREDO;
 955                                 }
 956                                 NotifyModified(
 957                                         DocModification(modFlags, action.position, action.lenData,
 958                                                                         linesAdded, action.data));
 959                         }
 960
 961                         bool endSavePoint = cb.IsSavePoint();
 962                         if (startSavePoint != endSavePoint)
 963                                 NotifySavePoint(endSavePoint);
 964                 }
 965                 enteredModification--;
 966         }
 967         return newPos;
 968 }
 969
 970 /**
 971  * Insert a single character.
 972  */
 973 bool Document::InsertChar(int pos, char ch) {
 974         char chs[1];
 975         chs[0] = ch;
 976         return InsertString(pos, chs, 1);
 977 }
 978
 979 /**
 980  * Insert a null terminated string.
 981  */
 982 bool Document::InsertCString(int position, const char *s) {
 983         return InsertString(position, s, static_cast<int>(s ? strlen(s) : 0));
 984 }
 985
 986 void Document::ChangeChar(int pos, char ch) {
 987         DeleteChars(pos, 1);
 988         InsertChar(pos, ch);
 989 }
 990
 991 void Document::DelChar(int pos) {
 992         DeleteChars(pos, LenChar(pos));
 993 }
 994
 995 void Document::DelCharBack(int pos) {
 996         if (pos <= 0) {
 997                 return;
 998         } else if (IsCrLf(pos - 2)) {
 999                 DeleteChars(pos - 2, 2);
1000         } else if (dbcsCodePage) {
1001                 int startChar = NextPosition(pos, -1);
1002                 DeleteChars(startChar, pos - startChar);
1003         } else {
1004                 DeleteChars(pos - 1, 1);
1005         }
1006 }
1007
// Return the first multiple of tabSize that is strictly greater than pos
// (for non-negative pos). tabSize must not be zero.
static int NextTab(int pos, int tabSize) {
        const int tabStopsPassed = pos / tabSize;
        return (tabStopsPassed + 1) * tabSize;
}
1011
// Build the white space that indents a line by indent columns.
// Unless insertSpaces is set, as many tabs as fit are used first, each tab
// counting for tabSize columns; the remainder is made up of spaces.
// A tabSize that is not positive cannot be expressed with tabs, so only
// spaces are produced in that case instead of looping without end.
// A non-positive indent yields an empty string.
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
        std::string indentation;
        if (indent <= 0)
                return indentation;
        if (!insertSpaces && (tabSize > 0)) {
                indentation.append(static_cast<std::string::size_type>(indent / tabSize), '\t');
                indent %= tabSize;
        }
        indentation.append(static_cast<std::string::size_type>(indent), ' ');
        return indentation;
}
1026
1027 int SCI_METHOD Document::GetLineIndentation(int line) {
1028         int indent = 0;
1029         if ((line >= 0) && (line < LinesTotal())) {
1030                 int lineStart = LineStart(line);
1031                 int length = Length();
1032                 for (int i = lineStart; i < length; i++) {
1033                         char ch = cb.CharAt(i);
1034                         if (ch == ' ')
1035                                 indent++;
1036                         else if (ch == '\t')
1037                                 indent = NextTab(indent, tabInChars);
1038                         else
1039                                 return indent;
1040                 }
1041         }
1042         return indent;
1043 }
1044
1045 void Document::SetLineIndentation(int line, int indent) {
1046         int indentOfLine = GetLineIndentation(line);
1047         if (indent < 0)
1048                 indent = 0;
1049         if (indent != indentOfLine) {
1050                 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1051                 int thisLineStart = LineStart(line);
1052                 int indentPos = GetLineIndentPosition(line);
1053                 UndoGroup ug(this);
1054                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1055                 InsertCString(thisLineStart, linebuf.c_str());
1056         }
1057 }
1058
1059 int Document::GetLineIndentPosition(int line) const {
1060         if (line < 0)
1061                 return 0;
1062         int pos = LineStart(line);
1063         int length = Length();
1064         while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1065                 pos++;
1066         }
1067         return pos;
1068 }
1069
1070 int Document::GetColumn(int pos) {
1071         int column = 0;
1072         int line = LineFromPosition(pos);
1073         if ((line >= 0) && (line < LinesTotal())) {
1074                 for (int i = LineStart(line); i < pos;) {
1075                         char ch = cb.CharAt(i);
1076                         if (ch == '\t') {
1077                                 column = NextTab(column, tabInChars);
1078                                 i++;
1079                         } else if (ch == '\r') {
1080                                 return column;
1081                         } else if (ch == '\n') {
1082                                 return column;
1083                         } else if (i >= Length()) {
1084                                 return column;
1085                         } else {
1086                                 column++;
1087                                 i = NextPosition(i, 1);
1088                         }
1089                 }
1090         }
1091         return column;
1092 }
1093
1094 int Document::CountCharacters(int startPos, int endPos) {
1095         startPos = MovePositionOutsideChar(startPos, 1, false);
1096         endPos = MovePositionOutsideChar(endPos, -1, false);
1097         int count = 0;
1098         int i = startPos;
1099         while (i < endPos) {
1100                 count++;
1101                 if (IsCrLf(i))
1102                         i++;
1103                 i = NextPosition(i, 1);
1104         }
1105         return count;
1106 }
1107
1108 int Document::FindColumn(int line, int column) {
1109         int position = LineStart(line);
1110         if ((line >= 0) && (line < LinesTotal())) {
1111                 int columnCurrent = 0;
1112                 while ((columnCurrent < column) && (position < Length())) {
1113                         char ch = cb.CharAt(position);
1114                         if (ch == '\t') {
1115                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1116                                 if (columnCurrent > column)
1117                                         return position;
1118                                 position++;
1119                         } else if (ch == '\r') {
1120                                 return position;
1121                         } else if (ch == '\n') {
1122                                 return position;
1123                         } else {
1124                                 columnCurrent++;
1125                                 position = NextPosition(position, 1);
1126                         }
1127                 }
1128         }
1129         return position;
1130 }
1131
1132 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1133         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1134         for (int line = lineBottom; line >= lineTop; line--) {
1135                 int indentOfLine = GetLineIndentation(line);
1136                 if (forwards) {
1137                         if (LineStart(line) < LineEnd(line)) {
1138                                 SetLineIndentation(line, indentOfLine + IndentSize());
1139                         }
1140                 } else {
1141                         SetLineIndentation(line, indentOfLine - IndentSize());
1142                 }
1143         }
1144 }
1145
1146 // Convert line endings for a piece of text to a particular mode.
1147 // Stop at len or when a NUL is found.
1148 // Caller must delete the returned pointer.
1149 char *Document::TransformLineEnds(int *pLenOut, const char *s, size_t len, int eolModeWanted) {
1150         char *dest = new char[2 * len + 1];
1151         const char *sptr = s;
1152         char *dptr = dest;
1153         for (size_t i = 0; (i < len) && (*sptr != '\0'); i++) {
1154                 if (*sptr == '\n' || *sptr == '\r') {
1155                         if (eolModeWanted == SC_EOL_CR) {
1156                                 *dptr++ = '\r';
1157                         } else if (eolModeWanted == SC_EOL_LF) {
1158                                 *dptr++ = '\n';
1159                         } else { // eolModeWanted == SC_EOL_CRLF
1160                                 *dptr++ = '\r';
1161                                 *dptr++ = '\n';
1162                         }
1163                         if ((*sptr == '\r') && (i+1 < len) && (*(sptr+1) == '\n')) {
1164                                 i++;
1165                                 sptr++;
1166                         }
1167                         sptr++;
1168                 } else {
1169                         *dptr++ = *sptr++;
1170                 }
1171         }
1172         *dptr++ = '\0';
1173         *pLenOut = (dptr - dest) - 1;
1174         return dest;
1175 }
1176
1177 void Document::ConvertLineEnds(int eolModeSet) {
1178         UndoGroup ug(this);
1179
1180         for (int pos = 0; pos < Length(); pos++) {
1181                 if (cb.CharAt(pos) == '\r') {
1182                         if (cb.CharAt(pos + 1) == '\n') {
1183                                 // CRLF
1184                                 if (eolModeSet == SC_EOL_CR) {
1185                                         DeleteChars(pos + 1, 1); // Delete the LF
1186                                 } else if (eolModeSet == SC_EOL_LF) {
1187                                         DeleteChars(pos, 1); // Delete the CR
1188                                 } else {
1189                                         pos++;
1190                                 }
1191                         } else {
1192                                 // CR
1193                                 if (eolModeSet == SC_EOL_CRLF) {
1194                                         InsertString(pos + 1, "\n", 1); // Insert LF
1195                                         pos++;
1196                                 } else if (eolModeSet == SC_EOL_LF) {
1197                                         InsertString(pos, "\n", 1); // Insert LF
1198                                         DeleteChars(pos + 1, 1); // Delete CR
1199                                 }
1200                         }
1201                 } else if (cb.CharAt(pos) == '\n') {
1202                         // LF
1203                         if (eolModeSet == SC_EOL_CRLF) {
1204                                 InsertString(pos, "\r", 1); // Insert CR
1205                                 pos++;
1206                         } else if (eolModeSet == SC_EOL_CR) {
1207                                 InsertString(pos, "\r", 1); // Insert CR
1208                                 DeleteChars(pos + 1, 1); // Delete LF
1209                         }
1210                 }
1211         }
1212
1213 }
1214
1215 bool Document::IsWhiteLine(int line) const {
1216         int currentChar = LineStart(line);
1217         int endLine = LineEnd(line);
1218         while (currentChar < endLine) {
1219                 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1220                         return false;
1221                 }
1222                 ++currentChar;
1223         }
1224         return true;
1225 }
1226
1227 int Document::ParaUp(int pos) {
1228         int line = LineFromPosition(pos);
1229         line--;
1230         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1231                 line--;
1232         }
1233         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1234                 line--;
1235         }
1236         line++;
1237         return LineStart(line);
1238 }
1239
1240 int Document::ParaDown(int pos) {
1241         int line = LineFromPosition(pos);
1242         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1243                 line++;
1244         }
1245         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1246                 line++;
1247         }
1248         if (line < LinesTotal())
1249                 return LineStart(line);
1250         else // end of a document
1251                 return LineEnd(line-1);
1252 }
1253
1254 CharClassify::cc Document::WordCharClass(unsigned char ch) {
1255         if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1256                 return CharClassify::ccWord;
1257         return charClass.GetClass(ch);
1258 }
1259
1260 /**
1261  * Used by commmands that want to select whole words.
1262  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1263  */
1264 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1265         CharClassify::cc ccStart = CharClassify::ccWord;
1266         if (delta < 0) {
1267                 if (!onlyWordCharacters)
1268                         ccStart = WordCharClass(cb.CharAt(pos-1));
1269                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1270                         pos--;
1271         } else {
1272                 if (!onlyWordCharacters && pos < Length())
1273                         ccStart = WordCharClass(cb.CharAt(pos));
1274                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1275                         pos++;
1276         }
1277         return MovePositionOutsideChar(pos, delta, true);
1278 }
1279
1280 /**
1281  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1282  * (delta < 0).
1283  * This is looking for a transition between character classes although there is also some
1284  * additional movement to transit white space.
1285  * Used by cursor movement by word commands.
1286  */
1287 int Document::NextWordStart(int pos, int delta) {
1288         if (delta < 0) {
1289                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1290                         pos--;
1291                 if (pos > 0) {
1292                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1293                         while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1294                                 pos--;
1295                         }
1296                 }
1297         } else {
1298                 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1299                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1300                         pos++;
1301                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1302                         pos++;
1303         }
1304         return pos;
1305 }
1306
1307 /**
1308  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1309  * (delta < 0).
1310  * This is looking for a transition between character classes although there is also some
1311  * additional movement to transit white space.
1312  * Used by cursor movement by word commands.
1313  */
1314 int Document::NextWordEnd(int pos, int delta) {
1315         if (delta < 0) {
1316                 if (pos > 0) {
1317                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1318                         if (ccStart != CharClassify::ccSpace) {
1319                                 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1320                                         pos--;
1321                                 }
1322                         }
1323                         while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1324                                 pos--;
1325                         }
1326                 }
1327         } else {
1328                 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1329                         pos++;
1330                 }
1331                 if (pos < Length()) {
1332                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1333                         while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1334                                 pos++;
1335                         }
1336                 }
1337         }
1338         return pos;
1339 }
1340
1341 /**
1342  * Check that the character at the given position is a word or punctuation character and that
1343  * the previous character is of a different character class.
1344  */
1345 bool Document::IsWordStartAt(int pos) {
1346         if (pos > 0) {
1347                 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1348                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1349                         (ccPos != WordCharClass(CharAt(pos - 1)));
1350         }
1351         return true;
1352 }
1353
1354 /**
1355  * Check that the character at the given position is a word or punctuation character and that
1356  * the next character is of a different character class.
1357  */
1358 bool Document::IsWordEndAt(int pos) {
1359         if (pos < Length()) {
1360                 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1361                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1362                         (ccPrev != WordCharClass(CharAt(pos)));
1363         }
1364         return true;
1365 }
1366
1367 /**
1368  * Check that the given range is has transitions between character classes at both
1369  * ends and where the characters on the inside are word or punctuation characters.
1370  */
1371 bool Document::IsWordAt(int start, int end) {
1372         return IsWordStartAt(start) && IsWordEndAt(end);
1373 }
1374
// Map 'A'..'Z' onto 'a'..'z'; every other value is returned unchanged.
static inline char MakeLowerCase(char ch) {
        const bool isUpper = (ch >= 'A') && (ch <= 'Z');
        return isUpper ? static_cast<char>(ch - 'A' + 'a') : ch;
}
1381
1382 CaseFolderTable::CaseFolderTable() {
1383         for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1384                 mapping[iChar] = static_cast<char>(iChar);
1385         }
1386 }
1387
1388 CaseFolderTable::~CaseFolderTable() {
1389 }
1390
1391 size_t CaseFolderTable::Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) {
1392         if (lenMixed > sizeFolded) {
1393                 return 0;
1394         } else {
1395                 for (size_t i=0; i<lenMixed; i++) {
1396                         folded[i] = mapping[static_cast<unsigned char>(mixed[i])];
1397                 }
1398                 return lenMixed;
1399         }
1400 }
1401
1402 void CaseFolderTable::SetTranslation(char ch, char chTranslation) {
1403         mapping[static_cast<unsigned char>(ch)] = chTranslation;
1404 }
1405
1406 void CaseFolderTable::StandardASCII() {
1407         for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1408                 if (iChar >= 'A' && iChar <= 'Z') {
1409                         mapping[iChar] = static_cast<char>(iChar - 'A' + 'a');
1410                 } else {
1411                         mapping[iChar] = static_cast<char>(iChar);
1412                 }
1413         }
1414 }
1415
1416 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) {
1417         return (!word && !wordStart) ||
1418                         (word && IsWordAt(pos, pos + length)) ||
1419                         (wordStart && IsWordStartAt(pos));
1420 }
1421
/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search)
 * Has not been tested with backwards DBCS searches yet.
 *
 * @param minPos        position where the search starts.
 * @param maxPos        position where the search stops.
 * @param search        text to look for, or the pattern when regExp is set.
 * @param caseSensitive when true document bytes are compared directly with search;
 *                      otherwise both sides are folded through pcf before comparing.
 * @param word          passed to MatchesWordOptions to filter candidate matches.
 * @param wordStart     passed to MatchesWordOptions to filter candidate matches.
 * @param regExp        when true the whole search is delegated to the regex object.
 * @param flags         only forwarded to the regex search.
 * @param length        in: length of search in bytes. The case insensitive UTF-8 and DBCS
 *                      paths overwrite it with the length of the matched document text.
 * @param pcf           case folder, dereferenced only on the case insensitive non-regex paths.
 * @return minPos when *length is not positive, the result of the regex search when regExp
 *         is set, otherwise the position of the match or -1 when nothing was found.
 */
long Document::FindText(int minPos, int maxPos, const char *search,
                        bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
                        int *length, CaseFolder *pcf) {
        // Nothing to search for: report the start position.
        if (*length <= 0)
                return minPos;
        if (regExp) {
                // The regex object is created the first time it is needed.
                if (!regex)
                        regex = CreateRegexSearch(&charClass);
                return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
        } else {

                const bool forward = minPos <= maxPos;
                const int increment = forward ? 1 : -1;

                // Range endpoints should not be inside DBCS characters, but just in case, move them.
                const int startPos = MovePositionOutsideChar(minPos, increment, false);
                const int endPos = MovePositionOutsideChar(maxPos, increment, false);

                // Compute actual search ranges needed
                const int lengthFind = *length;

                //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
                // A match may never extend beyond the higher of the two range ends.
                const int limitPos = Platform::Maximum(startPos, endPos);
                int pos = startPos;
                if (!forward) {
                        // Back all of a character
                        pos = NextPosition(pos, increment);
                }
                if (caseSensitive) {
                        // Forward: stop at the last position where lengthFind bytes still fit before endPos.
                        const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        const char charStartSearch =  search[0];
                        while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                // Compare the first byte before examining the remaining bytes.
                                if (CharAt(pos) == charStartSearch) {
                                        bool found = (pos + lengthFind) <= limitPos;
                                        for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
                                                found = CharAt(pos + indexSearch) == search[indexSearch];
                                        }
                                        if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                                return pos;
                                        }
                                }
                                // NextCharacter returning false ends the search.
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                } else if (SC_CP_UTF8 == dbcsCodePage) {
                        // Case insensitive UTF-8: fold the search text once, then fold the
                        // document one character at a time and compare the folded bytes.
                        const size_t maxFoldingExpansion = 4;
                        std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
                        const int lenSearch = static_cast<int>(
                                pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
                        char bytes[UTF8MaxBytes + 1];
                        char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                int widthFirstCharacter = 0;
                                int posIndexDocument = pos;
                                int indexSearch = 0;
                                bool characterMatches = true;
                                for (;;) {
                                        // Collect the bytes of the document character at posIndexDocument.
                                        const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
                                        bytes[0] = leadByte;
                                        int widthChar = 1;
                                        if (!UTF8IsAscii(leadByte)) {
                                                const int widthCharBytes = UTF8BytesOfLead[leadByte];
                                                for (int b=1; b<widthCharBytes; b++) {
                                                        bytes[b] = cb.CharAt(posIndexDocument+b);
                                                }
                                                widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
                                        }
                                        // Remember the width of the character at pos for the forward step below.
                                        if (!widthFirstCharacter)
                                                widthFirstCharacter = widthChar;
                                        if ((posIndexDocument + widthChar) > limitPos)
                                                break;
                                        const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
                                        folded[lenFlat] = 0;
                                        // Does folded match the buffer
                                        characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        if (!characterMatches)
                                                break;
                                        posIndexDocument += widthChar;
                                        indexSearch += lenFlat;
                                        if (indexSearch >= lenSearch)
                                                break;
                                }
                                if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
                                        if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
                                                // Report the length of the document text that matched.
                                                *length = posIndexDocument - pos;
                                                return pos;
                                        }
                                }
                                if (forward) {
                                        pos += widthFirstCharacter;
                                } else {
                                        if (!NextCharacter(pos, increment))
                                                break;
                                }
                        }
                } else if (dbcsCodePage) {
                        // Case insensitive DBCS: same scheme as above with characters of 1 or 2 bytes.
                        const size_t maxBytesCharacter = 2;
                        const size_t maxFoldingExpansion = 4;
                        std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
                        const int lenSearch = static_cast<int>(
                                pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                int indexDocument = 0;
                                int indexSearch = 0;
                                bool characterMatches = true;
                                while (characterMatches &&
                                        ((pos + indexDocument) < limitPos) &&
                                        (indexSearch < lenSearch)) {
                                        char bytes[maxBytesCharacter + 1];
                                        bytes[0] = cb.CharAt(pos + indexDocument);
                                        const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
                                        if (widthChar == 2)
                                                bytes[1] = cb.CharAt(pos + indexDocument + 1);
                                        if ((pos + indexDocument + widthChar) > limitPos)
                                                break;
                                        char folded[maxBytesCharacter * maxFoldingExpansion + 1];
                                        const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
                                        folded[lenFlat] = 0;
                                        // Does folded match the buffer
                                        characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        indexDocument += widthChar;
                                        indexSearch += lenFlat;
                                }
                                if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
                                        if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
                                                // Report the length of the document text that matched.
                                                *length = indexDocument;
                                                return pos;
                                        }
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                } else {
                        // Case insensitive single byte text: fold the search text once and
                        // fold each document byte as it is compared.
                        const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        std::vector<char> searchThing(lengthFind + 1);
                        pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
                        while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                bool found = (pos + lengthFind) <= limitPos;
                                for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
                                        char ch = CharAt(pos + indexSearch);
                                        char folded[2];
                                        pcf->Fold(folded, sizeof(folded), &ch, 1);
                                        found = folded[0] == searchThing[indexSearch];
                                }
                                if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                        return pos;
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                }
        }
        //Platform::DebugPrintf("Not found\n");
        return -1;
}
1582
1583 const char *Document::SubstituteByPosition(const char *text, int *length) {
1584         if (regex)
1585                 return regex->SubstituteByPosition(this, text, length);
1586         else
1587                 return 0;
1588 }
1589
1590 int Document::LinesTotal() const {
1591         return cb.Lines();
1592 }
1593
1594 void Document::ChangeCase(Range r, bool makeUpperCase) {
1595         for (int pos = r.start; pos < r.end;) {
1596                 int len = LenChar(pos);
1597                 if (len == 1) {
1598                         char ch = CharAt(pos);
1599                         if (makeUpperCase) {
1600                                 if (IsLowerCase(ch)) {
1601                                         ChangeChar(pos, static_cast<char>(MakeUpperCase(ch)));
1602                                 }
1603                         } else {
1604                                 if (IsUpperCase(ch)) {
1605                                         ChangeChar(pos, static_cast<char>(MakeLowerCase(ch)));
1606                                 }
1607                         }
1608                 }
1609                 pos += len;
1610         }
1611 }
1612
1613 void Document::SetDefaultCharClasses(bool includeWordClass) {
1614     charClass.SetDefaultCharClasses(includeWordClass);
1615 }
1616
1617 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1618     charClass.SetCharClasses(chars, newCharClass);
1619 }
1620
1621 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1622     return charClass.GetCharsOfClass(characterClass, buffer);
1623 }
1624
1625 void Document::SetStylingBits(int bits) {
1626         stylingBits = bits;
1627         stylingBitsMask = (1 << stylingBits) - 1;
1628 }
1629
1630 void SCI_METHOD Document::StartStyling(int position, char mask) {
1631         stylingMask = mask;
1632         endStyled = position;
1633 }
1634
1635 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1636         if (enteredStyling != 0) {
1637                 return false;
1638         } else {
1639                 enteredStyling++;
1640                 style &= stylingMask;
1641                 int prevEndStyled = endStyled;
1642                 if (cb.SetStyleFor(endStyled, length, style, stylingMask)) {
1643                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1644                                            prevEndStyled, length);
1645                         NotifyModified(mh);
1646                 }
1647                 endStyled += length;
1648                 enteredStyling--;
1649                 return true;
1650         }
1651 }
1652
1653 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1654         if (enteredStyling != 0) {
1655                 return false;
1656         } else {
1657                 enteredStyling++;
1658                 bool didChange = false;
1659                 int startMod = 0;
1660                 int endMod = 0;
1661                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1662                         PLATFORM_ASSERT(endStyled < Length());
1663                         if (cb.SetStyleAt(endStyled, styles[iPos], stylingMask)) {
1664                                 if (!didChange) {
1665                                         startMod = endStyled;
1666                                 }
1667                                 didChange = true;
1668                                 endMod = endStyled;
1669                         }
1670                 }
1671                 if (didChange) {
1672                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1673                                            startMod, endMod - startMod + 1);
1674                         NotifyModified(mh);
1675                 }
1676                 enteredStyling--;
1677                 return true;
1678         }
1679 }
1680
1681 void Document::EnsureStyledTo(int pos) {
1682         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1683                 IncrementStyleClock();
1684                 if (pli && !pli->UseContainerLexing()) {
1685                         int lineEndStyled = LineFromPosition(GetEndStyled());
1686                         int endStyledTo = LineStart(lineEndStyled);
1687                         pli->Colourise(endStyledTo, pos);
1688                 } else {
1689                         // Ask the watchers to style, and stop as soon as one responds.
1690                         for (int i = 0; pos > GetEndStyled() && i < lenWatchers; i++) {
1691                                 watchers[i].watcher->NotifyStyleNeeded(this, watchers[i].userData, pos);
1692                         }
1693                 }
1694         }
1695 }
1696
1697 void Document::LexerChanged() {
1698         // Tell the watchers the lexer has changed.
1699         for (int i = 0; i < lenWatchers; i++) {
1700                 watchers[i].watcher->NotifyLexerChanged(this, watchers[i].userData);
1701         }
1702 }
1703
1704 int SCI_METHOD Document::SetLineState(int line, int state) {
1705         int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1706         if (state != statePrevious) {
1707                 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1708                 NotifyModified(mh);
1709         }
1710         return statePrevious;
1711 }
1712
1713 int SCI_METHOD Document::GetLineState(int line) const {
1714         return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1715 }
1716
1717 int Document::GetMaxLineState() {
1718         return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1719 }
1720
1721 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1722         DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1723         NotifyModified(mh);
1724 }
1725
1726 StyledText Document::MarginStyledText(int line) {
1727         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1728         return StyledText(pla->Length(line), pla->Text(line),
1729                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1730 }
1731
1732 void Document::MarginSetText(int line, const char *text) {
1733         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1734         DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1735         NotifyModified(mh);
1736 }
1737
1738 void Document::MarginSetStyle(int line, int style) {
1739         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1740         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1741 }
1742
1743 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1744         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1745         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1746 }
1747
1748 int Document::MarginLength(int line) const {
1749         return static_cast<LineAnnotation *>(perLineData[ldMargin])->Length(line);
1750 }
1751
1752 void Document::MarginClearAll() {
1753         int maxEditorLine = LinesTotal();
1754         for (int l=0; l<maxEditorLine; l++)
1755                 MarginSetText(l, 0);
1756         // Free remaining data
1757         static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1758 }
1759
1760 bool Document::AnnotationAny() const {
1761         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->AnySet();
1762 }
1763
1764 StyledText Document::AnnotationStyledText(int line) {
1765         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1766         return StyledText(pla->Length(line), pla->Text(line),
1767                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1768 }
1769
1770 void Document::AnnotationSetText(int line, const char *text) {
1771         if (line >= 0 && line < LinesTotal()) {
1772                 const int linesBefore = AnnotationLines(line);
1773                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1774                 const int linesAfter = AnnotationLines(line);
1775                 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1776                 mh.annotationLinesAdded = linesAfter - linesBefore;
1777                 NotifyModified(mh);
1778         }
1779 }
1780
1781 void Document::AnnotationSetStyle(int line, int style) {
1782         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1783         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1784         NotifyModified(mh);
1785 }
1786
1787 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1788         if (line >= 0 && line < LinesTotal()) {
1789                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1790         }
1791 }
1792
1793 int Document::AnnotationLength(int line) const {
1794         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Length(line);
1795 }
1796
1797 int Document::AnnotationLines(int line) const {
1798         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1799 }
1800
1801 void Document::AnnotationClearAll() {
1802         int maxEditorLine = LinesTotal();
1803         for (int l=0; l<maxEditorLine; l++)
1804                 AnnotationSetText(l, 0);
1805         // Free remaining data
1806         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1807 }
1808
1809 void Document::IncrementStyleClock() {
1810         styleClock = (styleClock + 1) % 0x100000;
1811 }
1812
1813 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1814         if (decorations.FillRange(position, value, fillLength)) {
1815                 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1816                                                         position, fillLength);
1817                 NotifyModified(mh);
1818         }
1819 }
1820
1821 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1822         for (int i = 0; i < lenWatchers; i++) {
1823                 if ((watchers[i].watcher == watcher) &&
1824                         (watchers[i].userData == userData))
1825                         return false;
1826         }
1827         WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers + 1];
1828         for (int j = 0; j < lenWatchers; j++)
1829                 pwNew[j] = watchers[j];
1830         pwNew[lenWatchers].watcher = watcher;
1831         pwNew[lenWatchers].userData = userData;
1832         delete []watchers;
1833         watchers = pwNew;
1834         lenWatchers++;
1835         return true;
1836 }
1837
1838 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1839         for (int i = 0; i < lenWatchers; i++) {
1840                 if ((watchers[i].watcher == watcher) &&
1841                         (watchers[i].userData == userData)) {
1842                         if (lenWatchers == 1) {
1843                                 delete []watchers;
1844                                 watchers = 0;
1845                                 lenWatchers = 0;
1846                         } else {
1847                                 WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers];
1848                                 for (int j = 0; j < lenWatchers - 1; j++) {
1849                                         pwNew[j] = (j < i) ? watchers[j] : watchers[j + 1];
1850                                 }
1851                                 delete []watchers;
1852                                 watchers = pwNew;
1853                                 lenWatchers--;
1854                         }
1855                         return true;
1856                 }
1857         }
1858         return false;
1859 }
1860
1861 void Document::NotifyModifyAttempt() {
1862         for (int i = 0; i < lenWatchers; i++) {
1863                 watchers[i].watcher->NotifyModifyAttempt(this, watchers[i].userData);
1864         }
1865 }
1866
1867 void Document::NotifySavePoint(bool atSavePoint) {
1868         for (int i = 0; i < lenWatchers; i++) {
1869                 watchers[i].watcher->NotifySavePoint(this, watchers[i].userData, atSavePoint);
1870         }
1871 }
1872
1873 void Document::NotifyModified(DocModification mh) {
1874         if (mh.modificationType & SC_MOD_INSERTTEXT) {
1875                 decorations.InsertSpace(mh.position, mh.length);
1876         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1877                 decorations.DeleteRange(mh.position, mh.length);
1878         }
1879         for (int i = 0; i < lenWatchers; i++) {
1880                 watchers[i].watcher->NotifyModified(this, mh, watchers[i].userData);
1881         }
1882 }
1883
1884 bool Document::IsWordPartSeparator(char ch) {
1885         return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1886 }
1887
/**
 * Move from pos towards the start of the document to the beginning of the
 * previous word part and return that position.
 * A word part is a run of characters of one kind: lower case letters (which may be
 * headed by one upper case letter), upper case letters, digits, punctuation,
 * space characters or non-ASCII bytes. Word part separators directly before pos
 * are skipped first. A pos of 0 or less is returned unchanged.
 */
int Document::WordPartLeft(int pos) {
        if (pos > 0) {
                --pos;
                char startChar = cb.CharAt(pos);
                // Skip backwards over any run of separators.
                if (IsWordPartSeparator(startChar)) {
                        while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
                                --pos;
                        }
                }
                if (pos > 0) {
                        // startChar decides the kind of run; each branch walks back over the run
                        // and then steps forward again if it stopped on a character outside it.
                        startChar = cb.CharAt(pos);
                        --pos;
                        if (IsLowerCase(startChar)) {
                                while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
                                        --pos;
                                // An upper case letter in front of the lower case run is kept as part of it.
                                if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsUpperCase(startChar)) {
                                while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
                                        --pos;
                                if (!IsUpperCase(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsADigit(startChar)) {
                                while (pos > 0 && IsADigit(cb.CharAt(pos)))
                                        --pos;
                                if (!IsADigit(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsPunctuation(startChar)) {
                                while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
                                        --pos;
                                if (!IsPunctuation(cb.CharAt(pos)))
                                        ++pos;
                        } else if (isspacechar(startChar)) {
                                while (pos > 0 && isspacechar(cb.CharAt(pos)))
                                        --pos;
                                if (!isspacechar(cb.CharAt(pos)))
                                        ++pos;
                        } else if (!isascii(startChar)) {
                                while (pos > 0 && !isascii(cb.CharAt(pos)))
                                        --pos;
                                if (isascii(cb.CharAt(pos)))
                                        ++pos;
                        } else {
                                // None of the kinds above: undo the step back so only this character is passed.
                                ++pos;
                        }
                }
        }
        return pos;
}
1937
/**
 * Move from pos towards the end of the document past the current word part and
 * return the resulting position.
 * Word part separators at pos are skipped first; the character found after them
 * decides which kind of run (non-ASCII bytes, lower case, upper case, digits,
 * punctuation or space characters) is stepped over.
 */
int Document::WordPartRight(int pos) {
        char startChar = cb.CharAt(pos);
        int length = Length();
        // Skip forwards over any run of separators and classify what follows them.
        if (IsWordPartSeparator(startChar)) {
                while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
                        ++pos;
                startChar = cb.CharAt(pos);
        }
        if (!isascii(startChar)) {
                while (pos < length && !isascii(cb.CharAt(pos)))
                        ++pos;
        } else if (IsLowerCase(startChar)) {
                while (pos < length && IsLowerCase(cb.CharAt(pos)))
                        ++pos;
        } else if (IsUpperCase(startChar)) {
                if (IsLowerCase(cb.CharAt(pos + 1))) {
                        // One upper case letter followed by lower case letters: pass all of them.
                        ++pos;
                        while (pos < length && IsLowerCase(cb.CharAt(pos)))
                                ++pos;
                } else {
                        // A run of upper case letters.
                        while (pos < length && IsUpperCase(cb.CharAt(pos)))
                                ++pos;
                }
                // When an upper case run is followed by a lower case letter, the last upper
                // case letter is left for the next word part.
                if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
                        --pos;
        } else if (IsADigit(startChar)) {
                while (pos < length && IsADigit(cb.CharAt(pos)))
                        ++pos;
        } else if (IsPunctuation(startChar)) {
                while (pos < length && IsPunctuation(cb.CharAt(pos)))
                        ++pos;
        } else if (isspacechar(startChar)) {
                while (pos < length && isspacechar(cb.CharAt(pos)))
                        ++pos;
        } else {
                // None of the kinds above: step over this single character.
                ++pos;
        }
        return pos;
}
1977
// True for the line feed and carriage return characters only.
bool IsLineEndChar(char c) {
        switch (c) {
        case '\n':
        case '\r':
                return true;
        default:
                return false;
        }
}
1981
1982 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
1983         int sStart = cb.StyleAt(pos);
1984         if (delta < 0) {
1985                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
1986                         pos--;
1987                 pos++;
1988         } else {
1989                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
1990                         pos++;
1991         }
1992         return pos;
1993 }
1994
// Return the partner of a brace character, or '\0' when ch is not one of ()[]{}<>.
static char BraceOpposite(char ch) {
        // Partners sit next to each other, so flipping the lowest bit of the index
        // moves from a brace to its opposite.
        static const char pairs[] = "()[]{}<>";
        for (int i = 0; pairs[i] != '\0'; i++) {
                if (pairs[i] == ch)
                        return pairs[i ^ 1];
        }
        return '\0';
}
2017
2018 // TODO: should be able to extend styled region to find matching brace
2019 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2020         char chBrace = CharAt(position);
2021         char chSeek = BraceOpposite(chBrace);
2022         if (chSeek == '\0')
2023                 return - 1;
2024         char styBrace = static_cast<char>(StyleAt(position) & stylingBitsMask);
2025         int direction = -1;
2026         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2027                 direction = 1;
2028         int depth = 1;
2029         position = NextPosition(position, direction);
2030         while ((position >= 0) && (position < Length())) {
2031                 char chAtPos = CharAt(position);
2032                 char styAtPos = static_cast<char>(StyleAt(position) & stylingBitsMask);
2033                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2034                         if (chAtPos == chBrace)
2035                                 depth++;
2036                         if (chAtPos == chSeek)
2037                                 depth--;
2038                         if (depth == 0)
2039                                 return position;
2040                 }
2041                 int positionBeforeMove = position;
2042                 position = NextPosition(position, direction);
2043                 if (position == positionBeforeMove)
2044                         break;
2045         }
2046         return - 1;
2047 }
2048
2049 /**
2050  * Implementation of RegexSearchBase for the default built-in regular expression engine
2051  */
2052 class BuiltinRegex : public RegexSearchBase {
2053 public:
2054         BuiltinRegex(CharClassify *charClassTable) : search(charClassTable), substituted(NULL) {}
2055
2056         virtual ~BuiltinRegex() {
2057                 delete substituted;
2058         }
2059
2060         virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2061                         bool caseSensitive, bool word, bool wordStart, int flags,
2062                         int *length);
2063
2064         virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2065
2066 private:
2067         RESearch search;
2068         char *substituted;
2069 };
2070
2071 // Define a way for the Regular Expression code to access the document
2072 class DocumentIndexer : public CharacterIndexer {
2073         Document *pdoc;
2074         int end;
2075 public:
2076         DocumentIndexer(Document *pdoc_, int end_) :
2077                 pdoc(pdoc_), end(end_) {
2078         }
2079
2080         virtual ~DocumentIndexer() {
2081         }
2082
2083         virtual char CharAt(int index) {
2084                 if (index < 0 || index >= end)
2085                         return 0;
2086                 else
2087                         return pdoc->CharAt(index);
2088         }
2089 };
2090
2091 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2092                         bool caseSensitive, bool, bool, int flags,
2093                         int *length) {
2094         bool posix = (flags & SCFIND_POSIX) != 0;
2095         int increment = (minPos <= maxPos) ? 1 : -1;
2096
2097         int startPos = minPos;
2098         int endPos = maxPos;
2099
2100         // Range endpoints should not be inside DBCS characters, but just in case, move them.
2101         startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2102         endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2103
2104         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2105         if (errmsg) {
2106                 return -1;
2107         }
2108         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2109         // Replace first '.' with '-' in each property file variable reference:
2110         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2111         //     Replace: $(\1-\2)
2112         int lineRangeStart = doc->LineFromPosition(startPos);
2113         int lineRangeEnd = doc->LineFromPosition(endPos);
2114         if ((increment == 1) &&
2115                 (startPos >= doc->LineEnd(lineRangeStart)) &&
2116                 (lineRangeStart < lineRangeEnd)) {
2117                 // the start position is at end of line or between line end characters.
2118                 lineRangeStart++;
2119                 startPos = doc->LineStart(lineRangeStart);
2120         } else if ((increment == -1) &&
2121                    (startPos <= doc->LineStart(lineRangeStart)) &&
2122                    (lineRangeStart > lineRangeEnd)) {
2123                 // the start position is at beginning of line.
2124                 lineRangeStart--;
2125                 startPos = doc->LineEnd(lineRangeStart);
2126         }
2127         int pos = -1;
2128         int lenRet = 0;
2129         char searchEnd = s[*length - 1];
2130         char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2131         int lineRangeBreak = lineRangeEnd + increment;
2132         for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2133                 int startOfLine = doc->LineStart(line);
2134                 int endOfLine = doc->LineEnd(line);
2135                 if (increment == 1) {
2136                         if (line == lineRangeStart) {
2137                                 if ((startPos != startOfLine) && (s[0] == '^'))
2138                                         continue;       // Can't match start of line if start position after start of line
2139                                 startOfLine = startPos;
2140                         }
2141                         if (line == lineRangeEnd) {
2142                                 if ((endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2143                                         continue;       // Can't match end of line if end position before end of line
2144                                 endOfLine = endPos;
2145                         }
2146                 } else {
2147                         if (line == lineRangeEnd) {
2148                                 if ((endPos != startOfLine) && (s[0] == '^'))
2149                                         continue;       // Can't match start of line if end position after start of line
2150                                 startOfLine = endPos;
2151                         }
2152                         if (line == lineRangeStart) {
2153                                 if ((startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2154                                         continue;       // Can't match end of line if start position before end of line
2155                                 endOfLine = startPos;
2156                         }
2157                 }
2158
2159                 DocumentIndexer di(doc, endOfLine);
2160                 int success = search.Execute(di, startOfLine, endOfLine);
2161                 if (success) {
2162                         pos = search.bopat[0];
2163                         lenRet = search.eopat[0] - search.bopat[0];
2164                         // There can be only one start of a line, so no need to look for last match in line
2165                         if ((increment == -1) && (s[0] != '^')) {
2166                                 // Check for the last match on this line.
2167                                 int repetitions = 1000; // Break out of infinite loop
2168                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2169                                         success = search.Execute(di, pos+1, endOfLine);
2170                                         if (success) {
2171                                                 if (search.eopat[0] <= minPos) {
2172                                                         pos = search.bopat[0];
2173                                                         lenRet = search.eopat[0] - search.bopat[0];
2174                                                 } else {
2175                                                         success = 0;
2176                                                 }
2177                                         }
2178                                 }
2179                         }
2180                         break;
2181                 }
2182         }
2183         *length = lenRet;
2184         return pos;
2185 }
2186
2187 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2188         delete []substituted;
2189         substituted = 0;
2190         DocumentIndexer di(doc, doc->Length());
2191         if (!search.GrabMatches(di))
2192                 return 0;
2193         unsigned int lenResult = 0;
2194         for (int i = 0; i < *length; i++) {
2195                 if (text[i] == '\\') {
2196                         if (text[i + 1] >= '0' && text[i + 1] <= '9') {
2197                                 unsigned int patNum = text[i + 1] - '0';
2198                                 lenResult += search.eopat[patNum] - search.bopat[patNum];
2199                                 i++;
2200                         } else {
2201                                 switch (text[i + 1]) {
2202                                 case 'a':
2203                                 case 'b':
2204                                 case 'f':
2205                                 case 'n':
2206                                 case 'r':
2207                                 case 't':
2208                                 case 'v':
2209                                 case '\\':
2210                                         i++;
2211                                 }
2212                                 lenResult++;
2213                         }
2214                 } else {
2215                         lenResult++;
2216                 }
2217         }
2218         substituted = new char[lenResult + 1];
2219         char *o = substituted;
2220         for (int j = 0; j < *length; j++) {
2221                 if (text[j] == '\\') {
2222                         if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2223                                 unsigned int patNum = text[j + 1] - '0';
2224                                 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2225                                 if (search.pat[patNum]) // Will be null if try for a match that did not occur
2226                                         memcpy(o, search.pat[patNum], len);
2227                                 o += len;
2228                                 j++;
2229                         } else {
2230                                 j++;
2231                                 switch (text[j]) {
2232                                 case 'a':
2233                                         *o++ = '\a';
2234                                         break;
2235                                 case 'b':
2236                                         *o++ = '\b';
2237                                         break;
2238                                 case 'f':
2239                                         *o++ = '\f';
2240                                         break;
2241                                 case 'n':
2242                                         *o++ = '\n';
2243                                         break;
2244                                 case 'r':
2245                                         *o++ = '\r';
2246                                         break;
2247                                 case 't':
2248                                         *o++ = '\t';
2249                                         break;
2250                                 case 'v':
2251                                         *o++ = '\v';
2252                                         break;
2253                                 case '\\':
2254                                         *o++ = '\\';
2255                                         break;
2256                                 default:
2257                                         *o++ = '\\';
2258                                         j--;
2259                                 }
2260                         }
2261                 } else {
2262                         *o++ = text[j];
2263                 }
2264         }
2265         *o = '\0';
2266         *length = lenResult;
2267         return substituted;
2268 }
2269
2270 #ifndef SCI_OWNREGEX
2271
2272 #ifdef SCI_NAMESPACE
2273
2274 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2275         return new BuiltinRegex(charClassTable);
2276 }
2277
2278 #else
2279
2280 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2281         return new BuiltinRegex(charClassTable);
2282 }
2283
2284 #endif
2285
2286 #endif