]> git.saurik.com Git - wxWidgets.git/blob - contrib/src/stc/scintilla/src/PosRegExp.cxx
Moved from Scintilla version 1.25 to 1.32
[wxWidgets.git] / contrib / src / stc / scintilla / src / PosRegExp.cxx
1 #include <string.h>
2 #include <stdio.h>
3 #include <ctype.h>
4 #include <malloc.h>
5
6 #include "PosRegExp.h"
7
8 //Up: /[A-Z \x80-\x9f \xf0 ]/x
9 //Lo: /[a-z \xa0-\xaf \xe0-\xef \xf1 ]/x
10 //Wd: /[\d _ A-Z a-z \xa0-\xaf \xe0-\xf1 \x80-\x9f]/x
11 //* // Dos866
12 SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0xffffffff, 0x0, 0x0, 0x10000},
13 LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0xffff, 0x0, 0x2ffff},
14 WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0xffffffff, 0xffff, 0x0, 0x3ffff},
15 DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
16 /*/ // cp1251
17 SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff, 0x0},
18 LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff},
19 WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0x0, 0x0, 0xffffffff, 0xffffffff},
20 DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
21 //*/
22
23 ///////////////////////////////////////////////
24
25 int GetNumber(int *str,int s,int e) {
26 int r = 1, num = 0;
27 if (e < s) return -1;
28 for(int i = e-1; i >= s; i--) {
29 if (str[i] > '9' || str[i] < '0') return -1;
30 num += (str[i] - 0x30)*r;
31 r *= 10;
32 };
33 return num;
34 /*
35 char tmp[20];
36 double Res;
37 if (e == s) return -1;
38 for (int i = s;i < e;i++)
39 tmp[i-s] = (char)Str[i];
40 tmp[e-s] = 0;
41 GetNumber(tmp,&Res);
42 return (int)Res;
43 */
44 };
45
46 bool IsDigit(char Symb) {
47 return DigData.GetBit(Symb);
48 };
49 bool IsWord(char Symb) {
50 return WdData.GetBit(Symb);
51 };
52 bool IsUpperCase(char Symb) {
53 return UCData.GetBit(Symb);
54 };
55 bool IsLowerCase(char Symb) {
56 return LCData.GetBit(Symb);
57 };
58 char LowCase(char Chr) {
59 if (UCData.GetBit(Chr))
60 return Chr+0x20;
61 return Chr;
62 };
63
64 ///////////////////////////////////////////////
65
66 SRegInfo::SRegInfo() {
67 Next = Parent = 0;
68 un.Param = 0;
69 Op = ReEmpty;
70 };
71 SRegInfo::~SRegInfo() {
72 if (Next) delete Next;
73 if (un.Param)
74 switch(Op) {
75 case ReEnum:
76 case ReNEnum:
77 delete un.ChrClass;
78 break;
79 default:
80 if (Op > ReBlockOps && Op < ReSymbolOps || Op == ReBrackets)
81 delete un.Param;
82 break;
83 };
84 };
85
86 ///////////////////////////////////////////////
87
88 void SCharData::SetBit(unsigned char Bit) {
89 int p = Bit/8;
90 CArr[p] |= (1 << Bit%8);
91 };
92 void SCharData::ClearBit(unsigned char Bit) {
93 int p = Bit/8;
94 CArr[p] &= ~(1 << Bit%8);
95 };
96 bool SCharData::GetBit(unsigned char Bit) {
97 int p = (unsigned char)Bit/8;
98 return (CArr[p] & (1 << Bit%8))!=0;
99 };
100
101 /////////////////////////////////////////////////////////////////
102 ////////////////////// RegExp Class ///////////////////////////
103 /////////////////////////////////////////////////////////////////
104
105 PosRegExp::PosRegExp() {
106 Info = 0;
107 Exprn = 0;
108 NoMoves = false;
109 Error = true;
110 FirstChar = 0;
111 CurMatch = 0;
112 };
113 PosRegExp::~PosRegExp() {
114 if (Info) delete Info;
115 };
116
117 bool PosRegExp::SetExpr(const char *Expr) {
118 if (!this) return false;
119 Error = true;
120 CurMatch = 0;
121 if (SetExprLow(Expr)) Error = false;
122 return !Error;
123 };
124 bool PosRegExp::isok() {
125 return !Error;
126 };
127
128
129 bool PosRegExp::SetExprLow(const char *Expr) {
130 int Len = strlen(Expr);
131 bool Ok = false;
132 int i,j,s = 0,pos,tmp;
133 int EnterBr = 0,EnterGr = 0,EnterFg = 0;
134
135 if (Info) delete Info;
136 Info = new SRegInfo;
137 Exprn = new int[Len];
138
139 NoCase = false;
140 Extend = false;
141 if (Expr[0] == '/') s++;
142 else return false;
143
144 for (i = Len; i > 0 && !Ok;i--)
145 if (Expr[i] == '/') {
146 Len = i-s;
147 Ok = true;
148 for (int j = i+1; Expr[j]; j++) {
149 if (Expr[j] == 'i') NoCase = true;
150 if (Expr[j] == 'x') Extend = true;
151 };
152 };
153 if (!Ok) return false;
154
155 ////////////////////////////////
156 for (j = 0,pos = 0; j < Len; j++,pos++) {
157 if (Extend && Expr[j+s] == ' ') {
158 pos--;
159 continue;
160 };
161
162 Exprn[pos] = (int)(unsigned char)Expr[j+s];
163
164 if (Expr[j+s] == BackSlash) {
165 switch (Expr[j+s+1]) {
166 case 'd':
167 Exprn[pos] = ReDigit;
168 break;
169 case 'D':
170 Exprn[pos] = ReNDigit;
171 break;
172 case 'w':
173 Exprn[pos] = ReWordSymb;
174 break;
175 case 'W':
176 Exprn[pos] = ReNWordSymb;
177 break;
178 case 's':
179 Exprn[pos] = ReWSpace;
180 break;
181 case 'S':
182 Exprn[pos] = ReNWSpace;
183 break;
184 case 'u':
185 Exprn[pos] = ReUCase;
186 break;
187 case 'l':
188 Exprn[pos] = ReNUCase;
189 break;
190 case 't':
191 Exprn[pos] = '\t';
192 break;
193 case 'n':
194 Exprn[pos] = '\n';
195 break;
196 case 'r':
197 Exprn[pos] = '\r';
198 break;
199 case 'b':
200 Exprn[pos] = ReWBound;
201 break;
202 case 'B':
203 Exprn[pos] = ReNWBound;
204 break;
205 case 'c':
206 Exprn[pos] = RePreNW;
207 break;
208 case 'm':
209 Exprn[pos] = ReStart;
210 break;
211 case 'M':
212 Exprn[pos] = ReEnd;
213 break;
214 case 'x':
215 tmp = toupper(Expr[j+s+2])-0x30;
216 tmp = (tmp>9?tmp-7:tmp)<<4;
217 tmp += (toupper(Expr[j+s+3])-0x30)>9?toupper(Expr[j+s+3])-0x37:(toupper(Expr[j+s+3])-0x30);
218 Exprn[pos] = tmp;
219 j+=2;
220 break;
221 case 'y':
222 tmp = Expr[j+s+2] - 0x30;
223 if (tmp >= 0 && tmp <= 9) {
224 if (tmp == 1) {
225 tmp = 10 + Expr[j+s+3] - 0x30;
226 if (tmp >= 10 && tmp <= 19) j++;
227 else tmp = 1;
228 };
229 Exprn[pos] = ReBkTrace + tmp;
230 j++;
231 break;
232 };
233 default:
234 tmp = Expr[j+s+1] - 0x30;
235 if (tmp >= 0 && tmp <= 9) {
236 if (tmp == 1) {
237 tmp = 10 + Expr[j+s+2] - 0x30;
238 if (tmp >= 10 && tmp <= 19) j++;
239 else tmp = 1;
240 };
241 Exprn[pos] = ReBkBrack + tmp;
242 break;
243 } else
244 Exprn[pos] = Expr[j+s+1];
245 break;
246 };
247 j++;
248 continue;
249 };
250 if (Expr[j+s] == ']') {
251 Exprn[pos] = ReEnumE;
252 if (EnterFg || !EnterGr) return false;
253 EnterGr--;
254 };
255 if (Expr[j+s] == '-' && EnterGr) Exprn[pos] = ReFrToEnum;
256
257 if (EnterGr) continue;
258
259 if (Expr[j+s] == '[' && Expr[j+s+1] == '^') {
260 Exprn[pos] = ReNEnumS;
261 if (EnterFg) return false;
262 EnterGr++;
263 j++;
264 continue;
265 };
266 if (Expr[j+s] == '*' && Expr[j+s+1] == '?') {
267 Exprn[pos] = ReNGMul;
268 j++;
269 continue;
270 };
271 if (Expr[j+s] == '+' && Expr[j+s+1] == '?') {
272 Exprn[pos] = ReNGPlus;
273 j++;
274 continue;
275 };
276 if (Expr[j+s] == '?' && Expr[j+s+1] == '?') {
277 Exprn[pos] = ReNGQuest;
278 j++;
279 continue;
280 };
281 if (Expr[j+s] == '?' && Expr[j+s+1] == '#' &&
282 Expr[j+s+2]>='0' && Expr[j+s+2]<='9') {
283 Exprn[pos] = ReBehind+Expr[j+s+2]-0x30;
284 j+=2;
285 continue;
286 };
287 if (Expr[j+s] == '?' && Expr[j+s+1] == '~' &&
288 Expr[j+s+2]>='0' && Expr[j+s+2]<='9') {
289 Exprn[pos] = ReNBehind+Expr[j+s+2]-0x30;
290 j+=2;
291 continue;
292 };
293 if (Expr[j+s] == '?' && Expr[j+s+1] == '=') {
294 Exprn[pos] = ReAhead;
295 j++;
296 continue;
297 };
298 if (Expr[j+s] == '?' && Expr[j+s+1] == '!') {
299 Exprn[pos] = ReNAhead;
300 j++;
301 continue;
302 };
303
304 if (Expr[j+s] == '(') {
305 Exprn[pos] = ReLBrack;
306 if (EnterFg) return false;
307 EnterBr++;
308 };
309 if (Expr[j+s] == ')') {
310 Exprn[pos] = ReRBrack;
311 if (!EnterBr || EnterFg) return false;
312 EnterBr--;
313 };
314 if (Expr[j+s] == '[') {
315 Exprn[pos] = ReEnumS;
316 if (EnterFg) return false;
317 EnterGr++;
318 };
319 if (Expr[j+s] == '{') {
320 Exprn[pos] = ReRangeS;
321 if (EnterFg) return false;
322 EnterFg++;
323 };
324 if (Expr[j+s] == '}' && Expr[j+s+1] == '?') {
325 Exprn[pos] = ReNGRangeE;
326 if (!EnterFg) return false;
327 EnterFg--;
328 j++;
329 continue;
330 };
331 if (Expr[j+s] == '}') {
332 Exprn[pos] = ReRangeE;
333 if (!EnterFg) return false;
334 EnterFg--;
335 };
336
337 if (Expr[j+s] == '^') Exprn[pos] = ReSoL;
338 if (Expr[j+s] == '$') Exprn[pos] = ReEoL;
339 if (Expr[j+s] == '.') Exprn[pos] = ReAnyChr;
340 if (Expr[j+s] == '*') Exprn[pos] = ReMul;
341 if (Expr[j+s] == '+') Exprn[pos] = RePlus;
342 if (Expr[j+s] == '?') Exprn[pos] = ReQuest;
343 if (Expr[j+s] == '|') Exprn[pos] = ReOr;
344 };
345 if (EnterGr || EnterBr || EnterFg) return false;
346
347 Info->Op = ReBrackets;
348 Info->un.Param = new SRegInfo;
349 Info->s = CurMatch++;
350
351 if (!SetStructs(Info->un.Param,0,pos)) return false;
352 Optimize();
353 delete Exprn;
354 return true;
355 };
356
357 void PosRegExp::Optimize() {
358 PRegInfo Next = Info;
359 FirstChar = 0;
360 while(Next) {
361 if (Next->Op == ReBrackets || Next->Op == RePlus || Next->Op == ReNGPlus) {
362 Next = Next->un.Param;
363 continue;
364 };
365 if (Next->Op == ReSymb) {
366 if (Next->un.Symb & 0xFF00 && Next->un.Symb != ReSoL && Next->un.Symb != ReWBound)
367 break;
368 FirstChar = Next->un.Symb;
369 break;
370 };
371 break;
372 };
373 };
374
375 bool PosRegExp::SetStructs(PRegInfo &re,int start,int end) {
376 PRegInfo Next,Prev,Prev2;
377 int comma,st,en,ng,i, j,k;
378 int EnterBr;
379 bool Add;
380
381 if (end - start < 0) return false;
382 Next = re;
383 for (i = start; i < end; i++) {
384 Add = false;
385 // Ops
386 if (Exprn[i] > ReBlockOps && Exprn[i] < ReSymbolOps) {
387 Next->un.Param = 0;
388 Next->Op = (EOps)Exprn[i];
389 Add = true;
390 };
391 // {n,m}
392 if (Exprn[i] == ReRangeS) {
393 st = i;
394 en = -1;
395 comma = -1;
396 ng = 0;
397 for (j = i;j < end;j++) {
398 if (Exprn[j] == ReNGRangeE) {
399 en = j;
400 ng = 1;
401 break;
402 };
403 if (Exprn[j] == ReRangeE) {
404 en = j;
405 break;
406 };
407 if ((char)Exprn[j] == ',')
408 comma = j;
409 };
410 if (en == -1) return false;
411 if (comma == -1) comma = en;
412 Next->s = (char)GetNumber(Exprn,st+1,comma);
413 if (comma != en)
414 Next->e = (char)GetNumber(Exprn,comma+1,en);
415 else
416 Next->e = Next->s;
417 Next->un.Param = 0;
418 Next->Op = ng?ReNGRangeNM:ReRangeNM;
419 if (en-comma == 1) {
420 Next->e = -1;
421 Next->Op = ng?ReNGRangeN:ReRangeN;
422 };
423 i=j;
424 Add = true;
425 };
426 // [] [^]
427 if (Exprn[i] == ReEnumS || Exprn[i] == ReNEnumS) {
428 Next->Op = (Exprn[i] == ReEnumS)?ReEnum:ReNEnum;
429 for (j = i+1;j < end;j++) {
430 if (Exprn[j] == ReEnumE)
431 break;
432 };
433 if (j == end) return false;
434 Next->un.ChrClass = new SCharData;
435 memset(Next->un.ChrClass, 0, 32);
436 for (j = i+1;Exprn[j] != ReEnumE;j++) {
437 if (Exprn[j+1] == ReFrToEnum) {
438 for (i = (Exprn[j]&0xFF); i < (Exprn[j+2]&0xFF);i++)
439 Next->un.ChrClass->SetBit(i&0xFF);
440 j++;
441 continue;
442 };
443 switch(Exprn[j]) {
444 case ReDigit:
445 for (k = 0x30;k < 0x40;k++)
446 if (IsDigit((char)k))
447 Next->un.ChrClass->SetBit(k);
448 break;
449 case ReNDigit:
450 for (k = 0x30;k < 0x40;k++)
451 if (!IsDigit((char)k))
452 Next->un.ChrClass->SetBit(k);
453 Next->un.ChrClass->ClearBit(0x0a);
454 Next->un.ChrClass->ClearBit(0x0d);
455 break;
456 case ReWordSymb:
457 for (k = 0;k < 256;k++)
458 if (IsWord((char)k))
459 Next->un.ChrClass->SetBit(k);
460 break;
461 case ReNWordSymb:
462 for (k = 0;k < 256;k++)
463 if (!IsWord((char)k))
464 Next->un.ChrClass->SetBit(k);
465 Next->un.ChrClass->ClearBit(0x0a);
466 Next->un.ChrClass->ClearBit(0x0d);
467 break;
468 case ReWSpace:
469 Next->un.ChrClass->SetBit(0x20);
470 Next->un.ChrClass->SetBit(0x09);
471 break;
472 case ReNWSpace:
473 memset(Next->un.ChrClass->IArr, 0xFF, 32);
474 Next->un.ChrClass->ClearBit(0x20);
475 Next->un.ChrClass->ClearBit(0x09);
476 Next->un.ChrClass->ClearBit(0x0a);
477 Next->un.ChrClass->ClearBit(0x0d);
478 break;
479 default:
480 if (!(Exprn[j]&0xFF00))
481 Next->un.ChrClass->SetBit(Exprn[j]&0xFF);
482 break;
483 };
484 };
485 Add = true;
486 i=j;
487 };
488 // ( ... )
489 if (Exprn[i] == ReLBrack) {
490 EnterBr = 1;
491 for (j = i+1;j < end;j++) {
492 if (Exprn[j] == ReLBrack) EnterBr++;
493 if (Exprn[j] == ReRBrack) EnterBr--;
494 if (!EnterBr) break;
495 };
496 if (EnterBr) return false;
497 Next->Op = ReBrackets;
498 Next->un.Param = new SRegInfo;
499 Next->un.Param->Parent = Next;
500 Next->s = CurMatch++;
501 if (CurMatch > MatchesNum) CurMatch = MatchesNum;
502 if (!SetStructs(Next->un.Param,i+1,j)) return false;
503 Add = true;
504 i=j;
505 };
506 if ((Exprn[i]&0xFF00) == ReBkTrace) {
507 Next->Op = ReBkTrace;
508 Next->un.Symb = Exprn[i]&0xFF;
509 Add = true;
510 };
511 if ((Exprn[i]&0xFF00) == ReBkBrack) {
512 Next->Op = ReBkBrack;
513 Next->un.Symb = Exprn[i]&0xFF;
514 Add = true;
515 };
516 if ((Exprn[i]&0xFF00) == ReBehind) {
517 Next->Op = ReBehind;
518 Next->s = Exprn[i]&0xFF;
519 Add = true;
520 };
521 if ((Exprn[i]&0xFF00) == ReNBehind) {
522 Next->Op = ReNBehind;
523 Next->s = Exprn[i]&0xFF;
524 Add = true;
525 };
526 // Chars
527 if (Exprn[i] >= ReAnyChr && Exprn[i] < ReTemp || Exprn[i] < 0x100) {
528 Next->Op = ReSymb;
529 Next->un.Symb = Exprn[i];
530 Add = true;
531 };
532 // Next
533 if (Add && i != end-1) {
534 Next->Next = new SRegInfo;
535 Next->Next->Parent = Next->Parent;
536 Next = Next->Next;
537 };
538 };
539 Next = re;
540 Prev = Prev2 = 0;
541 while(Next) {
542 if (Next->Op > ReBlockOps && Next->Op < ReSymbolOps) {
543 if (!Prev) return false;
544 if (!Prev2) re = Next;
545 else Prev2->Next = Next;
546 //if (Prev->Op > ReBlockOps && Prev->Op < ReSymbolOps) return false;
547 Prev->Parent = Next;
548 Prev->Next = 0;
549 Next->un.Param = Prev;
550 Prev = Prev2;
551 };
552 Prev2 = Prev;
553 Prev = Next;
554 Next = Next->Next;
555 };
556
557 return true;
558 };
559
560 /////////////////////////////////////////////////////////////////
561 ///////////////////////// Parsing /////////////////////////////
562 /////////////////////////////////////////////////////////////////
563
564 bool PosRegExp::CheckSymb(int Symb,bool Inc) {
565 bool Res;
566 char ch;
567 switch(Symb) {
568 case ReAnyChr:
569 if (posParse >= posEnd) return false;
570 ch = CharAt(posParse,param);
571 Res = ch != '\r' && ch != '\n';
572 if (Res && Inc) posParse++;
573 return Res;
574 case ReSoL:
575 if (posStart == posParse)
576 return true;
577 ch = CharAt(posParse-1,param);
578 return ch == '\n' || ch == '\r';
579 case ReEoL:
580 if (posEnd == posParse)
581 return true;
582 ch = CharAt(posParse,param);
583 return ch == '\n' || ch == '\r';
584 case ReDigit:
585 if (posParse >= posEnd) return false;
586 ch = CharAt(posParse,param);
587 Res = (ch >= 0x30 && ch <= 0x39);
588 if (Res && Inc) posParse++;
589 return Res;
590 case ReNDigit:
591 if (posParse >= posEnd) return false;
592 ch = CharAt(posParse,param);
593 Res = !(ch >= 0x30 && ch <= 0x39) && ch != '\r' && ch != '\n';
594 if (Res && Inc) posParse++;
595 return Res;
596 case ReWordSymb:
597 if (posParse >= posEnd) return false;
598 Res = IsWord(CharAt(posParse,param));
599 if (Res && Inc) posParse++;
600 return Res;
601 case ReNWordSymb:
602 if (posParse >= posEnd) return false;
603 ch = CharAt(posParse,param);
604 Res = !IsWord(ch) && ch != '\r' && ch != '\n';
605 if (Res && Inc) posParse++;
606 return Res;
607 case ReWSpace:
608 if (posParse >= posEnd) return false;
609 ch = CharAt(posParse,param);
610 Res = (ch == 0x20 || ch == '\t');
611 if (Res && Inc) posParse++;
612 return Res;
613 case ReNWSpace:
614 if (posParse >= posEnd) return false;
615 ch = CharAt(posParse,param);
616 Res = !(ch == 0x20 || ch == '\t') && ch != '\r' && ch != '\n';
617 if (Res && Inc) posParse++;
618 return Res;
619 case ReUCase:
620 if (posParse >= posEnd) return false;
621 Res = IsUpperCase(CharAt(posParse,param));
622 if (Res && Inc) posParse++;
623 return Res;
624 case ReNUCase:
625 if (posParse >= posEnd) return false;
626 Res = IsLowerCase(CharAt(posParse,param));
627 if (Res && Inc) posParse++;
628 return Res;
629 case ReWBound:
630 if (posParse >= posEnd) return true;
631 ch = CharAt(posParse,param);
632 return IsWord(CharAt(posParse,param)) && (posParse == posStart || !IsWord(CharAt(posParse-1,param)));
633 case ReNWBound:
634 if (posParse >= posEnd) return true;
635 return !IsWord(CharAt(posParse,param)) && IsWord(CharAt(posParse-1,param));
636 case RePreNW:
637 if (posParse >= posEnd) return true;
638 return (posParse == posStart || !IsWord(CharAt(posParse-1,param)));
639 case ReStart:
640 Matches->s[0] = (posParse-posStart);
641 return true;
642 case ReEnd:
643 Matches->e[0] = (posParse-posStart);
644 return true;
645 default:
646 if ((Symb & 0xFF00) || posParse >= posEnd) return false;
647 if (NoCase) {
648 if (LowCase(CharAt(posParse,param)) != LowCase((char)Symb&0xFF)) return false;
649 } else
650 if (CharAt(posParse,param) != (char)(Symb&0xFF)) return false;
651 if (Inc) posParse++;
652 return true;
653 };
654 }
655
656 bool PosRegExp::LowParseRe(PRegInfo &Next) {
657 PRegInfo OrNext;
658 int i,match,sv;
659 int posStr;
660
661 switch(Next->Op) {
662 case ReSymb:
663 if (!CheckSymb(Next->un.Symb,true)) return false;
664 break;
665 case ReEmpty:
666 break;
667 case ReBkTrace:
668 if (!posBkStr | !BkTrace) return false;
669 sv = Next->un.Symb;
670 posStr = posParse;
671 for (i = BkTrace->s[sv]; i < BkTrace->e[sv]; i++) {
672 if (CharAt(posStr,param) != CharAt(posBkStr+i,param) || posEnd == posStr) return false;
673 posStr++;
674 };
675 posParse = posStr;
676 break;
677 case ReBkBrack:
678 sv = Next->un.Symb;
679 posStr = posParse;
680 if (Matches->s[sv] == -1 || Matches->e[sv] == -1) return false;
681 for (i = Matches->s[sv]; i < Matches->e[sv]; i++) {
682 if (CharAt(posStr,param) != CharAt(posStart+i,param) || posEnd == posStr) return false;
683 posStr++;
684 };
685 posParse = posStr;
686 break;
687 case ReBehind:
688 sv = Next->s;
689 posStr = posParse;
690 posParse -= sv;
691 if (!LowParse(Next->un.Param)) return false;
692 posParse = posStr;
693 break;
694 case ReNBehind:
695 sv = Next->s;
696 posStr = posParse;
697 posParse -= sv;
698 if (LowParse(Next->un.Param)) return false;
699 posParse = posStr;
700 break;
701 case ReAhead:
702 posStr = posParse;
703 if (!LowParse(Next->un.Param)) return false;
704 posParse = posStr;
705 break;
706 case ReNAhead:
707 posStr = posParse;
708 if (LowParse(Next->un.Param)) return false;
709 posParse = posStr;
710 break;
711 case ReEnum:
712 if (posParse >= posEnd) return false;
713 if (!Next->un.ChrClass->GetBit(CharAt(posParse,param))) return false;
714 posParse++;
715 break;
716 case ReNEnum:
717 if (posParse >= posEnd) return false;
718 if (Next->un.ChrClass->GetBit(CharAt(posParse,param))) return false;
719 posParse++;
720 break;
721 case ReBrackets:
722 match = Next->s;
723 sv = posParse-posStart;
724 posStr = posParse;
725 if (LowParse(Next->un.Param)) {
726 if (match || (Matches->s[match] == -1))
727 Matches->s[match] = sv;
728 if (match || (Matches->e[match] == -1))
729 Matches->e[match] = posParse-posStart;
730 return true;
731 };
732 posParse = posStr;
733 return false;
734 case ReMul:
735 posStr = posParse;
736 while (LowParse(Next->un.Param));
737 while(!LowCheckNext(Next) && posStr < posParse) posParse--;
738 break;
739 case ReNGMul:
740 do {
741 if (LowCheckNext(Next)) break;
742 } while (LowParse(Next->un.Param));
743 break;
744 case RePlus:
745 posStr = posParse;
746 match = false;
747 while (LowParse(Next->un.Param))
748 match = true;
749 if (!match) return false;
750 while(!LowCheckNext(Next) && posStr < posParse) posParse--;
751 break;
752 case ReNGPlus:
753 if (!LowParse(Next->un.Param)) return false;
754 do {
755 if (LowCheckNext(Next)) break;
756 } while (LowParse(Next->un.Param));
757 break;
758 case ReQuest:
759 LowParse(Next->un.Param);
760 break;
761 case ReNGQuest:
762 if (LowCheckNext(Next)) break;
763 if (!LowParse(Next->un.Param)) return false;
764 break;
765 case ReOr:
766 OrNext = Next;
767 // posStr = posParse;
768 if (LowParse(Next->un.Param)) {
769 while (OrNext && OrNext->Op == ReOr)
770 OrNext = OrNext->Next;
771 /*if (!LowCheckNext(OrNext)){
772 posParse = posStr;
773 OrNext = Next;
774 };*/
775 };
776 Next = OrNext;
777 break;
778 case ReRangeN:
779 posStr = posParse;
780 i = 0;
781 while (LowParse(Next->un.Param)) i++; // ???
782 do {
783 if (i < Next->s) {
784 posParse = posStr;
785 return false;
786 };
787 i--;
788 } while(!LowCheckNext(Next) && posStr < posParse--);
789 break;
790 case ReNGRangeN:
791 posStr = posParse;
792 i = 0;
793 while (LowParse(Next->un.Param)) {
794 i++;
795 if (i >= Next->s && LowCheckNext(Next)) // ???
796 break;
797 };
798 if (i < Next->s) {
799 posParse = posStr;
800 return false;
801 };
802 break;
803 case ReRangeNM:
804 posStr = posParse;
805 i = 0;
806 while (i < Next->s && LowParse(Next->un.Param)) // ???
807 i++;
808 if (i < Next->s) {
809 posParse = posStr;
810 return false;
811 };
812 while (i < Next->e && LowParse(Next->un.Param)) // ???
813 i++;
814
815 while(!LowCheckNext(Next)) {
816 i--;
817 posParse--;
818 if (i < Next->s) {
819 posParse = posStr;
820 return false;
821 };
822 };
823 break;
824 case ReNGRangeNM:
825 posStr = posParse;
826 i = 0;
827 while (i < Next->s && LowParse(Next->un.Param)) // ???
828 i++;
829 if (i < Next->s) {
830 posParse = posStr;
831 return false;
832 };
833 while(!LowCheckNext(Next)) {
834 i++;
835 if (!LowParse(Next->un.Param) || i > Next->e) { // ???
836 posParse = posStr;
837 return false;
838 };
839 };
840 break;
841 };
842 return true;
843 };
844
845 bool PosRegExp::LowCheckNext(PRegInfo Re) {
846 PRegInfo Next;
847 int tmp = posParse;
848 Next = Re;
849 do {
850 if (Next && Next->Op == ReOr)
851 while (Next && Next->Op == ReOr)
852 Next = Next->Next;
853 if (Next->Next && !LowParse(Next->Next)) {
854 posParse = tmp;
855 Ok = false;
856 return false;
857 };
858 Next = Next->Parent;
859 } while(Next);
860 posParse = tmp;
861 if (Ok != false) Ok = true;
862 return true;
863 };
864
865 bool PosRegExp::LowParse(PRegInfo Re) {
866 while(Re && posParse <= posEnd) {
867 if (!LowParseRe(Re)) return false;
868 if (Re) Re = Re->Next;
869 };
870 return true;
871 };
872
873 bool PosRegExp::QuickCheck() {
874 if (!NoMoves || !FirstChar)
875 return true;
876 switch(FirstChar) {
877 case ReSoL:
878 if (posParse != posStart) return false;
879 return true;
880 case ReWBound:
881 return IsWord(CharAt(posParse,param)) && (posParse == posStart || !IsWord(CharAt(posParse-1,param)));
882 default:
883 if (NoCase && LowCase(CharAt(posParse,param)) != LowCase(FirstChar)) return false;
884 if (!NoCase && CharAt(posParse,param) != (char)FirstChar) return false;
885 return true;
886 };
887 };
888
889 bool PosRegExp::ParseRe(int posStr) {
890 if (Error) return false;
891
892 posParse = posStr;
893 if (!QuickCheck()) return false;
894
895 for (int i = 0; i < MatchesNum; i++)
896 Matches->s[i] = Matches->e[i] = -1;
897 Matches->CurMatch = CurMatch;
898
899 Ok = -1;
900 //try{
901 do {
902 if (!LowParse(Info)) {
903 if (NoMoves) return false;
904 } else
905 return true;
906 posParse = ++posStr;
907 } while(posParse != posEnd+1);
908 return false;
909 //}__except(){
910 // return true;
911 //};
912 }
913 ;
914
915 bool PosRegExp::Parse(int posStr,int posSol, int posEol, PMatches Mtch, int Moves) {
916 if (!this) return false;
917
918 bool s = NoMoves;
919 if (Moves != -1) NoMoves = Moves!=0;
920 posStart = posSol;
921 posEnd = posEol;
922 Matches = Mtch;
923 bool r = ParseRe(posStr);
924 NoMoves = s;
925 return r;
926 };
927
928 bool PosRegExp::Parse(int posStr, int posStop, PMatches Mtch) {
929 if (!this) return false;
930 posStart = posStr;
931 posEnd = posStop;
932 Matches = Mtch;
933 return ParseRe(posStr);
934 };
935
936 bool PosRegExp::SetNoMoves(bool Moves) {
937 NoMoves = Moves;
938 return true;
939 };
940
941 bool PosRegExp::SetBkTrace(int posStr,PMatches Trace) {
942 BkTrace = Trace;
943 posBkStr = posStr;
944 return true;
945 };
946
947 #define EVAL_MATCHES 16
948 #define EVAL_CHUNKSIZE 256
949
950 #define EVAL_LOWERCASE 1
951 #define EVAL_UPPERCASE 2
952 #define EVAL_LOWERCASE_NEXT 4
953 #define EVAL_UPPERCASE_NEXT 8
954
955 bool PosRegExp::Evaluate(char *Expr, int posStr, PMatches Mtch, char **Res) {
956 int length,
957 newlength,
958 chunklength,
959 value,
960 size,
961 src,
962 end;
963 unsigned flag;
964 char ch,
965 *dest,
966 *pool;
967
968 size = EVAL_CHUNKSIZE;
969 pool = (char*) malloc (size);
970 dest = pool;
971 length = 0;
972 flag = 0;
973 while (*Expr) {
974 switch (ch = *Expr++) {
975 case '\\':
976 switch (ch = *Expr++) {
977 case 'A':
978 case 'B':
979 case 'C':
980 case 'D':
981 case 'E':
982 case 'F':
983 ch -= ('A' - '0');
984 case '0':
985 case '1':
986 case '2':
987 case '3':
988 case '4':
989 case '5':
990 case '6':
991 case '7':
992 case '8':
993 case '9':
994 value = ch - '0';
995 if (Mtch->s[value] != -1 && value < EVAL_MATCHES) {
996 chunklength = Mtch->e[value] - Mtch->s[value];
997 if (chunklength) {
998 newlength = chunklength + length;
999 if (newlength > size) {
1000 do
1001 size += EVAL_CHUNKSIZE;
1002 while (size < newlength);
1003 pool = (char*) realloc (pool, size);
1004 dest = pool + length;
1005 }
1006 length = newlength;
1007 src = posStr + Mtch->s[value];
1008 end = posStr + Mtch->e[value];
1009 if (flag & EVAL_UPPERCASE) {
1010 if (flag & EVAL_LOWERCASE_NEXT) {
1011 *dest++ = tolower (CharAt(src++,param));
1012 flag &= ~EVAL_LOWERCASE_NEXT;
1013 }
1014 while (src < end)
1015 *dest++ = toupper (CharAt(src++,param));
1016 } else if (flag & EVAL_LOWERCASE) {
1017 if (flag & EVAL_UPPERCASE_NEXT) {
1018 *dest++ = toupper (CharAt(src++,param));
1019 flag &= ~EVAL_UPPERCASE_NEXT;
1020 }
1021 while (src < end)
1022 *dest++ = tolower (CharAt(src++,param));
1023 } else {
1024 if (flag & EVAL_LOWERCASE_NEXT) {
1025 *dest++ = tolower (CharAt(src++,param));
1026 flag &= ~EVAL_LOWERCASE_NEXT;
1027 } else if (flag & EVAL_UPPERCASE_NEXT) {
1028 *dest++ = toupper (CharAt(src++,param));
1029 flag &= ~EVAL_UPPERCASE_NEXT;
1030 }
1031 while (src < end)
1032 *dest++ = CharAt(src++,param);
1033 }
1034 }
1035 } else
1036 goto error;
1037 continue;
1038 case '\0':
1039 goto error;
1040 case 'r':
1041 ch = '\r';
1042 break;
1043 case 'n':
1044 ch = '\n';
1045 break;
1046 case 'b':
1047 ch = '\b';
1048 break;
1049 case 'a':
1050 ch = '\a';
1051 break;
1052 case 't':
1053 ch = '\t';
1054 break;
1055 case 'U':
1056 flag |= EVAL_UPPERCASE;
1057 continue;
1058 case 'u':
1059 flag |= EVAL_UPPERCASE_NEXT;
1060 continue;
1061 case 'L':
1062 flag |= EVAL_LOWERCASE;
1063 continue;
1064 case 'l':
1065 flag |= EVAL_LOWERCASE_NEXT;
1066 continue;
1067 case 'Q':
1068 case 'q':
1069 flag &= ~(EVAL_UPPERCASE | EVAL_LOWERCASE);
1070 continue;
1071 case 'x':
1072 {
1073 if (!*Expr)
1074 goto error;
1075 value = toupper (*Expr) - '0';
1076 if (value > 9)
1077 value = value + '0' - 'A' + 10;
1078 if (value > 15)
1079 goto error;
1080 ch = value << 4;
1081 Expr++;
1082 if (!*Expr)
1083 goto error;
1084 value = toupper (*Expr) - '0';
1085 if (value > 9)
1086 value = value + '0' - 'A' + 10;
1087 if (value > 15)
1088 goto error;
1089 Expr++;
1090 ch |= value;
1091 break;
1092 }
1093 case 'd':
1094 {
1095 if (!*Expr)
1096 goto error;
1097 value = toupper (*Expr) - '0';
1098 if (value > 9)
1099 goto error;
1100 ch = value * 100;
1101 Expr++;
1102 if (!*Expr)
1103 goto error;
1104 value = toupper (*Expr) - '0';
1105 if (value > 9)
1106 goto error;
1107 ch += value * 10;
1108 Expr++;
1109 if (!*Expr)
1110 goto error;
1111 value = toupper (*Expr) - '0';
1112 if (value > 9)
1113 goto error;
1114 ch += value;
1115 Expr++;
1116 break;
1117 }
1118 case 'o':
1119 {
1120 if (!*Expr)
1121 goto error;
1122 value = toupper (*Expr) - '0';
1123 if (value > 9)
1124 goto error;
1125 ch = value << 6;
1126 Expr++;
1127 if (!*Expr)
1128 goto error;
1129 value = toupper (*Expr) - '0';
1130 if (value > 9)
1131 goto error;
1132 ch += value << 3;
1133 Expr++;
1134 if (!*Expr)
1135 goto error;
1136 value = toupper (*Expr) - '0';
1137 if (value > 9)
1138 goto error;
1139 ch |= value;
1140 Expr++;
1141 /* break; */
1142 }
1143 /* default:
1144 break; */
1145 }
1146 default:
1147 if (++length > size) {
1148 do
1149 size += EVAL_CHUNKSIZE;
1150 while (size < length);
1151 pool = (char*) realloc (pool, size);
1152 dest = pool + length - 1;
1153 }
1154 if (flag & EVAL_LOWERCASE_NEXT) {
1155 *dest++ = tolower (ch);
1156 flag &= ~EVAL_LOWERCASE_NEXT;
1157 } else if (flag & EVAL_UPPERCASE_NEXT) {
1158 *dest++ = toupper (ch);
1159 flag &= ~EVAL_UPPERCASE_NEXT;
1160 } else if (flag & EVAL_UPPERCASE)
1161 *dest++ = toupper (ch);
1162 else if (flag & EVAL_LOWERCASE)
1163 *dest++ = tolower (ch);
1164 else
1165 *dest++ = ch;
1166 }
1167 }
1168 if (++length > size) {
1169 do
1170 size += EVAL_CHUNKSIZE;
1171 while (size < length);
1172 pool = (char*) realloc (pool, size);
1173 dest = pool + length - 1;
1174 }
1175 *dest = '\0';
1176 *Res = pool;
1177 return true;
1178 error:
1179 free (pool);
1180 return false;
1181 }