]>
Commit | Line | Data |
---|---|---|
d134f170 RD |
1 | #include <string.h> |
2 | #include <stdio.h> | |
3 | #include <ctype.h> | |
4 | #include <malloc.h> | |
5 | ||
6 | #include "PosRegExp.h" | |
7 | ||
8 | //Up: /[A-Z \x80-\x9f \xf0 ]/x | |
9 | //Lo: /[a-z \xa0-\xaf \xe0-\xef \xf1 ]/x | |
10 | //Wd: /[\d _ A-Z a-z \xa0-\xaf \xe0-\xf1 \x80-\x9f]/x | |
11 | //* // Dos866 | |
12 | SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0xffffffff, 0x0, 0x0, 0x10000}, | |
13 | LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0xffff, 0x0, 0x2ffff}, | |
14 | WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0xffffffff, 0xffff, 0x0, 0x3ffff}, | |
15 | DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; | |
16 | /*/ // cp1251 | |
17 | SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff, 0x0}, | |
18 | LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff}, | |
19 | WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0x0, 0x0, 0xffffffff, 0xffffffff}, | |
20 | DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; | |
21 | //*/ | |
22 | ||
23 | /////////////////////////////////////////////// | |
24 | ||
25 | int GetNumber(int *str,int s,int e) { | |
26 | int r = 1, num = 0; | |
27 | if (e < s) return -1; | |
28 | for(int i = e-1; i >= s; i--) { | |
29 | if (str[i] > '9' || str[i] < '0') return -1; | |
30 | num += (str[i] - 0x30)*r; | |
31 | r *= 10; | |
32 | }; | |
33 | return num; | |
34 | /* | |
35 | char tmp[20]; | |
36 | double Res; | |
37 | if (e == s) return -1; | |
38 | for (int i = s;i < e;i++) | |
39 | tmp[i-s] = (char)Str[i]; | |
40 | tmp[e-s] = 0; | |
41 | GetNumber(tmp,&Res); | |
42 | return (int)Res; | |
43 | */ | |
44 | }; | |
45 | ||
46 | bool IsDigit(char Symb) { | |
47 | return DigData.GetBit(Symb); | |
48 | }; | |
49 | bool IsWord(char Symb) { | |
50 | return WdData.GetBit(Symb); | |
51 | }; | |
52 | bool IsUpperCase(char Symb) { | |
53 | return UCData.GetBit(Symb); | |
54 | }; | |
55 | bool IsLowerCase(char Symb) { | |
56 | return LCData.GetBit(Symb); | |
57 | }; | |
58 | char LowCase(char Chr) { | |
59 | if (UCData.GetBit(Chr)) | |
60 | return Chr+0x20; | |
61 | return Chr; | |
62 | }; | |
63 | ||
64 | /////////////////////////////////////////////// | |
65 | ||
66 | SRegInfo::SRegInfo() { | |
67 | Next = Parent = 0; | |
68 | un.Param = 0; | |
69 | Op = ReEmpty; | |
70 | }; | |
71 | SRegInfo::~SRegInfo() { | |
72 | if (Next) delete Next; | |
73 | if (un.Param) | |
74 | switch(Op) { | |
75 | case ReEnum: | |
76 | case ReNEnum: | |
77 | delete un.ChrClass; | |
78 | break; | |
79 | default: | |
80 | if (Op > ReBlockOps && Op < ReSymbolOps || Op == ReBrackets) | |
81 | delete un.Param; | |
82 | break; | |
83 | }; | |
84 | }; | |
85 | ||
86 | /////////////////////////////////////////////// | |
87 | ||
88 | void SCharData::SetBit(unsigned char Bit) { | |
89 | int p = Bit/8; | |
90 | CArr[p] |= (1 << Bit%8); | |
91 | }; | |
92 | void SCharData::ClearBit(unsigned char Bit) { | |
93 | int p = Bit/8; | |
94 | CArr[p] &= ~(1 << Bit%8); | |
95 | }; | |
96 | bool SCharData::GetBit(unsigned char Bit) { | |
97 | int p = (unsigned char)Bit/8; | |
98 | return (CArr[p] & (1 << Bit%8))!=0; | |
99 | }; | |
100 | ||
101 | ///////////////////////////////////////////////////////////////// | |
102 | ////////////////////// RegExp Class /////////////////////////// | |
103 | ///////////////////////////////////////////////////////////////// | |
104 | ||
105 | PosRegExp::PosRegExp() { | |
106 | Info = 0; | |
107 | Exprn = 0; | |
108 | NoMoves = false; | |
109 | Error = true; | |
110 | FirstChar = 0; | |
111 | CurMatch = 0; | |
112 | }; | |
113 | PosRegExp::~PosRegExp() { | |
114 | if (Info) delete Info; | |
115 | }; | |
116 | ||
117 | bool PosRegExp::SetExpr(const char *Expr) { | |
118 | if (!this) return false; | |
119 | Error = true; | |
120 | CurMatch = 0; | |
121 | if (SetExprLow(Expr)) Error = false; | |
122 | return !Error; | |
123 | }; | |
124 | bool PosRegExp::isok() { | |
125 | return !Error; | |
126 | }; | |
127 | ||
128 | ||
129 | bool PosRegExp::SetExprLow(const char *Expr) { | |
130 | int Len = strlen(Expr); | |
131 | bool Ok = false; | |
132 | int i,j,s = 0,pos,tmp; | |
133 | int EnterBr = 0,EnterGr = 0,EnterFg = 0; | |
134 | ||
135 | if (Info) delete Info; | |
136 | Info = new SRegInfo; | |
137 | Exprn = new int[Len]; | |
138 | ||
139 | NoCase = false; | |
140 | Extend = false; | |
141 | if (Expr[0] == '/') s++; | |
142 | else return false; | |
143 | ||
144 | for (i = Len; i > 0 && !Ok;i--) | |
145 | if (Expr[i] == '/') { | |
146 | Len = i-s; | |
147 | Ok = true; | |
148 | for (int j = i+1; Expr[j]; j++) { | |
149 | if (Expr[j] == 'i') NoCase = true; | |
150 | if (Expr[j] == 'x') Extend = true; | |
151 | }; | |
152 | }; | |
153 | if (!Ok) return false; | |
154 | ||
155 | //////////////////////////////// | |
156 | for (j = 0,pos = 0; j < Len; j++,pos++) { | |
157 | if (Extend && Expr[j+s] == ' ') { | |
158 | pos--; | |
159 | continue; | |
160 | }; | |
161 | ||
162 | Exprn[pos] = (int)(unsigned char)Expr[j+s]; | |
163 | ||
164 | if (Expr[j+s] == BackSlash) { | |
165 | switch (Expr[j+s+1]) { | |
166 | case 'd': | |
167 | Exprn[pos] = ReDigit; | |
168 | break; | |
169 | case 'D': | |
170 | Exprn[pos] = ReNDigit; | |
171 | break; | |
172 | case 'w': | |
173 | Exprn[pos] = ReWordSymb; | |
174 | break; | |
175 | case 'W': | |
176 | Exprn[pos] = ReNWordSymb; | |
177 | break; | |
178 | case 's': | |
179 | Exprn[pos] = ReWSpace; | |
180 | break; | |
181 | case 'S': | |
182 | Exprn[pos] = ReNWSpace; | |
183 | break; | |
184 | case 'u': | |
185 | Exprn[pos] = ReUCase; | |
186 | break; | |
187 | case 'l': | |
188 | Exprn[pos] = ReNUCase; | |
189 | break; | |
190 | case 't': | |
191 | Exprn[pos] = '\t'; | |
192 | break; | |
193 | case 'n': | |
194 | Exprn[pos] = '\n'; | |
195 | break; | |
196 | case 'r': | |
197 | Exprn[pos] = '\r'; | |
198 | break; | |
199 | case 'b': | |
200 | Exprn[pos] = ReWBound; | |
201 | break; | |
202 | case 'B': | |
203 | Exprn[pos] = ReNWBound; | |
204 | break; | |
205 | case 'c': | |
206 | Exprn[pos] = RePreNW; | |
207 | break; | |
208 | case 'm': | |
209 | Exprn[pos] = ReStart; | |
210 | break; | |
211 | case 'M': | |
212 | Exprn[pos] = ReEnd; | |
213 | break; | |
214 | case 'x': | |
215 | tmp = toupper(Expr[j+s+2])-0x30; | |
216 | tmp = (tmp>9?tmp-7:tmp)<<4; | |
217 | tmp += (toupper(Expr[j+s+3])-0x30)>9?toupper(Expr[j+s+3])-0x37:(toupper(Expr[j+s+3])-0x30); | |
218 | Exprn[pos] = tmp; | |
219 | j+=2; | |
220 | break; | |
221 | case 'y': | |
222 | tmp = Expr[j+s+2] - 0x30; | |
223 | if (tmp >= 0 && tmp <= 9) { | |
224 | if (tmp == 1) { | |
225 | tmp = 10 + Expr[j+s+3] - 0x30; | |
226 | if (tmp >= 10 && tmp <= 19) j++; | |
227 | else tmp = 1; | |
228 | }; | |
229 | Exprn[pos] = ReBkTrace + tmp; | |
230 | j++; | |
231 | break; | |
232 | }; | |
233 | default: | |
234 | tmp = Expr[j+s+1] - 0x30; | |
235 | if (tmp >= 0 && tmp <= 9) { | |
236 | if (tmp == 1) { | |
237 | tmp = 10 + Expr[j+s+2] - 0x30; | |
238 | if (tmp >= 10 && tmp <= 19) j++; | |
239 | else tmp = 1; | |
240 | }; | |
241 | Exprn[pos] = ReBkBrack + tmp; | |
242 | break; | |
243 | } else | |
244 | Exprn[pos] = Expr[j+s+1]; | |
245 | break; | |
246 | }; | |
247 | j++; | |
248 | continue; | |
249 | }; | |
250 | if (Expr[j+s] == ']') { | |
251 | Exprn[pos] = ReEnumE; | |
252 | if (EnterFg || !EnterGr) return false; | |
253 | EnterGr--; | |
254 | }; | |
255 | if (Expr[j+s] == '-' && EnterGr) Exprn[pos] = ReFrToEnum; | |
256 | ||
257 | if (EnterGr) continue; | |
258 | ||
259 | if (Expr[j+s] == '[' && Expr[j+s+1] == '^') { | |
260 | Exprn[pos] = ReNEnumS; | |
261 | if (EnterFg) return false; | |
262 | EnterGr++; | |
263 | j++; | |
264 | continue; | |
265 | }; | |
266 | if (Expr[j+s] == '*' && Expr[j+s+1] == '?') { | |
267 | Exprn[pos] = ReNGMul; | |
268 | j++; | |
269 | continue; | |
270 | }; | |
271 | if (Expr[j+s] == '+' && Expr[j+s+1] == '?') { | |
272 | Exprn[pos] = ReNGPlus; | |
273 | j++; | |
274 | continue; | |
275 | }; | |
276 | if (Expr[j+s] == '?' && Expr[j+s+1] == '?') { | |
277 | Exprn[pos] = ReNGQuest; | |
278 | j++; | |
279 | continue; | |
280 | }; | |
281 | if (Expr[j+s] == '?' && Expr[j+s+1] == '#' && | |
282 | Expr[j+s+2]>='0' && Expr[j+s+2]<='9') { | |
283 | Exprn[pos] = ReBehind+Expr[j+s+2]-0x30; | |
284 | j+=2; | |
285 | continue; | |
286 | }; | |
287 | if (Expr[j+s] == '?' && Expr[j+s+1] == '~' && | |
288 | Expr[j+s+2]>='0' && Expr[j+s+2]<='9') { | |
289 | Exprn[pos] = ReNBehind+Expr[j+s+2]-0x30; | |
290 | j+=2; | |
291 | continue; | |
292 | }; | |
293 | if (Expr[j+s] == '?' && Expr[j+s+1] == '=') { | |
294 | Exprn[pos] = ReAhead; | |
295 | j++; | |
296 | continue; | |
297 | }; | |
298 | if (Expr[j+s] == '?' && Expr[j+s+1] == '!') { | |
299 | Exprn[pos] = ReNAhead; | |
300 | j++; | |
301 | continue; | |
302 | }; | |
303 | ||
304 | if (Expr[j+s] == '(') { | |
305 | Exprn[pos] = ReLBrack; | |
306 | if (EnterFg) return false; | |
307 | EnterBr++; | |
308 | }; | |
309 | if (Expr[j+s] == ')') { | |
310 | Exprn[pos] = ReRBrack; | |
311 | if (!EnterBr || EnterFg) return false; | |
312 | EnterBr--; | |
313 | }; | |
314 | if (Expr[j+s] == '[') { | |
315 | Exprn[pos] = ReEnumS; | |
316 | if (EnterFg) return false; | |
317 | EnterGr++; | |
318 | }; | |
319 | if (Expr[j+s] == '{') { | |
320 | Exprn[pos] = ReRangeS; | |
321 | if (EnterFg) return false; | |
322 | EnterFg++; | |
323 | }; | |
324 | if (Expr[j+s] == '}' && Expr[j+s+1] == '?') { | |
325 | Exprn[pos] = ReNGRangeE; | |
326 | if (!EnterFg) return false; | |
327 | EnterFg--; | |
328 | j++; | |
329 | continue; | |
330 | }; | |
331 | if (Expr[j+s] == '}') { | |
332 | Exprn[pos] = ReRangeE; | |
333 | if (!EnterFg) return false; | |
334 | EnterFg--; | |
335 | }; | |
336 | ||
337 | if (Expr[j+s] == '^') Exprn[pos] = ReSoL; | |
338 | if (Expr[j+s] == '$') Exprn[pos] = ReEoL; | |
339 | if (Expr[j+s] == '.') Exprn[pos] = ReAnyChr; | |
340 | if (Expr[j+s] == '*') Exprn[pos] = ReMul; | |
341 | if (Expr[j+s] == '+') Exprn[pos] = RePlus; | |
342 | if (Expr[j+s] == '?') Exprn[pos] = ReQuest; | |
343 | if (Expr[j+s] == '|') Exprn[pos] = ReOr; | |
344 | }; | |
345 | if (EnterGr || EnterBr || EnterFg) return false; | |
346 | ||
347 | Info->Op = ReBrackets; | |
348 | Info->un.Param = new SRegInfo; | |
349 | Info->s = CurMatch++; | |
350 | ||
351 | if (!SetStructs(Info->un.Param,0,pos)) return false; | |
352 | Optimize(); | |
353 | delete Exprn; | |
354 | return true; | |
355 | }; | |
356 | ||
357 | void PosRegExp::Optimize() { | |
358 | PRegInfo Next = Info; | |
359 | FirstChar = 0; | |
360 | while(Next) { | |
361 | if (Next->Op == ReBrackets || Next->Op == RePlus || Next->Op == ReNGPlus) { | |
362 | Next = Next->un.Param; | |
363 | continue; | |
364 | }; | |
365 | if (Next->Op == ReSymb) { | |
366 | if (Next->un.Symb & 0xFF00 && Next->un.Symb != ReSoL && Next->un.Symb != ReWBound) | |
367 | break; | |
368 | FirstChar = Next->un.Symb; | |
369 | break; | |
370 | }; | |
371 | break; | |
372 | }; | |
373 | }; | |
374 | ||
375 | bool PosRegExp::SetStructs(PRegInfo &re,int start,int end) { | |
376 | PRegInfo Next,Prev,Prev2; | |
377 | int comma,st,en,ng,i, j,k; | |
378 | int EnterBr; | |
379 | bool Add; | |
380 | ||
381 | if (end - start < 0) return false; | |
382 | Next = re; | |
383 | for (i = start; i < end; i++) { | |
384 | Add = false; | |
385 | // Ops | |
386 | if (Exprn[i] > ReBlockOps && Exprn[i] < ReSymbolOps) { | |
387 | Next->un.Param = 0; | |
388 | Next->Op = (EOps)Exprn[i]; | |
389 | Add = true; | |
390 | }; | |
391 | // {n,m} | |
392 | if (Exprn[i] == ReRangeS) { | |
393 | st = i; | |
394 | en = -1; | |
395 | comma = -1; | |
396 | ng = 0; | |
397 | for (j = i;j < end;j++) { | |
398 | if (Exprn[j] == ReNGRangeE) { | |
399 | en = j; | |
400 | ng = 1; | |
401 | break; | |
402 | }; | |
403 | if (Exprn[j] == ReRangeE) { | |
404 | en = j; | |
405 | break; | |
406 | }; | |
407 | if ((char)Exprn[j] == ',') | |
408 | comma = j; | |
409 | }; | |
410 | if (en == -1) return false; | |
411 | if (comma == -1) comma = en; | |
412 | Next->s = (char)GetNumber(Exprn,st+1,comma); | |
413 | if (comma != en) | |
414 | Next->e = (char)GetNumber(Exprn,comma+1,en); | |
415 | else | |
416 | Next->e = Next->s; | |
417 | Next->un.Param = 0; | |
418 | Next->Op = ng?ReNGRangeNM:ReRangeNM; | |
419 | if (en-comma == 1) { | |
420 | Next->e = -1; | |
421 | Next->Op = ng?ReNGRangeN:ReRangeN; | |
422 | }; | |
423 | i=j; | |
424 | Add = true; | |
425 | }; | |
426 | // [] [^] | |
427 | if (Exprn[i] == ReEnumS || Exprn[i] == ReNEnumS) { | |
428 | Next->Op = (Exprn[i] == ReEnumS)?ReEnum:ReNEnum; | |
429 | for (j = i+1;j < end;j++) { | |
430 | if (Exprn[j] == ReEnumE) | |
431 | break; | |
432 | }; | |
433 | if (j == end) return false; | |
434 | Next->un.ChrClass = new SCharData; | |
435 | memset(Next->un.ChrClass, 0, 32); | |
436 | for (j = i+1;Exprn[j] != ReEnumE;j++) { | |
437 | if (Exprn[j+1] == ReFrToEnum) { | |
438 | for (i = (Exprn[j]&0xFF); i < (Exprn[j+2]&0xFF);i++) | |
439 | Next->un.ChrClass->SetBit(i&0xFF); | |
440 | j++; | |
441 | continue; | |
442 | }; | |
443 | switch(Exprn[j]) { | |
444 | case ReDigit: | |
445 | for (k = 0x30;k < 0x40;k++) | |
446 | if (IsDigit((char)k)) | |
447 | Next->un.ChrClass->SetBit(k); | |
448 | break; | |
449 | case ReNDigit: | |
450 | for (k = 0x30;k < 0x40;k++) | |
451 | if (!IsDigit((char)k)) | |
452 | Next->un.ChrClass->SetBit(k); | |
453 | Next->un.ChrClass->ClearBit(0x0a); | |
454 | Next->un.ChrClass->ClearBit(0x0d); | |
455 | break; | |
456 | case ReWordSymb: | |
457 | for (k = 0;k < 256;k++) | |
458 | if (IsWord((char)k)) | |
459 | Next->un.ChrClass->SetBit(k); | |
460 | break; | |
461 | case ReNWordSymb: | |
462 | for (k = 0;k < 256;k++) | |
463 | if (!IsWord((char)k)) | |
464 | Next->un.ChrClass->SetBit(k); | |
465 | Next->un.ChrClass->ClearBit(0x0a); | |
466 | Next->un.ChrClass->ClearBit(0x0d); | |
467 | break; | |
468 | case ReWSpace: | |
469 | Next->un.ChrClass->SetBit(0x20); | |
470 | Next->un.ChrClass->SetBit(0x09); | |
471 | break; | |
472 | case ReNWSpace: | |
473 | memset(Next->un.ChrClass->IArr, 0xFF, 32); | |
474 | Next->un.ChrClass->ClearBit(0x20); | |
475 | Next->un.ChrClass->ClearBit(0x09); | |
476 | Next->un.ChrClass->ClearBit(0x0a); | |
477 | Next->un.ChrClass->ClearBit(0x0d); | |
478 | break; | |
479 | default: | |
480 | if (!(Exprn[j]&0xFF00)) | |
481 | Next->un.ChrClass->SetBit(Exprn[j]&0xFF); | |
482 | break; | |
483 | }; | |
484 | }; | |
485 | Add = true; | |
486 | i=j; | |
487 | }; | |
488 | // ( ... ) | |
489 | if (Exprn[i] == ReLBrack) { | |
490 | EnterBr = 1; | |
491 | for (j = i+1;j < end;j++) { | |
492 | if (Exprn[j] == ReLBrack) EnterBr++; | |
493 | if (Exprn[j] == ReRBrack) EnterBr--; | |
494 | if (!EnterBr) break; | |
495 | }; | |
496 | if (EnterBr) return false; | |
497 | Next->Op = ReBrackets; | |
498 | Next->un.Param = new SRegInfo; | |
499 | Next->un.Param->Parent = Next; | |
500 | Next->s = CurMatch++; | |
501 | if (CurMatch > MatchesNum) CurMatch = MatchesNum; | |
502 | if (!SetStructs(Next->un.Param,i+1,j)) return false; | |
503 | Add = true; | |
504 | i=j; | |
505 | }; | |
506 | if ((Exprn[i]&0xFF00) == ReBkTrace) { | |
507 | Next->Op = ReBkTrace; | |
508 | Next->un.Symb = Exprn[i]&0xFF; | |
509 | Add = true; | |
510 | }; | |
511 | if ((Exprn[i]&0xFF00) == ReBkBrack) { | |
512 | Next->Op = ReBkBrack; | |
513 | Next->un.Symb = Exprn[i]&0xFF; | |
514 | Add = true; | |
515 | }; | |
516 | if ((Exprn[i]&0xFF00) == ReBehind) { | |
517 | Next->Op = ReBehind; | |
518 | Next->s = Exprn[i]&0xFF; | |
519 | Add = true; | |
520 | }; | |
521 | if ((Exprn[i]&0xFF00) == ReNBehind) { | |
522 | Next->Op = ReNBehind; | |
523 | Next->s = Exprn[i]&0xFF; | |
524 | Add = true; | |
525 | }; | |
526 | // Chars | |
527 | if (Exprn[i] >= ReAnyChr && Exprn[i] < ReTemp || Exprn[i] < 0x100) { | |
528 | Next->Op = ReSymb; | |
529 | Next->un.Symb = Exprn[i]; | |
530 | Add = true; | |
531 | }; | |
532 | // Next | |
533 | if (Add && i != end-1) { | |
534 | Next->Next = new SRegInfo; | |
535 | Next->Next->Parent = Next->Parent; | |
536 | Next = Next->Next; | |
537 | }; | |
538 | }; | |
539 | Next = re; | |
540 | Prev = Prev2 = 0; | |
541 | while(Next) { | |
542 | if (Next->Op > ReBlockOps && Next->Op < ReSymbolOps) { | |
543 | if (!Prev) return false; | |
544 | if (!Prev2) re = Next; | |
545 | else Prev2->Next = Next; | |
546 | //if (Prev->Op > ReBlockOps && Prev->Op < ReSymbolOps) return false; | |
547 | Prev->Parent = Next; | |
548 | Prev->Next = 0; | |
549 | Next->un.Param = Prev; | |
550 | Prev = Prev2; | |
551 | }; | |
552 | Prev2 = Prev; | |
553 | Prev = Next; | |
554 | Next = Next->Next; | |
555 | }; | |
556 | ||
557 | return true; | |
558 | }; | |
559 | ||
560 | ///////////////////////////////////////////////////////////////// | |
561 | ///////////////////////// Parsing ///////////////////////////// | |
562 | ///////////////////////////////////////////////////////////////// | |
563 | ||
564 | bool PosRegExp::CheckSymb(int Symb,bool Inc) { | |
565 | bool Res; | |
566 | char ch; | |
567 | switch(Symb) { | |
568 | case ReAnyChr: | |
569 | if (posParse >= posEnd) return false; | |
570 | ch = CharAt(posParse,param); | |
571 | Res = ch != '\r' && ch != '\n'; | |
572 | if (Res && Inc) posParse++; | |
573 | return Res; | |
574 | case ReSoL: | |
575 | if (posStart == posParse) | |
576 | return true; | |
577 | ch = CharAt(posParse-1,param); | |
578 | return ch == '\n' || ch == '\r'; | |
579 | case ReEoL: | |
580 | if (posEnd == posParse) | |
581 | return true; | |
582 | ch = CharAt(posParse,param); | |
583 | return ch == '\n' || ch == '\r'; | |
584 | case ReDigit: | |
585 | if (posParse >= posEnd) return false; | |
586 | ch = CharAt(posParse,param); | |
587 | Res = (ch >= 0x30 && ch <= 0x39); | |
588 | if (Res && Inc) posParse++; | |
589 | return Res; | |
590 | case ReNDigit: | |
591 | if (posParse >= posEnd) return false; | |
592 | ch = CharAt(posParse,param); | |
593 | Res = !(ch >= 0x30 && ch <= 0x39) && ch != '\r' && ch != '\n'; | |
594 | if (Res && Inc) posParse++; | |
595 | return Res; | |
596 | case ReWordSymb: | |
597 | if (posParse >= posEnd) return false; | |
598 | Res = IsWord(CharAt(posParse,param)); | |
599 | if (Res && Inc) posParse++; | |
600 | return Res; | |
601 | case ReNWordSymb: | |
602 | if (posParse >= posEnd) return false; | |
603 | ch = CharAt(posParse,param); | |
604 | Res = !IsWord(ch) && ch != '\r' && ch != '\n'; | |
605 | if (Res && Inc) posParse++; | |
606 | return Res; | |
607 | case ReWSpace: | |
608 | if (posParse >= posEnd) return false; | |
609 | ch = CharAt(posParse,param); | |
610 | Res = (ch == 0x20 || ch == '\t'); | |
611 | if (Res && Inc) posParse++; | |
612 | return Res; | |
613 | case ReNWSpace: | |
614 | if (posParse >= posEnd) return false; | |
615 | ch = CharAt(posParse,param); | |
616 | Res = !(ch == 0x20 || ch == '\t') && ch != '\r' && ch != '\n'; | |
617 | if (Res && Inc) posParse++; | |
618 | return Res; | |
619 | case ReUCase: | |
620 | if (posParse >= posEnd) return false; | |
621 | Res = IsUpperCase(CharAt(posParse,param)); | |
622 | if (Res && Inc) posParse++; | |
623 | return Res; | |
624 | case ReNUCase: | |
625 | if (posParse >= posEnd) return false; | |
626 | Res = IsLowerCase(CharAt(posParse,param)); | |
627 | if (Res && Inc) posParse++; | |
628 | return Res; | |
629 | case ReWBound: | |
630 | if (posParse >= posEnd) return true; | |
631 | ch = CharAt(posParse,param); | |
632 | return IsWord(CharAt(posParse,param)) && (posParse == posStart || !IsWord(CharAt(posParse-1,param))); | |
633 | case ReNWBound: | |
634 | if (posParse >= posEnd) return true; | |
635 | return !IsWord(CharAt(posParse,param)) && IsWord(CharAt(posParse-1,param)); | |
636 | case RePreNW: | |
637 | if (posParse >= posEnd) return true; | |
638 | return (posParse == posStart || !IsWord(CharAt(posParse-1,param))); | |
639 | case ReStart: | |
640 | Matches->s[0] = (posParse-posStart); | |
641 | return true; | |
642 | case ReEnd: | |
643 | Matches->e[0] = (posParse-posStart); | |
644 | return true; | |
645 | default: | |
646 | if ((Symb & 0xFF00) || posParse >= posEnd) return false; | |
647 | if (NoCase) { | |
648 | if (LowCase(CharAt(posParse,param)) != LowCase((char)Symb&0xFF)) return false; | |
649 | } else | |
650 | if (CharAt(posParse,param) != (char)(Symb&0xFF)) return false; | |
651 | if (Inc) posParse++; | |
652 | return true; | |
653 | }; | |
654 | } | |
655 | ||
656 | bool PosRegExp::LowParseRe(PRegInfo &Next) { | |
657 | PRegInfo OrNext; | |
658 | int i,match,sv; | |
659 | int posStr; | |
660 | ||
661 | switch(Next->Op) { | |
662 | case ReSymb: | |
663 | if (!CheckSymb(Next->un.Symb,true)) return false; | |
664 | break; | |
665 | case ReEmpty: | |
666 | break; | |
667 | case ReBkTrace: | |
668 | if (!posBkStr | !BkTrace) return false; | |
669 | sv = Next->un.Symb; | |
670 | posStr = posParse; | |
671 | for (i = BkTrace->s[sv]; i < BkTrace->e[sv]; i++) { | |
672 | if (CharAt(posStr,param) != CharAt(posBkStr+i,param) || posEnd == posStr) return false; | |
673 | posStr++; | |
674 | }; | |
675 | posParse = posStr; | |
676 | break; | |
677 | case ReBkBrack: | |
678 | sv = Next->un.Symb; | |
679 | posStr = posParse; | |
680 | if (Matches->s[sv] == -1 || Matches->e[sv] == -1) return false; | |
681 | for (i = Matches->s[sv]; i < Matches->e[sv]; i++) { | |
682 | if (CharAt(posStr,param) != CharAt(posStart+i,param) || posEnd == posStr) return false; | |
683 | posStr++; | |
684 | }; | |
685 | posParse = posStr; | |
686 | break; | |
687 | case ReBehind: | |
688 | sv = Next->s; | |
689 | posStr = posParse; | |
690 | posParse -= sv; | |
691 | if (!LowParse(Next->un.Param)) return false; | |
692 | posParse = posStr; | |
693 | break; | |
694 | case ReNBehind: | |
695 | sv = Next->s; | |
696 | posStr = posParse; | |
697 | posParse -= sv; | |
698 | if (LowParse(Next->un.Param)) return false; | |
699 | posParse = posStr; | |
700 | break; | |
701 | case ReAhead: | |
702 | posStr = posParse; | |
703 | if (!LowParse(Next->un.Param)) return false; | |
704 | posParse = posStr; | |
705 | break; | |
706 | case ReNAhead: | |
707 | posStr = posParse; | |
708 | if (LowParse(Next->un.Param)) return false; | |
709 | posParse = posStr; | |
710 | break; | |
711 | case ReEnum: | |
712 | if (posParse >= posEnd) return false; | |
713 | if (!Next->un.ChrClass->GetBit(CharAt(posParse,param))) return false; | |
714 | posParse++; | |
715 | break; | |
716 | case ReNEnum: | |
717 | if (posParse >= posEnd) return false; | |
718 | if (Next->un.ChrClass->GetBit(CharAt(posParse,param))) return false; | |
719 | posParse++; | |
720 | break; | |
721 | case ReBrackets: | |
722 | match = Next->s; | |
723 | sv = posParse-posStart; | |
724 | posStr = posParse; | |
725 | if (LowParse(Next->un.Param)) { | |
726 | if (match || (Matches->s[match] == -1)) | |
727 | Matches->s[match] = sv; | |
728 | if (match || (Matches->e[match] == -1)) | |
729 | Matches->e[match] = posParse-posStart; | |
730 | return true; | |
731 | }; | |
732 | posParse = posStr; | |
733 | return false; | |
734 | case ReMul: | |
735 | posStr = posParse; | |
736 | while (LowParse(Next->un.Param)); | |
737 | while(!LowCheckNext(Next) && posStr < posParse) posParse--; | |
738 | break; | |
739 | case ReNGMul: | |
740 | do { | |
741 | if (LowCheckNext(Next)) break; | |
742 | } while (LowParse(Next->un.Param)); | |
743 | break; | |
744 | case RePlus: | |
745 | posStr = posParse; | |
746 | match = false; | |
747 | while (LowParse(Next->un.Param)) | |
748 | match = true; | |
749 | if (!match) return false; | |
750 | while(!LowCheckNext(Next) && posStr < posParse) posParse--; | |
751 | break; | |
752 | case ReNGPlus: | |
753 | if (!LowParse(Next->un.Param)) return false; | |
754 | do { | |
755 | if (LowCheckNext(Next)) break; | |
756 | } while (LowParse(Next->un.Param)); | |
757 | break; | |
758 | case ReQuest: | |
759 | LowParse(Next->un.Param); | |
760 | break; | |
761 | case ReNGQuest: | |
762 | if (LowCheckNext(Next)) break; | |
763 | if (!LowParse(Next->un.Param)) return false; | |
764 | break; | |
765 | case ReOr: | |
766 | OrNext = Next; | |
767 | // posStr = posParse; | |
768 | if (LowParse(Next->un.Param)) { | |
769 | while (OrNext && OrNext->Op == ReOr) | |
770 | OrNext = OrNext->Next; | |
771 | /*if (!LowCheckNext(OrNext)){ | |
772 | posParse = posStr; | |
773 | OrNext = Next; | |
774 | };*/ | |
775 | }; | |
776 | Next = OrNext; | |
777 | break; | |
778 | case ReRangeN: | |
779 | posStr = posParse; | |
780 | i = 0; | |
781 | while (LowParse(Next->un.Param)) i++; // ??? | |
782 | do { | |
783 | if (i < Next->s) { | |
784 | posParse = posStr; | |
785 | return false; | |
786 | }; | |
787 | i--; | |
788 | } while(!LowCheckNext(Next) && posStr < posParse--); | |
789 | break; | |
790 | case ReNGRangeN: | |
791 | posStr = posParse; | |
792 | i = 0; | |
793 | while (LowParse(Next->un.Param)) { | |
794 | i++; | |
795 | if (i >= Next->s && LowCheckNext(Next)) // ??? | |
796 | break; | |
797 | }; | |
798 | if (i < Next->s) { | |
799 | posParse = posStr; | |
800 | return false; | |
801 | }; | |
802 | break; | |
803 | case ReRangeNM: | |
804 | posStr = posParse; | |
805 | i = 0; | |
806 | while (i < Next->s && LowParse(Next->un.Param)) // ??? | |
807 | i++; | |
808 | if (i < Next->s) { | |
809 | posParse = posStr; | |
810 | return false; | |
811 | }; | |
812 | while (i < Next->e && LowParse(Next->un.Param)) // ??? | |
813 | i++; | |
814 | ||
815 | while(!LowCheckNext(Next)) { | |
816 | i--; | |
817 | posParse--; | |
818 | if (i < Next->s) { | |
819 | posParse = posStr; | |
820 | return false; | |
821 | }; | |
822 | }; | |
823 | break; | |
824 | case ReNGRangeNM: | |
825 | posStr = posParse; | |
826 | i = 0; | |
827 | while (i < Next->s && LowParse(Next->un.Param)) // ??? | |
828 | i++; | |
829 | if (i < Next->s) { | |
830 | posParse = posStr; | |
831 | return false; | |
832 | }; | |
833 | while(!LowCheckNext(Next)) { | |
834 | i++; | |
835 | if (!LowParse(Next->un.Param) || i > Next->e) { // ??? | |
836 | posParse = posStr; | |
837 | return false; | |
838 | }; | |
839 | }; | |
840 | break; | |
841 | }; | |
842 | return true; | |
843 | }; | |
844 | ||
845 | bool PosRegExp::LowCheckNext(PRegInfo Re) { | |
846 | PRegInfo Next; | |
847 | int tmp = posParse; | |
848 | Next = Re; | |
849 | do { | |
850 | if (Next && Next->Op == ReOr) | |
851 | while (Next && Next->Op == ReOr) | |
852 | Next = Next->Next; | |
853 | if (Next->Next && !LowParse(Next->Next)) { | |
854 | posParse = tmp; | |
855 | Ok = false; | |
856 | return false; | |
857 | }; | |
858 | Next = Next->Parent; | |
859 | } while(Next); | |
860 | posParse = tmp; | |
861 | if (Ok != false) Ok = true; | |
862 | return true; | |
863 | }; | |
864 | ||
865 | bool PosRegExp::LowParse(PRegInfo Re) { | |
866 | while(Re && posParse <= posEnd) { | |
867 | if (!LowParseRe(Re)) return false; | |
868 | if (Re) Re = Re->Next; | |
869 | }; | |
870 | return true; | |
871 | }; | |
872 | ||
873 | bool PosRegExp::QuickCheck() { | |
874 | if (!NoMoves || !FirstChar) | |
875 | return true; | |
876 | switch(FirstChar) { | |
877 | case ReSoL: | |
878 | if (posParse != posStart) return false; | |
879 | return true; | |
880 | case ReWBound: | |
881 | return IsWord(CharAt(posParse,param)) && (posParse == posStart || !IsWord(CharAt(posParse-1,param))); | |
882 | default: | |
883 | if (NoCase && LowCase(CharAt(posParse,param)) != LowCase(FirstChar)) return false; | |
884 | if (!NoCase && CharAt(posParse,param) != (char)FirstChar) return false; | |
885 | return true; | |
886 | }; | |
887 | }; | |
888 | ||
889 | bool PosRegExp::ParseRe(int posStr) { | |
890 | if (Error) return false; | |
891 | ||
892 | posParse = posStr; | |
893 | if (!QuickCheck()) return false; | |
894 | ||
895 | for (int i = 0; i < MatchesNum; i++) | |
896 | Matches->s[i] = Matches->e[i] = -1; | |
897 | Matches->CurMatch = CurMatch; | |
898 | ||
899 | Ok = -1; | |
900 | //try{ | |
901 | do { | |
902 | if (!LowParse(Info)) { | |
903 | if (NoMoves) return false; | |
904 | } else | |
905 | return true; | |
906 | posParse = ++posStr; | |
907 | } while(posParse != posEnd+1); | |
908 | return false; | |
909 | //}__except(){ | |
910 | // return true; | |
911 | //}; | |
912 | } | |
913 | ; | |
914 | ||
915 | bool PosRegExp::Parse(int posStr,int posSol, int posEol, PMatches Mtch, int Moves) { | |
916 | if (!this) return false; | |
917 | ||
918 | bool s = NoMoves; | |
919 | if (Moves != -1) NoMoves = Moves!=0; | |
920 | posStart = posSol; | |
921 | posEnd = posEol; | |
922 | Matches = Mtch; | |
923 | bool r = ParseRe(posStr); | |
924 | NoMoves = s; | |
925 | return r; | |
926 | }; | |
927 | ||
928 | bool PosRegExp::Parse(int posStr, int posStop, PMatches Mtch) { | |
929 | if (!this) return false; | |
930 | posStart = posStr; | |
931 | posEnd = posStop; | |
932 | Matches = Mtch; | |
933 | return ParseRe(posStr); | |
934 | }; | |
935 | ||
936 | bool PosRegExp::SetNoMoves(bool Moves) { | |
937 | NoMoves = Moves; | |
938 | return true; | |
939 | }; | |
940 | ||
941 | bool PosRegExp::SetBkTrace(int posStr,PMatches Trace) { | |
942 | BkTrace = Trace; | |
943 | posBkStr = posStr; | |
944 | return true; | |
945 | }; | |
946 | ||
947 | #define EVAL_MATCHES 16 | |
948 | #define EVAL_CHUNKSIZE 256 | |
949 | ||
950 | #define EVAL_LOWERCASE 1 | |
951 | #define EVAL_UPPERCASE 2 | |
952 | #define EVAL_LOWERCASE_NEXT 4 | |
953 | #define EVAL_UPPERCASE_NEXT 8 | |
954 | ||
955 | bool PosRegExp::Evaluate(char *Expr, int posStr, PMatches Mtch, char **Res) { | |
956 | int length, | |
957 | newlength, | |
958 | chunklength, | |
959 | value, | |
960 | size, | |
961 | src, | |
962 | end; | |
963 | unsigned flag; | |
964 | char ch, | |
965 | *dest, | |
966 | *pool; | |
967 | ||
968 | size = EVAL_CHUNKSIZE; | |
969 | pool = (char*) malloc (size); | |
970 | dest = pool; | |
971 | length = 0; | |
972 | flag = 0; | |
973 | while (*Expr) { | |
974 | switch (ch = *Expr++) { | |
975 | case '\\': | |
976 | switch (ch = *Expr++) { | |
977 | case 'A': | |
978 | case 'B': | |
979 | case 'C': | |
980 | case 'D': | |
981 | case 'E': | |
982 | case 'F': | |
983 | ch -= ('A' - '0'); | |
984 | case '0': | |
985 | case '1': | |
986 | case '2': | |
987 | case '3': | |
988 | case '4': | |
989 | case '5': | |
990 | case '6': | |
991 | case '7': | |
992 | case '8': | |
993 | case '9': | |
994 | value = ch - '0'; | |
995 | if (Mtch->s[value] != -1 && value < EVAL_MATCHES) { | |
996 | chunklength = Mtch->e[value] - Mtch->s[value]; | |
997 | if (chunklength) { | |
998 | newlength = chunklength + length; | |
999 | if (newlength > size) { | |
1000 | do | |
1001 | size += EVAL_CHUNKSIZE; | |
1002 | while (size < newlength); | |
1003 | pool = (char*) realloc (pool, size); | |
1004 | dest = pool + length; | |
1005 | } | |
1006 | length = newlength; | |
1007 | src = posStr + Mtch->s[value]; | |
1008 | end = posStr + Mtch->e[value]; | |
1009 | if (flag & EVAL_UPPERCASE) { | |
1010 | if (flag & EVAL_LOWERCASE_NEXT) { | |
1011 | *dest++ = tolower (CharAt(src++,param)); | |
1012 | flag &= ~EVAL_LOWERCASE_NEXT; | |
1013 | } | |
1014 | while (src < end) | |
1015 | *dest++ = toupper (CharAt(src++,param)); | |
1016 | } else if (flag & EVAL_LOWERCASE) { | |
1017 | if (flag & EVAL_UPPERCASE_NEXT) { | |
1018 | *dest++ = toupper (CharAt(src++,param)); | |
1019 | flag &= ~EVAL_UPPERCASE_NEXT; | |
1020 | } | |
1021 | while (src < end) | |
1022 | *dest++ = tolower (CharAt(src++,param)); | |
1023 | } else { | |
1024 | if (flag & EVAL_LOWERCASE_NEXT) { | |
1025 | *dest++ = tolower (CharAt(src++,param)); | |
1026 | flag &= ~EVAL_LOWERCASE_NEXT; | |
1027 | } else if (flag & EVAL_UPPERCASE_NEXT) { | |
1028 | *dest++ = toupper (CharAt(src++,param)); | |
1029 | flag &= ~EVAL_UPPERCASE_NEXT; | |
1030 | } | |
1031 | while (src < end) | |
1032 | *dest++ = CharAt(src++,param); | |
1033 | } | |
1034 | } | |
1035 | } else | |
1036 | goto error; | |
1037 | continue; | |
1038 | case '\0': | |
1039 | goto error; | |
1040 | case 'r': | |
1041 | ch = '\r'; | |
1042 | break; | |
1043 | case 'n': | |
1044 | ch = '\n'; | |
1045 | break; | |
1046 | case 'b': | |
1047 | ch = '\b'; | |
1048 | break; | |
1049 | case 'a': | |
1050 | ch = '\a'; | |
1051 | break; | |
1052 | case 't': | |
1053 | ch = '\t'; | |
1054 | break; | |
1055 | case 'U': | |
1056 | flag |= EVAL_UPPERCASE; | |
1057 | continue; | |
1058 | case 'u': | |
1059 | flag |= EVAL_UPPERCASE_NEXT; | |
1060 | continue; | |
1061 | case 'L': | |
1062 | flag |= EVAL_LOWERCASE; | |
1063 | continue; | |
1064 | case 'l': | |
1065 | flag |= EVAL_LOWERCASE_NEXT; | |
1066 | continue; | |
1067 | case 'Q': | |
1068 | case 'q': | |
1069 | flag &= ~(EVAL_UPPERCASE | EVAL_LOWERCASE); | |
1070 | continue; | |
1071 | case 'x': | |
1072 | { | |
1073 | if (!*Expr) | |
1074 | goto error; | |
1075 | value = toupper (*Expr) - '0'; | |
1076 | if (value > 9) | |
1077 | value = value + '0' - 'A' + 10; | |
1078 | if (value > 15) | |
1079 | goto error; | |
1080 | ch = value << 4; | |
1081 | Expr++; | |
1082 | if (!*Expr) | |
1083 | goto error; | |
1084 | value = toupper (*Expr) - '0'; | |
1085 | if (value > 9) | |
1086 | value = value + '0' - 'A' + 10; | |
1087 | if (value > 15) | |
1088 | goto error; | |
1089 | Expr++; | |
1090 | ch |= value; | |
1091 | break; | |
1092 | } | |
1093 | case 'd': | |
1094 | { | |
1095 | if (!*Expr) | |
1096 | goto error; | |
1097 | value = toupper (*Expr) - '0'; | |
1098 | if (value > 9) | |
1099 | goto error; | |
1100 | ch = value * 100; | |
1101 | Expr++; | |
1102 | if (!*Expr) | |
1103 | goto error; | |
1104 | value = toupper (*Expr) - '0'; | |
1105 | if (value > 9) | |
1106 | goto error; | |
1107 | ch += value * 10; | |
1108 | Expr++; | |
1109 | if (!*Expr) | |
1110 | goto error; | |
1111 | value = toupper (*Expr) - '0'; | |
1112 | if (value > 9) | |
1113 | goto error; | |
1114 | ch += value; | |
1115 | Expr++; | |
1116 | break; | |
1117 | } | |
1118 | case 'o': | |
1119 | { | |
1120 | if (!*Expr) | |
1121 | goto error; | |
1122 | value = toupper (*Expr) - '0'; | |
1123 | if (value > 9) | |
1124 | goto error; | |
1125 | ch = value << 6; | |
1126 | Expr++; | |
1127 | if (!*Expr) | |
1128 | goto error; | |
1129 | value = toupper (*Expr) - '0'; | |
1130 | if (value > 9) | |
1131 | goto error; | |
1132 | ch += value << 3; | |
1133 | Expr++; | |
1134 | if (!*Expr) | |
1135 | goto error; | |
1136 | value = toupper (*Expr) - '0'; | |
1137 | if (value > 9) | |
1138 | goto error; | |
1139 | ch |= value; | |
1140 | Expr++; | |
1141 | /* break; */ | |
1142 | } | |
1143 | /* default: | |
1144 | break; */ | |
1145 | } | |
1146 | default: | |
1147 | if (++length > size) { | |
1148 | do | |
1149 | size += EVAL_CHUNKSIZE; | |
1150 | while (size < length); | |
1151 | pool = (char*) realloc (pool, size); | |
1152 | dest = pool + length - 1; | |
1153 | } | |
1154 | if (flag & EVAL_LOWERCASE_NEXT) { | |
1155 | *dest++ = tolower (ch); | |
1156 | flag &= ~EVAL_LOWERCASE_NEXT; | |
1157 | } else if (flag & EVAL_UPPERCASE_NEXT) { | |
1158 | *dest++ = toupper (ch); | |
1159 | flag &= ~EVAL_UPPERCASE_NEXT; | |
1160 | } else if (flag & EVAL_UPPERCASE) | |
1161 | *dest++ = toupper (ch); | |
1162 | else if (flag & EVAL_LOWERCASE) | |
1163 | *dest++ = tolower (ch); | |
1164 | else | |
1165 | *dest++ = ch; | |
1166 | } | |
1167 | } | |
1168 | if (++length > size) { | |
1169 | do | |
1170 | size += EVAL_CHUNKSIZE; | |
1171 | while (size < length); | |
1172 | pool = (char*) realloc (pool, size); | |
1173 | dest = pool + length - 1; | |
1174 | } | |
1175 | *dest = '\0'; | |
1176 | *Res = pool; | |
1177 | return true; | |
1178 | error: | |
1179 | free (pool); | |
1180 | return false; | |
1181 | } |