]> git.saurik.com Git - wxWidgets.git/blame - docs/doxygen/overviews/resyntax.h
moving forward
[wxWidgets.git] / docs / doxygen / overviews / resyntax.h
CommitLineData
15b6757b
FM
1/////////////////////////////////////////////////////////////////////////////
2// Name: resyn
3// Purpose: topic overview
4// Author: wxWidgets team
5// RCS-ID: $Id$
6// Licence: wxWindows license
7/////////////////////////////////////////////////////////////////////////////
8
9/*!
36c9828f 10
15b6757b 11 @page resyn_overview Syntax of the builtin regular expression library
36c9828f 12
15b6757b 13 A @e regular expression describes strings of characters. It's a
36c9828f 14 pattern that matches certain strings and doesn't match others.
15b6757b
FM
15 @b See also
16 #wxRegEx
17 @ref differentflavors_overview
18 @ref resyntax_overview
19 @ref resynbracket_overview
20 #Escapes
21 #Metasyntax
22 #Matching
23 @ref relimits_overview
24 @ref resynbre_overview
25 @ref resynchars_overview
36c9828f
FM
26
27
15b6757b 28 @section differentflavors Different Flavors of REs
36c9828f 29
15b6757b
FM
30 @ref resyn_overview
31 Regular expressions ("RE"s), as defined by POSIX, come in two
32 flavors: @e extended REs ("EREs") and @e basic REs ("BREs"). EREs are roughly those
33 of the traditional @e egrep, while BREs are roughly those of the traditional
34 @e ed. This implementation adds a third flavor, @e advanced REs ("AREs"), basically
36c9828f 35 EREs with some significant extensions.
15b6757b
FM
36 This manual page primarily describes
37 AREs. BREs mostly exist for backward compatibility in some old programs;
38 they will be discussed at the end. POSIX EREs are almost an exact subset
39 of AREs. Features of AREs that are not present in EREs will be indicated.
36c9828f 40
15b6757b 41 @section resyntax Regular Expression Syntax
36c9828f 42
15b6757b
FM
43 @ref resyn_overview
44 These regular expressions are implemented using
45 the package written by Henry Spencer, based on the 1003.2 spec and some
46 (not quite all) of the Perl5 extensions (thanks, Henry!). Much of the description
36c9828f 47 of regular expressions below is copied verbatim from his manual entry.
15b6757b 48 An ARE is one or more @e branches, separated by '@b |', matching anything that matches
36c9828f 49 any of the branches.
15b6757b
FM
50 A branch is zero or more @e constraints or @e quantified
51 atoms, concatenated. It matches a match for the first, followed by a match
36c9828f 52 for the second, etc; an empty branch matches the empty string.
15b6757b
FM
53 A quantified atom is an @e atom possibly followed by a single @e quantifier. Without a quantifier,
54 it matches a match for the atom. The quantifiers, and what a so-quantified
55 atom matches, are:
36c9828f
FM
56
57
58
59
60
61
15b6757b 62 @b *
36c9828f
FM
63
64
65
66
15b6757b 67 a sequence of 0 or more matches of the atom
36c9828f
FM
68
69
70
71
72
15b6757b 73 @b +
36c9828f
FM
74
75
76
77
15b6757b 78 a sequence of 1 or more matches of the atom
36c9828f
FM
79
80
81
82
83
15b6757b 84 @b ?
36c9828f
FM
85
86
87
88
15b6757b 89 a sequence of 0 or 1 matches of the atom
36c9828f
FM
90
91
92
93
94
15b6757b 95 @b {m}
36c9828f
FM
96
97
98
99
15b6757b 100 a sequence of exactly @e m matches of the atom
36c9828f
FM
101
102
103
104
105
15b6757b 106 @b {m,}
36c9828f
FM
107
108
109
110
15b6757b 111 a sequence of @e m or more matches of the atom
36c9828f
FM
112
113
114
115
116
15b6757b 117 @b {m,n}
36c9828f
FM
118
119
120
121
15b6757b
FM
122 a sequence of @e m through @e n (inclusive)
123 matches of the atom; @e m may not exceed @e n
36c9828f
FM
124
125
126
127
128
15b6757b 129 @b *? +? ?? {m}? {m,}? {m,n}?
36c9828f
FM
130
131
132
133
15b6757b
FM
134 @e non-greedy quantifiers,
135 which match the same possibilities, but prefer the
136 smallest number rather than the largest number of matches (see #Matching)
36c9828f
FM
137
138
139
140
141
15b6757b 142 The forms using @b { and @b } are known as @e bounds. The numbers @e m and @e n are unsigned
36c9828f 143 decimal integers with permissible values from 0 to 255 inclusive.
15b6757b 144 An atom is one of:
36c9828f
FM
145
146
147
148
149
150
15b6757b 151 @b (re)
36c9828f
FM
152
153
154
155
15b6757b
FM
156 (where @e re is any regular expression) matches a match for
157 @e re, with the match noted for possible reporting
36c9828f
FM
158
159
160
161
162
15b6757b 163 @b (?:re)
36c9828f
FM
164
165
166
167
15b6757b
FM
168 as previous, but
169 does no reporting (a "non-capturing" set of parentheses)
36c9828f
FM
170
171
172
173
174
15b6757b 175 @b ()
36c9828f
FM
176
177
178
179
15b6757b
FM
180 matches an empty
181 string, noted for possible reporting
36c9828f
FM
182
183
184
185
186
15b6757b 187 @b (?:)
36c9828f
FM
188
189
190
191
15b6757b 192 matches an empty string, without reporting
36c9828f
FM
193
194
195
196
197
15b6757b 198 @b [chars]
36c9828f
FM
199
200
201
202
15b6757b
FM
203 a @e bracket expression, matching any one of the @e chars
204 (see @ref resynbracket_overview for more detail)
36c9828f
FM
205
206
207
208
209
15b6757b 210 @b .
36c9828f
FM
211
212
213
214
215 matches any single character
216
217
218
219
220
15b6757b 221 @b \k
36c9828f
FM
222
223
224
225
15b6757b
FM
226 (where @e k is a non-alphanumeric character)
227 matches that character taken as an ordinary character, e.g. \\ matches a backslash
228 character
36c9828f
FM
229
230
231
232
233
15b6757b 234 @b \c
36c9828f
FM
235
236
237
238
15b6757b
FM
239 where @e c is alphanumeric (possibly followed by other characters),
240 an @e escape (AREs only), see #Escapes below
36c9828f
FM
241
242
243
244
245
15b6757b 246 @b {
36c9828f
FM
247
248
249
250
15b6757b
FM
251 when followed by a character
252 other than a digit, matches the left-brace character '@b {'; when followed by
253 a digit, it is the beginning of a @e bound (see above)
36c9828f
FM
254
255
256
257
258
15b6757b 259 @b x
36c9828f
FM
260
261
262
263
15b6757b
FM
264 where @e x is a single
265 character with no other significance, matches that character.
36c9828f
FM
266
267
268
269
270
15b6757b
FM
271 A @e constraint matches an empty string when specific conditions are met. A constraint may
272 not be followed by a quantifier. The simple constraints are as follows;
273 some more constraints are described later, under #Escapes.
36c9828f
FM
274
275
276
277
278
279
15b6757b 280 @b ^
36c9828f
FM
281
282
283
284
15b6757b 285 matches at the beginning of a line
36c9828f
FM
286
287
288
289
290
15b6757b 291 @b $
36c9828f
FM
292
293
294
295
15b6757b 296 matches at the end of a line
36c9828f
FM
297
298
299
300
301
15b6757b 302 @b (?=re)
36c9828f
FM
303
304
305
306
15b6757b
FM
307 @e positive lookahead
308 (AREs only), matches at any point where a substring matching @e re begins
36c9828f
FM
309
310
311
312
313
15b6757b 314 @b (?!re)
36c9828f
FM
315
316
317
318
15b6757b
FM
319 @e negative lookahead (AREs only),
320 matches at any point where no substring matching @e re begins
36c9828f
FM
321
322
323
324
325
15b6757b
FM
326 The lookahead constraints may not contain back references
327 (see later), and all parentheses within them are considered non-capturing.
328 An RE may not end with '@b \'.
36c9828f 329
15b6757b 330 @section wxresynbracket Bracket Expressions
36c9828f 331
15b6757b
FM
332 @ref resyn_overview
333 A @e bracket expression is a list
334 of characters enclosed in '@b []'. It normally matches any single character from
335 the list (but see below). If the list begins with '@b ^', it matches any single
36c9828f 336 character (but see below) @e not from the rest of the list.
15b6757b
FM
337 If two characters
338 in the list are separated by '@b -', this is shorthand for the full @e range of
339 characters between those two (inclusive) in the collating sequence, e.g.
340 @b [0-9] in ASCII matches any decimal digit. Two ranges may not share an endpoint,
341 so e.g. @b a-c-e is illegal. Ranges are very collating-sequence-dependent, and portable
36c9828f 342 programs should avoid relying on them.
15b6757b
FM
343 To include a literal @b ] or @b - in the
344 list, the simplest method is to enclose it in @b [. and @b .] to make it a collating
345 element (see below). Alternatively, make it the first character (following
346 a possible '@b ^'), or (AREs only) precede it with '@b \'.
347 Alternatively, for '@b -', make
348 it the last character, or the second endpoint of a range. To use a literal
349 @b - as the first endpoint of a range, make it a collating element or (AREs
350 only) precede it with '@b \'. With the exception of these, some combinations using
351 @b [ (see next paragraphs), and escapes, all other special characters lose
36c9828f 352 their special significance within a bracket expression.
15b6757b
FM
353 Within a bracket
354 expression, a collating element (a character, a multi-character sequence
355 that collates as if it were a single character, or a collating-sequence
356 name for either) enclosed in @b [. and @b .] stands for the
357 sequence of characters of that collating element.
358 @e wxWidgets: Currently no multi-character collating elements are defined.
359 So in @b [.X.], @e X can either be a single character literal or
360 the name of a character. For example, the following two are identical:
361 @b [[.0.]-[.9.]] and @b [[.zero.]-[.nine.]]; both mean the same as
362 @b [0-9].
363 See @ref resynchars_overview.
364 Within a bracket expression, a collating element enclosed in @b [= and @b =]
365 is an equivalence class, standing for the sequences of characters of all
366 collating elements equivalent to that one, including itself.
367 An equivalence class may not be an endpoint of a range.
36c9828f
FM
368 @e wxWidgets: Currently no equivalence classes are defined, so
369 @b [=X=] stands for just the single character @e X.
15b6757b
FM
370 @e X can either be a single character literal or the name of a character,
371 see @ref resynchars_overview.
372 Within a bracket expression,
373 the name of a @e character class enclosed in @b [: and @b :] stands for the list
374 of all characters (not all collating elements!) belonging to that class.
375 Standard character classes are:
36c9828f
FM
376
377
378
379
380
381
15b6757b 382 @b alpha
36c9828f
FM
383
384
385
386
15b6757b 387 A letter.
36c9828f
FM
388
389
390
391
392
15b6757b 393 @b upper
36c9828f
FM
394
395
396
397
15b6757b 398 An upper-case letter.
36c9828f
FM
399
400
401
402
403
15b6757b 404 @b lower
36c9828f
FM
405
406
407
408
15b6757b 409 A lower-case letter.
36c9828f
FM
410
411
412
413
414
15b6757b 415 @b digit
36c9828f
FM
416
417
418
419
15b6757b 420 A decimal digit.
36c9828f
FM
421
422
423
424
425
426 @b xdigit
427
428
429
430
431 A hexadecimal digit.
432
433
434
435
436
437 @b alnum
438
439
440
441
15b6757b 442 An alphanumeric (letter or digit).
36c9828f
FM
443
444
445
446
447
15b6757b 448 @b print
36c9828f
FM
449
450
451
452
15b6757b 453 An alphanumeric (same as alnum).
36c9828f
FM
454
455
456
457
458
15b6757b 459 @b blank
36c9828f
FM
460
461
462
463
15b6757b 464 A space or tab character.
36c9828f
FM
465
466
467
468
469
15b6757b 470 @b space
36c9828f
FM
471
472
473
474
15b6757b 475 A character producing white space in displayed text.
36c9828f
FM
476
477
478
479
480
15b6757b 481 @b punct
36c9828f
FM
482
483
484
485
15b6757b 486 A punctuation character.
36c9828f
FM
487
488
489
490
491
15b6757b 492 @b graph
36c9828f
FM
493
494
495
496
15b6757b 497 A character with a visible representation.
36c9828f
FM
498
499
500
501
502
15b6757b 503 @b cntrl
36c9828f
FM
504
505
506
507
15b6757b 508 A control character.
36c9828f
FM
509
510
511
512
513
514 A character class may not be used as an endpoint of a range.
15b6757b
FM
515 @e wxWidgets: In a non-Unicode build, these character classifications depend on the
516 current locale, and correspond to the values returned by the ANSI C 'is'
517 functions: isalpha, isupper, etc. In Unicode mode they are based on
518 Unicode classifications, and are not affected by the current locale.
519 There are two special cases of bracket expressions:
520 the bracket expressions @b [[:@<:]] and @b [[:@>:]] are constraints, matching empty
521 strings at the beginning and end of a word respectively. A word is defined
522 as a sequence of word characters that is neither preceded nor followed
523 by word characters. A word character is an @e alnum character or an underscore
524 (@b _). These special bracket expressions are deprecated; users of AREs should
36c9828f
FM
525 use constraint escapes instead (see #Escapes below).
526
15b6757b 527 @section wxresynescapes Escapes
36c9828f 528
15b6757b
FM
529 @ref resyn_overview
530 Escapes (AREs only),
531 which begin with a @b \ followed by an alphanumeric character, come in several
532 varieties: character entry, class shorthands, constraint escapes, and back
533 references. A @b \ followed by an alphanumeric character but not constituting
534 a valid escape is illegal in AREs. In EREs, there are no escapes: outside
535 a bracket expression, a @b \ followed by an alphanumeric character merely stands
536 for that character as an ordinary character, and inside a bracket expression,
537 @b \ is an ordinary character. (The latter is the one actual incompatibility
36c9828f 538 between EREs and AREs.)
15b6757b
FM
539 Character-entry escapes (AREs only) exist to make
540 it easier to specify non-printing and otherwise inconvenient characters
541 in REs:
36c9828f
FM
542
543
544
545
546
547
15b6757b 548 @b \a
36c9828f
FM
549
550
551
552
15b6757b 553 alert (bell) character, as in C
36c9828f
FM
554
555
556
557
558
15b6757b 559 @b \b
36c9828f
FM
560
561
562
563
15b6757b 564 backspace, as in C
36c9828f
FM
565
566
567
568
569
15b6757b 570 @b \B
36c9828f
FM
571
572
573
574
15b6757b
FM
575 synonym
576 for @b \ to help reduce backslash doubling in some applications where there
577 are multiple levels of backslash processing
36c9828f
FM
578
579
580
581
582
15b6757b 583 @b \c@e X
36c9828f
FM
584
585
586
587
15b6757b
FM
588 (where @e X is any character)
589 the character whose low-order 5 bits are the same as those of @e X, and whose
590 other bits are all zero
36c9828f
FM
591
592
593
594
595
15b6757b 596 @b \e
36c9828f
FM
597
598
599
600
15b6757b
FM
601 the character whose collating-sequence name is
602 '@b ESC', or failing that, the character with octal value 033
36c9828f
FM
603
604
605
606
607
15b6757b 608 @b \f
36c9828f
FM
609
610
611
612
15b6757b 613 formfeed, as in C
36c9828f
FM
614
615
616
617
618
15b6757b 619 @b \n
36c9828f
FM
620
621
622
623
15b6757b 624 newline, as in C
36c9828f
FM
625
626
627
628
629
15b6757b 630 @b \r
36c9828f
FM
631
632
633
634
15b6757b 635 carriage return, as in C
36c9828f
FM
636
637
638
639
640
15b6757b 641 @b \t
36c9828f
FM
642
643
644
645
15b6757b 646 horizontal tab, as in C
36c9828f
FM
647
648
649
650
651
15b6757b 652 @b \u@e wxyz
36c9828f
FM
653
654
655
656
15b6757b
FM
657 (where @e wxyz is exactly four hexadecimal digits)
658 the Unicode
659 character @b U+@e wxyz in the local byte ordering
36c9828f
FM
660
661
662
663
664
15b6757b 665 @b \U@e stuvwxyz
36c9828f
FM
666
667
668
669
15b6757b
FM
670 (where @e stuvwxyz is
671 exactly eight hexadecimal digits) reserved for a somewhat-hypothetical Unicode
672 extension to 32 bits
36c9828f
FM
673
674
675
676
677
15b6757b 678 @b \v
36c9828f
FM
679
680
681
682
15b6757b 683 vertical tab, as in C
36c9828f
FM
684
685
686
687
688
15b6757b 689 @b \x@e hhh
36c9828f
FM
690
691
692
693
15b6757b
FM
694 (where
695 @e hhh is any sequence of hexadecimal digits) the character whose hexadecimal
696 value is @b 0x@e hhh (a single character no matter how many hexadecimal digits
697 are used).
36c9828f
FM
698
699
700
701
702
15b6757b 703 @b \0
36c9828f
FM
704
705
706
707
15b6757b 708 the character whose value is @b 0
36c9828f
FM
709
710
711
712
713
15b6757b 714 @b \@e xy
36c9828f
FM
715
716
717
718
15b6757b
FM
719 (where @e xy is exactly two
720 octal digits, and is not a @e back reference (see below)) the character whose
721 octal value is @b 0@e xy
36c9828f
FM
722
723
724
725
726
15b6757b 727 @b \@e xyz
36c9828f
FM
728
729
730
731
15b6757b
FM
732 (where @e xyz is exactly three octal digits, and is
733 not a back reference (see below))
734 the character whose octal value is @b 0@e xyz
36c9828f
FM
735
736
737
738
739
15b6757b 740 Hexadecimal digits are '@b 0'-'@b 9', '@b a'-'@b f', and '@b A'-'@b F'. Octal
36c9828f 741 digits are '@b 0'-'@b 7'.
15b6757b
FM
742 The character-entry
743 escapes are always taken as ordinary characters. For example, @b \135 is @b ] in
744 ASCII, but @b \135 does not terminate a bracket expression. Beware, however,
745 that some applications (e.g., C compilers) interpret such sequences themselves
746 before the regular-expression package gets to see them, which may require
36c9828f 747 doubling (quadrupling, etc.) the '@b \'.
15b6757b
FM
748 Class-shorthand escapes (AREs only) provide
749 shorthands for certain commonly-used character classes:
36c9828f
FM
750
751
752
753
754
755
15b6757b 756 @b \d
36c9828f
FM
757
758
759
760
15b6757b 761 @b [[:digit:]]
36c9828f
FM
762
763
764
765
766
15b6757b 767 @b \s
36c9828f
FM
768
769
770
771
15b6757b 772 @b [[:space:]]
36c9828f
FM
773
774
775
776
777
15b6757b 778 @b \w
36c9828f
FM
779
780
781
782
15b6757b 783 @b [[:alnum:]_] (note underscore)
36c9828f
FM
784
785
786
787
788
15b6757b 789 @b \D
36c9828f
FM
790
791
792
793
15b6757b 794 @b [^[:digit:]]
36c9828f
FM
795
796
797
798
799
15b6757b 800 @b \S
36c9828f
FM
801
802
803
804
15b6757b 805 @b [^[:space:]]
36c9828f
FM
806
807
808
809
810
15b6757b 811 @b \W
36c9828f
FM
812
813
814
815
15b6757b 816 @b [^[:alnum:]_] (note underscore)
36c9828f
FM
817
818
819
820
821
15b6757b
FM
822 Within bracket expressions, '@b \d', '@b \s', and
823 '@b \w' lose their outer brackets, and '@b \D',
824 '@b \S', and '@b \W' are illegal. (So, for example,
825 @b [a-c\d] is equivalent to @b [a-c[:digit:]].
826 Also, @b [a-c\D], which is equivalent to
36c9828f 827 @b [a-c^[:digit:]], is illegal.)
15b6757b
FM
828 A constraint escape (AREs only) is a constraint,
829 matching the empty string if specific conditions are met, written as an
830 escape:
36c9828f
FM
831
832
833
834
835
836
15b6757b 837 @b \A
36c9828f
FM
838
839
840
841
15b6757b
FM
842 matches only at the beginning of the string
843 (see #Matching, below,
844 for how this differs from '@b ^')
36c9828f
FM
845
846
847
848
849
15b6757b 850 @b \m
36c9828f
FM
851
852
853
854
15b6757b 855 matches only at the beginning of a word
36c9828f
FM
856
857
858
859
860
15b6757b 861 @b \M
36c9828f
FM
862
863
864
865
15b6757b 866 matches only at the end of a word
36c9828f
FM
867
868
869
870
871
15b6757b 872 @b \y
36c9828f
FM
873
874
875
876
15b6757b 877 matches only at the beginning or end of a word
36c9828f
FM
878
879
880
881
882
15b6757b 883 @b \Y
36c9828f
FM
884
885
886
887
15b6757b
FM
888 matches only at a point that is not the beginning or end of
889 a word
36c9828f
FM
890
891
892
893
894
15b6757b 895 @b \Z
36c9828f
FM
896
897
898
899
15b6757b
FM
900 matches only at the end of the string
901 (see #Matching, below, for
902 how this differs from '@b $')
36c9828f
FM
903
904
905
906
907
15b6757b 908 @b \@e m
36c9828f
FM
909
910
911
912
15b6757b
FM
913 (where @e m is a nonzero digit) a @e back reference,
914 see below
36c9828f
FM
915
916
917
918
919
15b6757b 920 @b \@e mnn
36c9828f
FM
921
922
923
924
15b6757b
FM
925 (where @e m is a nonzero digit, and @e nn is some more digits,
926 and the decimal value @e mnn is not greater than the number of closing capturing
927 parentheses seen so far) a @e back reference, see below
36c9828f
FM
928
929
930
931
932
15b6757b
FM
933 A word is defined
934 as in the specification of @b [[:@<:]] and @b [[:@>:]] above. Constraint escapes are
36c9828f 935 illegal within bracket expressions.
15b6757b
FM
936 A back reference (AREs only) matches
937 the same string matched by the parenthesized subexpression specified by
938 the number, so that (e.g.) @b ([bc])\1 matches @b bb or @b cc but not '@b bc'.
939 The subexpression
940 must entirely precede the back reference in the RE. Subexpressions are numbered
941 in the order of their leading parentheses. Non-capturing parentheses do not
36c9828f 942 define subexpressions.
15b6757b
FM
943 There is an inherent historical ambiguity between
944 octal character-entry escapes and back references, which is resolved by
945 heuristics, as hinted at above. A leading zero always indicates an octal
946 escape. A single non-zero digit, not followed by another digit, is always
947 taken as a back reference. A multi-digit sequence not starting with a zero
948 is taken as a back reference if it comes after a suitable subexpression
949 (i.e. the number is in the legal range for a back reference), and otherwise
36c9828f
FM
950 is taken as octal.
951
15b6757b 952 @section remetasyntax Metasyntax
36c9828f 953
15b6757b
FM
954 @ref resyn_overview
955 In addition to the main syntax described above,
956 there are some special forms and miscellaneous syntactic facilities available.
957 Normally the flavor of RE being used is specified by application-dependent
958 means. However, this can be overridden by a @e director. If an RE of any flavor
959 begins with '@b ***:', the rest of the RE is an ARE. If an RE of any flavor begins
960 with '@b ***=', the rest of the RE is taken to be a literal string, with all
36c9828f 961 characters considered ordinary characters.
15b6757b
FM
962 An ARE may begin with @e embedded options: a sequence @b (?xyz)
963 (where @e xyz is one or more alphabetic characters)
964 specifies options affecting the rest of the RE. These supplement, and can
965 override, any options specified by the application. The available option
966 letters are:
36c9828f
FM
967
968
969
970
971
972
15b6757b 973 @b b
36c9828f
FM
974
975
976
977
15b6757b 978 rest of RE is a BRE
36c9828f
FM
979
980
981
982
983
15b6757b 984 @b c
36c9828f
FM
985
986
987
988
15b6757b 989 case-sensitive matching (usual default)
36c9828f
FM
990
991
992
993
994
15b6757b 995 @b e
36c9828f
FM
996
997
998
999
15b6757b 1000 rest of RE is an ERE
36c9828f
FM
1001
1002
1003
1004
1005
15b6757b 1006 @b i
36c9828f
FM
1007
1008
1009
1010
15b6757b 1011 case-insensitive matching (see #Matching, below)
36c9828f
FM
1012
1013
1014
1015
1016
15b6757b 1017 @b m
36c9828f
FM
1018
1019
1020
1021
15b6757b 1022 historical synonym for @b n
36c9828f
FM
1023
1024
1025
1026
1027
15b6757b 1028 @b n
36c9828f
FM
1029
1030
1031
1032
15b6757b 1033 newline-sensitive matching (see #Matching, below)
36c9828f
FM
1034
1035
1036
1037
1038
15b6757b 1039 @b p
36c9828f
FM
1040
1041
1042
1043
15b6757b 1044 partial newline-sensitive matching (see #Matching, below)
36c9828f
FM
1045
1046
1047
1048
1049
15b6757b 1050 @b q
36c9828f
FM
1051
1052
1053
1054
15b6757b
FM
1055 rest of RE
1056 is a literal ("quoted") string, all ordinary characters
36c9828f
FM
1057
1058
1059
1060
1061
15b6757b 1062 @b s
36c9828f
FM
1063
1064
1065
1066
15b6757b 1067 non-newline-sensitive matching (usual default)
36c9828f
FM
1068
1069
1070
1071
1072
15b6757b 1073 @b t
36c9828f
FM
1074
1075
1076
1077
15b6757b 1078 tight syntax (usual default; see below)
36c9828f
FM
1079
1080
1081
1082
1083
15b6757b 1084 @b w
36c9828f
FM
1085
1086
1087
1088
15b6757b
FM
1089 inverse
1090 partial newline-sensitive ("weird") matching (see #Matching, below)
36c9828f
FM
1091
1092
1093
1094
1095
15b6757b 1096 @b x
36c9828f
FM
1097
1098
1099
1100
15b6757b 1101 expanded syntax (see below)
36c9828f
FM
1102
1103
1104
1105
1106
15b6757b
FM
1107 Embedded options take effect at the @b ) terminating the
1108 sequence. They are available only at the start of an ARE, and may not be
36c9828f 1109 used later within it.
15b6757b
FM
1110 In addition to the usual (@e tight) RE syntax, in which
1111 all characters are significant, there is an @e expanded syntax, available
1112 in AREs with the embedded
1113 x option. In the expanded syntax, white-space characters are ignored and
1114 all characters between a @b # and the following newline (or the end of the
1115 RE) are ignored, permitting paragraphing and commenting a complex RE. There
1116 are three exceptions to that basic rule:
36c9828f
FM
1117
1118
15b6757b 1119 a white-space character or '@b #' preceded
36c9828f 1120 by '@b \' is retained
15b6757b
FM
1121 white space or '@b #' within a bracket expression is retained
1122 white space and comments are illegal within multi-character symbols like
36c9828f
FM
1123 the ARE '@b (?:' or the BRE '@b \('
1124
1125
15b6757b
FM
1126 Expanded-syntax white-space characters are blank,
1127 tab, newline, and any character that belongs to the @e space character class.
1128 Finally, in an ARE, outside bracket expressions, the sequence '@b (?#ttt)' (where
1129 @e ttt is any text not containing a '@b )') is a comment, completely ignored. Again,
1130 this is not allowed between the characters of multi-character symbols like
1131 '@b (?:'. Such comments are more a historical artifact than a useful facility,
36c9828f 1132 and their use is deprecated; use the expanded syntax instead.
15b6757b
FM
1133 @e None of these
1134 metasyntax extensions is available if the application (or an initial @b ***=
1135 director) has specified that the user's input be treated as a literal string
36c9828f
FM
1136 rather than as an RE.
1137
15b6757b 1138 @section wxresynmatching Matching
36c9828f 1139
15b6757b
FM
1140 @ref resyn_overview
1141 In the event that an RE could match more than
1142 one substring of a given string, the RE matches the one starting earliest
1143 in the string. If the RE could match more than one substring starting at
1144 that point, its choice is determined by its @e preference: either the longest
36c9828f 1145 substring, or the shortest.
15b6757b
FM
1146 Most atoms, and all constraints, have no preference.
1147 A parenthesized RE has the same preference (possibly none) as the RE. A
1148 quantified atom with quantifier @b {m} or @b {m}? has the same preference (possibly
1149 none) as the atom itself. A quantified atom with other normal quantifiers
1150 (including @b {m,n} with @e m equal to @e n) prefers longest match. A quantified
1151 atom with other non-greedy quantifiers (including @b {m,n}? with @e m equal to
1152 @e n) prefers shortest match. A branch has the same preference as the first
1153 quantified atom in it which has a preference. An RE consisting of two or
36c9828f 1154 more branches connected by the @b | operator prefers longest match.
15b6757b
FM
1155 Subject to the constraints imposed by the rules for matching the whole RE, subexpressions
1156 also match the longest or shortest possible substrings, based on their
1157 preferences, with subexpressions starting earlier in the RE taking priority
1158 over ones starting later. Note that outer subexpressions thus take priority
36c9828f 1159 over their component subexpressions.
15b6757b
FM
1160 Note that the quantifiers @b {1,1} and
1161 @b {1,1}? can be used to force longest and shortest preference, respectively,
36c9828f 1162 on a subexpression or a whole RE.
15b6757b
FM
1163 Match lengths are measured in characters,
1164 not collating elements. An empty string is considered longer than no match
1165 at all. For example, @b bb* matches the three middle characters
1166 of '@b abbbc', @b (week|wee)(night|knights)
1167 matches all ten characters of '@b weeknights', when @b (.*).* is matched against
1168 @b abc the parenthesized subexpression matches all three characters, and when
1169 @b (a*)* is matched against @b bc both the whole RE and the parenthesized subexpression
36c9828f 1170 match an empty string.
15b6757b
FM
1171 If case-independent matching is specified, the effect
1172 is much as if all case distinctions had vanished from the alphabet. When
1173 an alphabetic that exists in multiple cases appears as an ordinary character
1174 outside a bracket expression, it is effectively transformed into a bracket
1175 expression containing both cases, so that @b x becomes '@b [xX]'. When it appears
1176 inside a bracket expression, all case counterparts of it are added to the
36c9828f 1177 bracket expression, so that @b [x] becomes @b [xX] and @b [^x] becomes '@b [^xX]'.
15b6757b
FM
1178 If newline-sensitive
1179 matching is specified, @b . and bracket expressions using @b ^ will never match
1180 the newline character (so that matches will never cross newlines unless
1181 the RE explicitly arranges it) and @b ^ and @b $ will match the empty string after
1182 and before a newline respectively, in addition to matching at beginning
1183 and end of string respectively. ARE @b \A and @b \Z continue to match beginning
36c9828f 1184 or end of string @e only.
15b6757b
FM
1185 If partial newline-sensitive matching is specified,
1186 this affects @b . and bracket expressions as with newline-sensitive matching,
36c9828f 1187 but not @b ^ and '@b $'.
15b6757b
FM
1188 If inverse partial newline-sensitive matching is specified,
1189 this affects @b ^ and @b $ as with newline-sensitive matching, but not @b . and bracket
36c9828f
FM
1190 expressions. This isn't very useful but is provided for symmetry.
1191
15b6757b 1192 @section relimits Limits And Compatibility
36c9828f 1193
15b6757b
FM
1194 @ref resyn_overview
1195 No particular limit is imposed on the length of REs. Programs
1196 intended to be highly portable should not employ REs longer than 256 bytes,
36c9828f 1197 as a POSIX-compliant implementation can refuse to accept such REs.
15b6757b
FM
1198 The only
1199 feature of AREs that is actually incompatible with POSIX EREs is that @b \
1200 does not lose its special significance inside bracket expressions. All other
1201 ARE features use syntax which is illegal or has undefined or unspecified
1202 effects in POSIX EREs; the @b *** syntax of directors likewise is outside
36c9828f 1203 the POSIX syntax for both BREs and EREs.
15b6757b
FM
1204 Many of the ARE extensions are
1205 borrowed from Perl, but some have been changed to clean them up, and a
1206 few Perl extensions are not present. Incompatibilities of note include '@b \b',
1207 '@b \B', the lack of special treatment for a trailing newline, the addition of
1208 complemented bracket expressions to the things affected by newline-sensitive
1209 matching, the restrictions on parentheses and back references in lookahead
1210 constraints, and the longest/shortest-match (rather than first-match) matching
36c9828f 1211 semantics.
15b6757b
FM
1212 The matching rules for REs containing both normal and non-greedy
1213 quantifiers have changed since early beta-test versions of this package.
1214 (The new rules are much simpler and cleaner, but don't work as hard at guessing
36c9828f 1215 the user's real intentions.)
15b6757b
FM
1216 Henry Spencer's original 1986 @e regexp package, still in widespread use,
1217 implemented an early version of today's EREs. There are four incompatibilities between @e regexp's
1218 near-EREs ('RREs' for short) and AREs. In roughly increasing order of significance:
36c9828f
FM
1219
1220
15b6757b
FM
1221 In AREs, @b \ followed by an alphanumeric character is either an escape or
1222 an error, while in RREs, it was just another way of writing the alphanumeric.
1223 This should not be a problem because there was no reason to write such
36c9828f 1224 a sequence in RREs.
15b6757b
FM
1225 @b { followed by a digit in an ARE is the beginning of
1226 a bound, while in RREs, @b { was always an ordinary character. Such sequences
1227 should be rare, and will often result in an error because following characters
36c9828f 1228 will not look like a valid bound.
15b6757b
FM
1229 In AREs, @b \ remains a special character
1230 within '@b []', so a literal @b \ within @b [] must be
1231 written '@b \\'. @b \\ also gives a literal
1232 @b \ within @b [] in RREs, but only truly paranoid programmers routinely doubled
36c9828f 1233 the backslash.
15b6757b
FM
1234 AREs report the longest/shortest match for the RE, rather
1235 than the first found in a specified search order. This may affect some RREs
1236 which were written in the expectation that the first match would be reported.
1237 (The careful crafting of RREs to optimize the search order for fast matching
1238 is obsolete (AREs examine all possible matches in parallel, and their performance
1239 is largely insensitive to their complexity) but cases where the search
1240 order was exploited to deliberately find a match which was @e not the longest/shortest
36c9828f
FM
1241 will need rewriting.)
1242
1243
1244
15b6757b 1245 @section wxresynbre Basic Regular Expressions
36c9828f 1246
15b6757b
FM
1247 @ref resyn_overview
1248 BREs differ from EREs in
1249 several respects. '@b |', '@b +', and @b ? are ordinary characters and there is no equivalent
1250 for their functionality. The delimiters for bounds
1251 are @b \{ and '@b \}', with @b { and
1252 @b } by themselves ordinary characters. The parentheses for nested subexpressions
1253 are @b \( and '@b \)', with @b ( and @b ) by themselves
1254 ordinary characters. @b ^ is an ordinary
1255 character except at the beginning of the RE or the beginning of a parenthesized
1256 subexpression, @b $ is an ordinary character except at the end of the RE or
1257 the end of a parenthesized subexpression, and @b * is an ordinary character
1258 if it appears at the beginning of the RE or the beginning of a parenthesized
1259 subexpression (after a possible leading '@b ^'). Finally, single-digit back references
1260 are available, and @b \< and @b \> are synonyms
1261 for @b [[:<:]] and @b [[:>:]] respectively;
36c9828f
FM
1262 no other escapes are available.
1263
15b6757b 1264 @section wxresynchars Regular Expression Character Names
36c9828f 1265
15b6757b
FM
1266 @ref resyn_overview
1267 Note that the character names are case sensitive.
36c9828f
FM
1268
1269
1270
1271
1272
1273
15b6757b 1274 NUL
36c9828f
FM
1275
1276
1277
1278
15b6757b 1279 '\0'
36c9828f
FM
1280
1281
1282
1283
1284
15b6757b 1285 SOH
36c9828f
FM
1286
1287
1288
1289
15b6757b 1290 '\001'
36c9828f
FM
1291
1292
1293
1294
1295
15b6757b 1296 STX
36c9828f
FM
1297
1298
1299
1300
15b6757b 1301 '\002'
36c9828f
FM
1302
1303
1304
1305
1306
15b6757b 1307 ETX
36c9828f
FM
1308
1309
1310
1311
15b6757b 1312 '\003'
36c9828f
FM
1313
1314
1315
1316
1317
15b6757b 1318 EOT
36c9828f
FM
1319
1320
1321
1322
15b6757b 1323 '\004'
36c9828f
FM
1324
1325
1326
1327
1328
15b6757b 1329 ENQ
36c9828f
FM
1330
1331
1332
1333
15b6757b 1334 '\005'
36c9828f
FM
1335
1336
1337
1338
1339
15b6757b 1340 ACK
36c9828f
FM
1341
1342
1343
1344
15b6757b 1345 '\006'
36c9828f
FM
1346
1347
1348
1349
1350
15b6757b 1351 BEL
36c9828f
FM
1352
1353
1354
1355
15b6757b 1356 '\007'
36c9828f
FM
1357
1358
1359
1360
1361
15b6757b 1362 alert
36c9828f
FM
1363
1364
1365
1366
15b6757b 1367 '\007'
36c9828f
FM
1368
1369
1370
1371
1372
15b6757b 1373 BS
36c9828f
FM
1374
1375
1376
1377
15b6757b 1378 '\010'
36c9828f
FM
1379
1380
1381
1382
1383
15b6757b 1384 backspace
36c9828f
FM
1385
1386
1387
1388
15b6757b 1389 '\b'
36c9828f
FM
1390
1391
1392
1393
1394
15b6757b 1395 HT
36c9828f
FM
1396
1397
1398
1399
15b6757b 1400 '\011'
36c9828f
FM
1401
1402
1403
1404
1405
15b6757b 1406 tab
36c9828f
FM
1407
1408
1409
1410
15b6757b 1411 '\t'
36c9828f
FM
1412
1413
1414
1415
1416
15b6757b 1417 LF
36c9828f
FM
1418
1419
1420
1421
15b6757b 1422 '\012'
36c9828f
FM
1423
1424
1425
1426
1427
15b6757b 1428 newline
36c9828f
FM
1429
1430
1431
1432
15b6757b 1433 '\n'
36c9828f
FM
1434
1435
1436
1437
1438
15b6757b 1439 VT
36c9828f
FM
1440
1441
1442
1443
15b6757b 1444 '\013'
36c9828f
FM
1445
1446
1447
1448
1449
15b6757b 1450 vertical-tab
36c9828f
FM
1451
1452
1453
1454
15b6757b 1455 '\v'
36c9828f
FM
1456
1457
1458
1459
1460
15b6757b 1461 FF
36c9828f
FM
1462
1463
1464
1465
15b6757b 1466 '\014'
36c9828f
FM
1467
1468
1469
1470
1471
15b6757b 1472 form-feed
36c9828f
FM
1473
1474
1475
1476
15b6757b 1477 '\f'
36c9828f
FM
1478
1479
1480
1481
1482
15b6757b 1483 CR
36c9828f
FM
1484
1485
1486
1487
15b6757b 1488 '\015'
36c9828f
FM
1489
1490
1491
1492
1493
15b6757b 1494 carriage-return
36c9828f
FM
1495
1496
1497
1498
15b6757b 1499 '\r'
36c9828f
FM
1500
1501
1502
1503
1504
15b6757b 1505 SO
36c9828f
FM
1506
1507
1508
1509
15b6757b 1510 '\016'
36c9828f
FM
1511
1512
1513
1514
1515
15b6757b 1516 SI
36c9828f
FM
1517
1518
1519
1520
15b6757b 1521 '\017'
36c9828f
FM
1522
1523
1524
1525
1526
15b6757b 1527 DLE
36c9828f
FM
1528
1529
1530
1531
15b6757b 1532 '\020'
36c9828f
FM
1533
1534
1535
1536
1537
15b6757b 1538 DC1
36c9828f
FM
1539
1540
1541
1542
15b6757b 1543 '\021'
36c9828f
FM
1544
1545
1546
1547
1548
15b6757b 1549 DC2
36c9828f
FM
1550
1551
1552
1553
15b6757b 1554 '\022'
36c9828f
FM
1555
1556
1557
1558
1559
15b6757b 1560 DC3
36c9828f
FM
1561
1562
1563
1564
15b6757b 1565 '\023'
36c9828f
FM
1566
1567
1568
1569
1570
15b6757b 1571 DC4
36c9828f
FM
1572
1573
1574
1575
15b6757b 1576 '\024'
36c9828f
FM
1577
1578
1579
1580
1581
15b6757b 1582 NAK
36c9828f
FM
1583
1584
1585
1586
15b6757b 1587 '\025'
36c9828f
FM
1588
1589
1590
1591
1592
15b6757b 1593 SYN
36c9828f
FM
1594
1595
1596
1597
15b6757b 1598 '\026'
36c9828f
FM
1599
1600
1601
1602
1603
15b6757b 1604 ETB
36c9828f
FM
1605
1606
1607
1608
15b6757b 1609 '\027'
36c9828f
FM
1610
1611
1612
1613
1614
15b6757b 1615 CAN
36c9828f
FM
1616
1617
1618
1619
15b6757b 1620 '\030'
36c9828f
FM
1621
1622
1623
1624
1625
15b6757b 1626 EM
36c9828f
FM
1627
1628
1629
1630
15b6757b 1631 '\031'
36c9828f
FM
1632
1633
1634
1635
1636
15b6757b 1637 SUB
36c9828f
FM
1638
1639
1640
1641
15b6757b 1642 '\032'
36c9828f
FM
1643
1644
1645
1646
1647
15b6757b 1648 ESC
36c9828f
FM
1649
1650
1651
1652
15b6757b 1653 '\033'
36c9828f
FM
1654
1655
1656
1657
1658
15b6757b 1659 IS4
36c9828f
FM
1660
1661
1662
1663
15b6757b 1664 '\034'
36c9828f
FM
1665
1666
1667
1668
1669
15b6757b 1670 FS
36c9828f
FM
1671
1672
1673
1674
15b6757b 1675 '\034'
36c9828f
FM
1676
1677
1678
1679
1680
15b6757b 1681 IS3
36c9828f
FM
1682
1683
1684
1685
15b6757b 1686 '\035'
36c9828f
FM
1687
1688
1689
1690
1691
15b6757b 1692 GS
36c9828f
FM
1693
1694
1695
1696
15b6757b 1697 '\035'
36c9828f
FM
1698
1699
1700
1701
1702
15b6757b 1703 IS2
36c9828f
FM
1704
1705
1706
1707
15b6757b 1708 '\036'
36c9828f
FM
1709
1710
1711
1712
1713
15b6757b 1714 RS
36c9828f
FM
1715
1716
1717
1718
15b6757b 1719 '\036'
36c9828f
FM
1720
1721
1722
1723
1724
15b6757b 1725 IS1
36c9828f
FM
1726
1727
1728
1729
15b6757b 1730 '\037'
36c9828f
FM
1731
1732
1733
1734
1735
15b6757b 1736 US
36c9828f
FM
1737
1738
1739
1740
15b6757b 1741 '\037'
36c9828f
FM
1742
1743
1744
1745
1746
15b6757b 1747 space
36c9828f
FM
1748
1749
1750
1751
15b6757b 1752 ' '
36c9828f
FM
1753
1754
1755
1756
1757
15b6757b 1758 exclamation-mark
36c9828f
FM
1759
1760
1761
1762
15b6757b 1763 '!'
36c9828f
FM
1764
1765
1766
1767
1768
15b6757b 1769 quotation-mark
36c9828f
FM
1770
1771
1772
1773
15b6757b 1774 '"'
36c9828f
FM
1775
1776
1777
1778
1779
15b6757b 1780 number-sign
36c9828f
FM
1781
1782
1783
1784
15b6757b 1785 '#'
36c9828f
FM
1786
1787
1788
1789
1790
15b6757b 1791 dollar-sign
36c9828f
FM
1792
1793
1794
1795
15b6757b 1796 '$'
36c9828f
FM
1797
1798
1799
1800
1801
15b6757b 1802 percent-sign
36c9828f
FM
1803
1804
1805
1806
15b6757b 1807 '%'
36c9828f
FM
1808
1809
1810
1811
1812
15b6757b 1813 ampersand
36c9828f
FM
1814
1815
1816
1817
15b6757b 1818 '&'
36c9828f
FM
1819
1820
1821
1822
1823
15b6757b 1824 apostrophe
36c9828f
FM
1825
1826
1827
1828
15b6757b 1829 '\''
36c9828f
FM
1830
1831
1832
1833
1834
15b6757b 1835 left-parenthesis
36c9828f
FM
1836
1837
1838
1839
15b6757b 1840 '('
36c9828f
FM
1841
1842
1843
1844
1845
15b6757b 1846 right-parenthesis
36c9828f
FM
1847
1848
1849
1850
15b6757b 1851 ')'
36c9828f
FM
1852
1853
1854
1855
1856
15b6757b 1857 asterisk
36c9828f
FM
1858
1859
1860
1861
15b6757b 1862 '*'
36c9828f
FM
1863
1864
1865
1866
1867
15b6757b 1868 plus-sign
36c9828f
FM
1869
1870
1871
1872
15b6757b 1873 '+'
36c9828f
FM
1874
1875
1876
1877
1878
15b6757b 1879 comma
36c9828f
FM
1880
1881
1882
1883
15b6757b 1884 ','
36c9828f
FM
1885
1886
1887
1888
1889
15b6757b 1890 hyphen
36c9828f
FM
1891
1892
1893
1894
15b6757b 1895 '-'
36c9828f
FM
1896
1897
1898
1899
1900
15b6757b 1901 hyphen-minus
36c9828f
FM
1902
1903
1904
1905
15b6757b 1906 '-'
36c9828f
FM
1907
1908
1909
1910
1911
15b6757b 1912 period
36c9828f
FM
1913
1914
1915
1916
15b6757b 1917 '.'
36c9828f
FM
1918
1919
1920
1921
1922
15b6757b 1923 full-stop
36c9828f
FM
1924
1925
1926
1927
15b6757b 1928 '.'
36c9828f
FM
1929
1930
1931
1932
1933
15b6757b 1934 slash
36c9828f
FM
1935
1936
1937
1938
15b6757b 1939 '/'
36c9828f
FM
1940
1941
1942
1943
1944
15b6757b 1945 solidus
36c9828f
FM
1946
1947
1948
1949
15b6757b 1950 '/'
36c9828f
FM
1951
1952
1953
1954
1955
15b6757b 1956 zero
36c9828f
FM
1957
1958
1959
1960
15b6757b 1961 '0'
36c9828f
FM
1962
1963
1964
1965
1966
15b6757b 1967 one
36c9828f
FM
1968
1969
1970
1971
15b6757b 1972 '1'
36c9828f
FM
1973
1974
1975
1976
1977
15b6757b 1978 two
36c9828f
FM
1979
1980
1981
1982
15b6757b 1983 '2'
36c9828f
FM
1984
1985
1986
1987
1988
15b6757b 1989 three
36c9828f
FM
1990
1991
1992
1993
15b6757b 1994 '3'
36c9828f
FM
1995
1996
1997
1998
1999
15b6757b 2000 four
36c9828f
FM
2001
2002
2003
2004
15b6757b 2005 '4'
36c9828f
FM
2006
2007
2008
2009
2010
15b6757b 2011 five
36c9828f
FM
2012
2013
2014
2015
15b6757b 2016 '5'
36c9828f
FM
2017
2018
2019
2020
2021
15b6757b 2022 six
36c9828f
FM
2023
2024
2025
2026
15b6757b 2027 '6'
36c9828f
FM
2028
2029
2030
2031
2032
15b6757b 2033 seven
36c9828f
FM
2034
2035
2036
2037
15b6757b 2038 '7'
36c9828f
FM
2039
2040
2041
2042
2043
15b6757b 2044 eight
36c9828f
FM
2045
2046
2047
2048
15b6757b 2049 '8'
36c9828f
FM
2050
2051
2052
2053
2054
15b6757b 2055 nine
36c9828f
FM
2056
2057
2058
2059
15b6757b 2060 '9'
36c9828f
FM
2061
2062
2063
2064
2065
15b6757b 2066 colon
36c9828f
FM
2067
2068
2069
2070
15b6757b 2071 ':'
36c9828f
FM
2072
2073
2074
2075
2076
15b6757b 2077 semicolon
36c9828f
FM
2078
2079
2080
2081
15b6757b 2082 ';'
36c9828f
FM
2083
2084
2085
2086
2087
15b6757b 2088 less-than-sign
36c9828f
FM
2089
2090
2091
2092
15b6757b 2093 '<'
36c9828f
FM
2094
2095
2096
2097
2098
15b6757b 2099 equals-sign
36c9828f
FM
2100
2101
2102
2103
15b6757b 2104 '='
36c9828f
FM
2105
2106
2107
2108
2109
15b6757b 2110 greater-than-sign
36c9828f
FM
2111
2112
2113
2114
15b6757b 2115 '>'
36c9828f
FM
2116
2117
2118
2119
2120
15b6757b 2121 question-mark
36c9828f
FM
2122
2123
2124
2125
15b6757b 2126 '?'
36c9828f
FM
2127
2128
2129
2130
2131
15b6757b 2132 commercial-at
36c9828f
FM
2133
2134
2135
2136
15b6757b 2137 '@'
36c9828f
FM
2138
2139
2140
2141
2142
15b6757b 2143 left-square-bracket
36c9828f
FM
2144
2145
2146
2147
15b6757b 2148 '['
36c9828f
FM
2149
2150
2151
2152
2153
15b6757b 2154 backslash
36c9828f
FM
2155
2156
2157
2158
15b6757b 2159 '\\'
36c9828f
FM
2160
2161
2162
2163
2164
15b6757b 2165 reverse-solidus
36c9828f
FM
2166
2167
2168
2169
15b6757b 2170 '\\'
36c9828f
FM
2171
2172
2173
2174
2175
15b6757b 2176 right-square-bracket
36c9828f
FM
2177
2178
2179
2180
15b6757b 2181 ']'
36c9828f
FM
2182
2183
2184
2185
2186
15b6757b 2187 circumflex
36c9828f
FM
2188
2189
2190
2191
15b6757b 2192 '^'
36c9828f
FM
2193
2194
2195
2196
2197
15b6757b 2198 circumflex-accent
36c9828f
FM
2199
2200
2201
2202
15b6757b 2203 '^'
36c9828f
FM
2204
2205
2206
2207
2208
15b6757b 2209 underscore
36c9828f
FM
2210
2211
2212
2213
15b6757b 2214 '_'
36c9828f
FM
2215
2216
2217
2218
2219
15b6757b 2220 low-line
36c9828f
FM
2221
2222
2223
2224
15b6757b 2225 '_'
36c9828f
FM
2226
2227
2228
2229
2230
15b6757b 2231 grave-accent
36c9828f
FM
2232
2233
2234
2235
15b6757b 2236 '`'
36c9828f
FM
2237
2238
2239
2240
2241
15b6757b 2242 left-brace
36c9828f
FM
2243
2244
2245
2246
15b6757b 2247 '{'
36c9828f
FM
2248
2249
2250
2251
2252
15b6757b 2253 left-curly-bracket
36c9828f
FM
2254
2255
2256
2257
15b6757b 2258 '{'
36c9828f
FM
2259
2260
2261
2262
2263
15b6757b 2264 vertical-line
36c9828f
FM
2265
2266
2267
2268
15b6757b 2269 '|'
36c9828f
FM
2270
2271
2272
2273
2274
15b6757b 2275 right-brace
36c9828f
FM
2276
2277
2278
2279
15b6757b 2280 '}'
36c9828f
FM
2281
2282
2283
2284
2285
15b6757b 2286 right-curly-bracket
36c9828f
FM
2287
2288
2289
2290
15b6757b 2291 '}'
36c9828f
FM
2292
2293
2294
2295
2296
15b6757b 2297 tilde
36c9828f
FM
2298
2299
2300
2301
15b6757b 2302 '~'
36c9828f
FM
2303
2304
2305
2306
2307
15b6757b 2308 DEL
36c9828f
FM
2309
2310
2311
2312
15b6757b 2313 '\177'
36c9828f 2314
15b6757b 2315 */
36c9828f
FM
2316
2317