]>
Commit | Line | Data |
---|---|---|
b37bf2e1 A |
1 | /* This is JavaScriptCore's variant of the PCRE library. While this library |
2 | started out as a copy of PCRE, many of the features of PCRE have been | |
3 | removed. This library now supports only the regular expression features | |
4 | required by the JavaScript language specification, and has only the functions | |
5 | needed by JavaScriptCore and the rest of WebKit. | |
6 | ||
7 | Originally written by Philip Hazel | |
8 | Copyright (c) 1997-2006 University of Cambridge | |
9 | Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved. | |
10 | ||
11 | ----------------------------------------------------------------------------- | |
12 | Redistribution and use in source and binary forms, with or without | |
13 | modification, are permitted provided that the following conditions are met: | |
14 | ||
15 | * Redistributions of source code must retain the above copyright notice, | |
16 | this list of conditions and the following disclaimer. | |
17 | ||
18 | * Redistributions in binary form must reproduce the above copyright | |
19 | notice, this list of conditions and the following disclaimer in the | |
20 | documentation and/or other materials provided with the distribution. | |
21 | ||
22 | * Neither the name of the University of Cambridge nor the names of its | |
23 | contributors may be used to endorse or promote products derived from | |
24 | this software without specific prior written permission. | |
25 | ||
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
28 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
29 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
30 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
31 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
32 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
33 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
34 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
35 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
36 | POSSIBILITY OF SUCH DAMAGE. | |
37 | ----------------------------------------------------------------------------- | |
38 | */ | |
39 | ||
40 | /************************************************* | |
41 | * Unicode Property Table handler * | |
42 | *************************************************/ | |
43 | ||
44 | /* Internal header file defining the layout of the bits in each pair of 32-bit | |
45 | words that form a data item in the table. */ | |
46 | ||
/* One entry of the property table: a pair of unsigned words. The bit layout
   of each word is given by the f0_ and f1_ masks defined in this header. */
struct cnode {
  unsigned int f0;  /* script number, range flag and code point */
  unsigned int f1;  /* character type and range / other-case offset */
};

typedef struct cnode cnode;
51 | ||
/* Things for the f0 field. Together with the two spare bits (0x00600000)
   these masks partition the whole 32-bit word without overlap. */

#define f0_scriptmask   0xff000000  /* Mask for script field (top byte) */
#define f0_scriptshift          24  /* Shift for script value */

/* The range flag is exactly one bit. It must not include the spare bits or
   any bit of f0_charmask: with a wider mask, a single-character entry whose
   code point is at or above U+100000 would test as a range. */
#define f0_rangeflag    0x00800000  /* Flag for a range item */
#define f0_charmask     0x001fffff  /* Mask for code point value */
58 | ||
/* Masks and shifts for the f1 word */

#define f1_typemask   0xFC000000    /* top six bits: character type number */
#define f1_typeshift  26            /* right shift that isolates the type */
#define f1_rangemask  0x0000FFFF    /* low 16 bits: offset to top of range */
#define f1_casemask   0x0000FFFF    /* low 16 bits: other-case offset */
/* NOTE(review): presumably ORed into a case offset whose 0x8000 bit is set,
   to sign-extend the 16-bit signed value - confirm against the lookup code. */
#define f1_caseneg    0xFFFF8000    /* bits for negation */
66 | ||
67 | /* The data consists of a vector of structures of type cnode. The two unsigned | |
68 | 32-bit integers are used as follows: | |
69 | ||
70 | (f0) (1) The most significant byte holds the script number. The numbers are | |
71 | defined by the enum in ucp.h. | |
72 | ||
73 | (2) The 0x00800000 bit is set if this entry defines a range of characters. | |
74 | It is not set if this entry defines a single character. | |
75 | ||
76 | (3) The 0x00600000 bits are spare. | |
77 | ||
78 | (4) The 0x001fffff bits contain the code point. No Unicode code point will | |
79 | ever be greater than 0x0010ffff, so this should be OK for ever. | |
80 | ||
81 | (f1) (1) The 0xfc000000 bits contain the character type number. The numbers are | |
82 | defined by an enum in ucp.h. | |
83 | ||
84 | (2) The 0x03ff0000 bits are spare. | |
85 | ||
86 | (3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of | |
87 | range if this entry defines a range, OR the *signed* offset to the | |
88 | character's "other case" partner if this entry defines a single | |
89 | character. There is no partner if the value is zero. | |
90 | ||
91 | ------------------------------------------------------------------------------- | |
92 | | script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) | | |
93 | ------------------------------------------------------------------------------- | |
94 |               | | |                              | | | |
95 |               | | |-> spare                      | |-> spare | |
96 |               | |                                | | |
97 |               | |-> spare                        |-> spare | |
98 |               | | |
99 |               |-> range flag | |
100 | ||
101 | The upper/lower casing information is set only for characters that come in | |
102 | pairs. The non-one-to-one mappings in the Unicode data are ignored. | |
103 | ||
104 | When searching the data, proceed as follows: | |
105 | ||
106 | (1) Set up for a binary chop search. | |
107 | ||
108 | (2) If the top is not greater than the bottom, the character is not in the | |
109 | table. Its type must therefore be "Cn" ("Undefined"). | |
110 | ||
111 | (3) Find the middle vector element. | |
112 | ||
113 | (4) Extract the code point and compare. If equal, we are done. | |
114 | ||
115 | (5) If the test character is smaller, set the top to the current point, and | |
116 | goto (2). | |
117 | ||
118 | (6) If the current entry defines a range, compute the last character by adding | |
119 | the offset, and see if the test character is within the range. If it is, | |
120 | we are done. | |
121 | ||
122 | (7) Otherwise, set the bottom to one element past the current point and goto | |
123 | (2). | |
124 | */ | |
125 | ||
126 | /* End of ucpinternal.h */ |