]>
Commit | Line | Data |
---|---|---|
1 | /* utf16js - easy interfaces for Unicode compliance | |
2 | * Copyright (C) 2012 Jay Freeman (saurik) | |
3 | */ | |
4 | ||
5 | /* Modified BSD License {{{ */ | |
6 | /* | |
7 | * Redistribution and use in source and binary | |
8 | * forms, with or without modification, are permitted | |
9 | * provided that the following conditions are met: | |
10 | * | |
11 | * 1. Redistributions of source code must retain the | |
12 | * above copyright notice, this list of conditions | |
13 | * and the following disclaimer. | |
14 | * 2. Redistributions in binary form must reproduce the | |
15 | * above copyright notice, this list of conditions | |
16 | * and the following disclaimer in the documentation | |
17 | * and/or other materials provided with the | |
18 | * distribution. | |
19 | * 3. The name of the author may not be used to endorse | |
20 | * or promote products derived from this software | |
21 | * without specific prior written permission. | |
22 | * | |
23 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' | |
24 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, | |
25 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | |
26 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE | |
28 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
29 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
30 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
31 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
32 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
33 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR | |
34 | * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN | |
35 | * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | |
36 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
37 | */ | |
38 | /* }}} */ | |
39 | ||
40 | if (typeof define !== 'function') { var define = require('amdefine')(module) } | |
41 | ||
42 | define([], function() { | |
43 | ||
/**
 * Wraps a native string so that its length is measured in Unicode code
 * points instead of UTF-16 code units.
 *
 * @constructor
 * @param {string} string - the native string to wrap
 */
var utf16 = function(string) {
    var self = this;

    // Keep the native string around; toString() hands it back unchanged.
    self.string = string;

    // Number of code points in the string, as computed by utf16.strlen.
    self.length = utf16.strlen(string);
};
48 | ||
/**
 * Decode a UTF-16 string into an array of Unicode code points.
 *
 * @param {string} string - the UTF-16 text to decode
 * @param {number[]} [points] - array the code points are appended to; a
 *     fresh array is used when omitted
 * @returns {number[]|null} `points` with the decoded code points appended,
 *     or null when `string` contains an unpaired surrogate (in which case
 *     `points` is left untouched)
 */
utf16.decode = function(string, points) {
    if (typeof points === "undefined")
        points = [];

    // Decode into a scratch array first so that a malformed string never
    // leaves the caller's array half-filled.
    var decoded = [];

    for (var i = 0, e = string.length; i !== e; ++i) {
        var unit = string.charCodeAt(i);
        var part = unit & 0xfc00;

        // A low surrogate with no high surrogate in front of it.
        if (part === 0xdc00)
            return null;

        // Anything that is not a high surrogate is a code point by itself.
        if (part !== 0xd800) {
            decoded.push(unit);
            continue;
        }

        // A high surrogate must be followed by a low surrogate.
        if (++i === e)
            return null;
        var next = string.charCodeAt(i);
        if ((next & 0xfc00) !== 0xdc00)
            return null;

        // The 20 payload bits are an offset from U+10000, so they must be
        // ADDED to 0x10000: OR-ing drops the carry whenever bit 16 of the
        // payload is set (e.g. U+20000 would come out as U+10000).
        decoded.push(0x10000 + ((unit & 0x03ff) << 10 | next & 0x03ff));
    }

    for (var d = 0, n = decoded.length; d !== n; ++d)
        points.push(decoded[d]);

    return points;
};
72 | ||
/**
 * Encode a sequence of Unicode code points as a UTF-16 string.
 *
 * @param {ArrayLike<number>} points - code points to encode; any indexable
 *     object with a `length` works (utf16.fromCharCode passes `arguments`)
 * @returns {string} the UTF-16 encoded text
 */
utf16.encode = function(points) {
    // String.fromCharCode.apply() passes every unit as a separate call
    // argument, and engines cap the argument count. Flush in bounded
    // chunks so arbitrarily long inputs can be encoded.
    var limit = 0x2000;

    var text = "";
    var units = [];

    for (var i = 0, e = points.length; i !== e; ++i) {
        var point = points[i];
        if (point < 0x10000)
            units.push(point);
        else {
            // Supplementary planes: split the 20-bit offset from U+10000
            // into a high and a low surrogate.
            point -= 0x10000;
            units.push(0xd800 | (0xffc00 & point) >> 10, 0xdc00 | 0x03ff & point);
        }

        if (units.length >= limit) {
            text += String.fromCharCode.apply(String, units);
            units = [];
        }
    }

    return text + String.fromCharCode.apply(String, units);
};
85 | ||
/**
 * Build a utf16 wrapper from code points given as individual arguments.
 *
 * @param {...number} code points, one per argument
 * @returns {utf16} wrapper around the encoded string
 */
utf16.fromCharCode = function() {
    var count = arguments.length;
    var encoded = utf16.encode(arguments);
    return new utf16(encoded, count);
};
89 | ||
/**
 * Translate offsets between code-unit and code-point coordinates.
 *
 * Walks `string` once, tracking the current code-unit index and code-point
 * offset side by side, and rewrites every entry of `after` that equals the
 * current position in the source coordinate system.
 *
 * @param {boolean} mode - true converts code-unit offsets to code-point
 *     offsets; false converts code-point offsets to code-unit offsets
 * @param {string} string - the UTF-16 text the offsets refer to
 * @param {Array} after - offsets to translate, updated in place; entries
 *     that never match a position in `string` (past the end, inside a
 *     surrogate pair, undefined, ...) are left as they are
 * @returns {Array|null} `after`, or null when `string` contains an
 *     unpaired surrogate (in which case `after` is left untouched)
 */
utf16.adjust = function(mode, string, after) {
    var count = after.length;

    // Untranslated offsets; an entry is nulled once it has been matched so
    // that it cannot be matched a second time.
    var pending = after.slice();

    // Translations are staged here and only written back on success, so a
    // malformed string cannot leave the caller's array half-converted.
    var translated = new Array(count);

    var offset = 0, index = 0;

    var check = function() {
        var from = mode ? index : offset;
        var to = mode ? offset : index;

        for (var o = 0; o !== count; ++o)
            // Loose equality on purpose: numeric strings are accepted as
            // offsets, as they always have been.
            if (pending[o] == from) {
                pending[o] = null;
                translated[o] = to;
            }
    };

    for (var e = string.length; index !== e; ++index) {
        check();

        var part = string.charCodeAt(index) & 0xfc00;

        // A low surrogate with no high surrogate in front of it.
        if (part === 0xdc00)
            return null;

        if (part === 0xd800) {
            // A high surrogate must be followed by a low surrogate; the
            // pair occupies two code units but is a single code point.
            if (++index === e)
                return null;
            if ((string.charCodeAt(index) & 0xfc00) !== 0xdc00)
                return null;
        }

        ++offset;
    }

    // Offsets that point just past the final code point.
    check();

    for (var k = 0; k !== count; ++k)
        if (k in translated)
            after[k] = translated[k];

    return after;
};
128 | ||
/**
 * Convert code-point offsets into code-unit offsets for `string`.
 *
 * @param {string} string - the UTF-16 text the offsets refer to
 * @param {Array} offsets - code-point offsets, handed to utf16.adjust
 * @returns {Array|null} whatever utf16.adjust returns for mode `false`
 */
utf16.point2unit = function(string, offsets) {
    var unitToPoint = false;
    return utf16.adjust(unitToPoint, string, offsets);
};
132 | ||
/**
 * Convert code-unit offsets into code-point offsets for `string`.
 *
 * @param {string} string - the UTF-16 text the offsets refer to
 * @param {Array} offsets - code-unit offsets, handed to utf16.adjust
 * @returns {Array|null} whatever utf16.adjust returns for mode `true`
 */
utf16.unit2point = function(string, offsets) {
    var unitToPoint = true;
    return utf16.adjust(unitToPoint, string, offsets);
};
136 | ||
/**
 * Extract the single code point found at a code-point offset.
 *
 * @param {string} string - the UTF-16 text to read from
 * @param {number} offset - code-point offset of the wanted character
 * @returns {string} the one-code-point slice produced by utf16.substring
 */
utf16.charAt = function(string, offset) {
    var following = offset + 1;
    return utf16.substring(string, offset, following);
};
140 | ||
/**
 * Return the Unicode code point found at a code-point offset.
 *
 * @param {string} string - the UTF-16 text to read from
 * @param {number} offset - code-point offset of the wanted character
 * @returns {number} the code point; NaN when utf16.charAt yields an empty
 *     string for `offset`
 */
utf16.charCodeAt = function(string, offset) {
    var symbol = utf16.charAt(string, offset);
    var lead = symbol.charCodeAt(0);

    // One code unit (or none at all): it already is the answer.
    if (symbol.length !== 2)
        return lead;

    var trail = symbol.charCodeAt(1);

    // The 20 payload bits are an offset from U+10000, so they must be
    // ADDED to 0x10000: OR-ing drops the carry whenever bit 16 of the
    // payload is set (e.g. U+20000 would come out as U+10000).
    return 0x10000 + ((lead & 0x03ff) << 10 | trail & 0x03ff);
};
147 | ||
/**
 * Count the code points in a UTF-16 string.
 *
 * Works by translating the string's code-unit length (the offset just past
 * its end) into a code-point offset.
 *
 * @param {string} string - the UTF-16 text to measure
 * @returns {number} number of code points in `string`
 */
utf16.strlen = function(string) {
    var end = [string.length];
    var converted = utf16.unit2point(string, end);
    return converted[0];
};
151 | ||
/**
 * Extract `length` code points starting at code-point offset `start`.
 *
 * @param {string} string - the UTF-16 text to slice
 * @param {number} start - code-point offset of the first character
 * @param {number} [length] - number of code points to take; when omitted
 *     the end offset passed to utf16.substring is undefined
 * @returns {string} the slice produced by utf16.substring
 */
utf16.substr = function(string, start, length) {
    var hasLength = typeof length !== "undefined";
    var stop = hasLength ? start + length : void 0;
    return utf16.substring(string, start, stop);
};
158 | ||
/**
 * Extract the code points between two code-point offsets.
 *
 * Both offsets are translated to code-unit offsets and then handed to the
 * native String.prototype.substring.
 *
 * @param {string} string - the UTF-16 text to slice
 * @param {number} start - code-point offset where the slice begins
 * @param {number} [stop] - code-point offset where the slice ends
 * @returns {string} the selected part of `string`
 */
utf16.substring = function(string, start, stop) {
    var units = utf16.point2unit(string, [start, stop]);
    var first = units[0];
    var last = units[1];
    return string.substring(first, last);
};
163 | ||
/**
 * Instance form of utf16.charAt.
 *
 * @param {number} index - code-point offset of the wanted character
 * @returns {utf16} a new wrapper around the extracted character
 */
utf16.prototype.charAt = function(index) {
    var piece = utf16.charAt(this.string, index);
    return new utf16(piece);
};
167 | ||
/**
 * Instance form of utf16.charCodeAt.
 *
 * @param {number} index - code-point offset of the wanted character
 * @returns {number} the value utf16.charCodeAt reports for the wrapped string
 */
utf16.prototype.charCodeAt = function(index) {
    var wrapped = this.string;
    return utf16.charCodeAt(wrapped, index);
};
171 | ||
/**
 * Instance form of utf16.substr.
 *
 * @param {number} start - code-point offset of the first character
 * @param {number} [length] - number of code points to take
 * @returns {utf16} a new wrapper around the extracted slice
 */
utf16.prototype.substr = function(start, length) {
    var piece = utf16.substr(this.string, start, length);
    return new utf16(piece);
};
175 | ||
/**
 * Instance form of utf16.substring.
 *
 * @param {number} start - code-point offset where the slice begins
 * @param {number} [end] - code-point offset where the slice ends
 * @returns {utf16} a new wrapper around the extracted slice
 */
utf16.prototype.substring = function(start, end) {
    var piece = utf16.substring(this.string, start, end);
    return new utf16(piece);
};
179 | ||
/**
 * Hand back the wrapped native string.
 *
 * @returns {string} the value stored in `this.string`
 */
utf16.prototype.toString = function() {
    var wrapped = this.string;
    return wrapped;
};
183 | ||
184 | return utf16; | |
185 | ||
186 | }); |