]> git.saurik.com Git - utf16js.git/blob - utf16.js
ddaae933016378f56e81624b9f565014a06e8771
[utf16js.git] / utf16.js
1 /* utf16js - easy interfaces for Unicode compliance
2 * Copyright (C) 2012 Jay Freeman (saurik)
3 */
4
5 /* Modified BSD License {{{ */
6 /*
7 * Redistribution and use in source and binary
8 * forms, with or without modification, are permitted
9 * provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the
12 * above copyright notice, this list of conditions
13 * and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the
15 * above copyright notice, this list of conditions
16 * and the following disclaimer in the documentation
17 * and/or other materials provided with the
18 * distribution.
19 * 3. The name of the author may not be used to endorse
20 * or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS''
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
25 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
34 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
35 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
36 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 */
38 /* }}} */
39
40 if (typeof define !== 'function') { var define = require('amdefine')(module) }
41
42 define([], function() {
43
/**
 * Wrapper object around a native string.
 *
 * Stores the string unchanged on `this.string` and stores whatever
 * `utf16.strlen` reports for it on `this.length`.
 *
 * @constructor
 * @param {string} string - the native string to wrap
 */
var utf16 = function(string) {
    this.string = string;

    var measured = utf16.strlen(string);
    this.length = measured;
};
48
/**
 * Decodes a native (UTF-16) string into an array of code points.
 *
 * @param {string} string - the string whose code units are decoded
 * @param {Array<number>} [points] - optional array to append to; a new
 *     array is created when omitted
 * @returns {Array<number>|null} `points` with one entry appended per code
 *     point, or null when the string contains a low surrogate without a
 *     preceding high surrogate, or a high surrogate that is not followed by
 *     a low surrogate. Code points decoded before the malformed unit have
 *     already been appended to `points` when null is returned.
 */
utf16.decode = function(string, points) {
    if (typeof points === "undefined") {
        points = [];
    }

    for (var i = 0, e = string.length; i != e; ++i) {
        var unit = string.charCodeAt(i);
        var part = unit & 0xfc00;

        if (part == 0xdc00) {
            // low surrogate with no high surrogate before it
            return null;
        } else if (part != 0xd800) {
            // ordinary code unit: it is its own code point
            points.push(unit);
        } else if (++i == e) {
            // high surrogate at the very end of the string
            return null;
        } else {
            var next = string.charCodeAt(i);
            if ((next & 0xfc00) != 0xdc00) {
                return null;
            }

            // The 0x10000 offset must be ADDED: the shifted high bits can
            // already contain bit 16 (e.g. U+20000), so OR-ing would lose it.
            points.push(0x10000 + ((unit & 0x03ff) << 10) + (next & 0x03ff));
        }
    }

    return points;
};
72
/**
 * Encodes an array (or array-like, e.g. `arguments`) of code points into a
 * native UTF-16 string.
 *
 * Values below 0x10000 are emitted as a single code unit; every other value
 * is emitted as a high/low surrogate pair. No range validation is performed.
 *
 * @param {ArrayLike<number>} points - code points to encode
 * @returns {string} the encoded string
 */
utf16.encode = function(points) {
    // String.fromCharCode.apply passes every unit as a separate argument, and
    // engines cap the number of arguments; flush in bounded chunks so large
    // inputs do not throw a RangeError.
    var CHUNK = 0x2000;

    var result = "";
    var units = [];

    for (var i = 0, e = points.length; i < e; ++i) {
        var point = points[i];

        if (point < 0x10000) {
            units.push(point);
        } else {
            point -= 0x10000;
            units.push(0xd800 | (0xffc00 & point) >> 10, 0xdc00 | 0x03ff & point);
        }

        if (units.length >= CHUNK) {
            result += String.fromCharCode.apply(String, units);
            units = [];
        }
    }

    return result + String.fromCharCode.apply(String, units);
};
85
/**
 * Creates a utf16 wrapper from code points given as individual arguments.
 *
 * The arguments object is handed to `utf16.encode` and the resulting string
 * is wrapped with `new utf16(...)`. The argument count is passed as a second
 * constructor argument.
 *
 * @param {...number} code points to encode
 * @returns {utf16} wrapper around the encoded string
 */
utf16.fromCharCode = function() {
    var points = arguments;
    var encoded = utf16.encode(points);
    return new utf16(encoded, points.length);
};
89
/**
 * Converts offsets between code-unit indices and code-point counts by walking
 * `string` once.
 *
 * When `mode` is truthy, entries of `after` are matched against code-unit
 * indices and replaced with the corresponding code-point count; when falsy,
 * the conversion runs the other way. Only positions at the start of a code
 * point (and the end of the string) are matched; entries that match no such
 * position are left as they are.
 *
 * @param {boolean} mode - truthy: units -> points; falsy: points -> units
 * @param {string} string - the string the offsets refer to
 * @param {Array} after - offsets to convert; rewritten IN PLACE
 * @returns {Array|null} `after`, or null when the string contains a low
 *     surrogate without a preceding high surrogate or a high surrogate not
 *     followed by a low surrogate (entries matched before that point have
 *     already been rewritten)
 */
utf16.adjust = function(mode, string, after) {
    // Snapshot of the requested offsets; a slot is nulled once converted so
    // that its freshly written value in `after` is never matched again.
    var pending = after.slice();

    var unitIndex = 0;
    var pointCount = 0;
    var total = string.length;

    var remap = function() {
        var source = mode ? unitIndex : pointCount;
        var target = mode ? pointCount : unitIndex;

        for (var slot = 0, count = after.length; slot != count; ++slot) {
            if (pending[slot] == source) {
                pending[slot] = null;
                after[slot] = target;
            }
        }
    };

    while (unitIndex != total) {
        remap();

        var lead = string.charCodeAt(unitIndex) & 0xfc00;

        if (lead == 0xdc00) {
            // low surrogate where a code point should start
            return null;
        }

        if (lead == 0xd800) {
            // high surrogate: consume the unit that must complete the pair
            ++unitIndex;
            if (unitIndex == total) {
                return null;
            }

            var trail = string.charCodeAt(unitIndex) & 0xfc00;
            if (trail != 0xdc00) {
                return null;
            }
        }

        ++pointCount;
        ++unitIndex;
    }

    // Also convert offsets that point at the end of the string.
    remap();
    return after;
};
128
/**
 * Converts code-point offsets into code-unit indices for `string`.
 *
 * Thin wrapper over `utf16.adjust` with `mode` set to false; see that
 * function for the in-place rewrite of `offsets` and the null return.
 *
 * @param {string} string - the string the offsets refer to
 * @param {Array} offsets - code-point offsets to convert
 * @returns {Array|null} whatever `utf16.adjust` returns
 */
utf16.point2unit = function(string, offsets) {
    var unitsToPoints = false;
    return utf16.adjust(unitsToPoints, string, offsets);
};
132
/**
 * Converts code-unit indices into code-point offsets for `string`.
 *
 * Thin wrapper over `utf16.adjust` with `mode` set to true; see that
 * function for the in-place rewrite of `offsets` and the null return.
 *
 * @param {string} string - the string the offsets refer to
 * @param {Array} offsets - code-unit indices to convert
 * @returns {Array|null} whatever `utf16.adjust` returns
 */
utf16.unit2point = function(string, offsets) {
    var unitsToPoints = true;
    return utf16.adjust(unitsToPoints, string, offsets);
};
136
/**
 * Returns the piece of `string` covering the single code point at `offset`.
 *
 * Implemented as `utf16.substring(string, offset, offset + 1)`.
 *
 * @param {string} string - the string to read from
 * @param {number} offset - code-point offset
 * @returns {string} result of the underlying `utf16.substring` call
 */
utf16.charAt = function(string, offset) {
    var following = offset + 1;
    return utf16.substring(string, offset, following);
};
140
/**
 * Returns the code point found at code-point offset `offset` of `string`.
 *
 * The character is fetched with `utf16.charAt`. When the result is not
 * exactly two code units long, the value of its first code unit is returned
 * (NaN for an empty result, per String.prototype.charCodeAt). A two-unit
 * result is combined as a surrogate pair.
 *
 * @param {string} string - the string to read from
 * @param {number} offset - code-point offset
 * @returns {number} the code point, or NaN when nothing is at `offset`
 */
utf16.charCodeAt = function(string, offset) {
    var chr = utf16.charAt(string, offset);
    if (chr.length != 2) {
        return chr.charCodeAt(0);
    }

    var high = chr.charCodeAt(0) & 0x03ff;
    var low = chr.charCodeAt(1) & 0x03ff;

    // The 0x10000 offset must be ADDED: `high << 10` can already contain
    // bit 16 (e.g. U+20000), so OR-ing would drop it.
    return 0x10000 + (high << 10) + low;
};
147
/**
 * Returns the length of `string` measured in code points.
 *
 * Converts the code-unit length with `utf16.unit2point` and returns the
 * single converted entry. If `utf16.unit2point` returns null, indexing that
 * result throws a TypeError.
 *
 * @param {string} string - the string to measure
 * @returns {number} number of code points
 */
utf16.strlen = function(string) {
    var offsets = [string.length];
    var converted = utf16.unit2point(string, offsets);
    return converted[0];
};
151
/**
 * Code-point flavoured `substr`: extracts `length` code points from `start`.
 *
 * Translates (start, length) into (start, start + length) and delegates to
 * `utf16.substring`. When `length` is undefined the stop position is left
 * undefined as well.
 *
 * @param {string} string - the string to slice
 * @param {number} start - code-point offset to start at
 * @param {number} [length] - number of code points to take
 * @returns {string} result of the underlying `utf16.substring` call
 */
utf16.substr = function(string, start, length) {
    var hasLength = typeof length !== "undefined";
    var stop = hasLength ? start + length : void 0;
    return utf16.substring(string, start, stop);
};
158
/**
 * Code-point flavoured `substring`.
 *
 * Converts `start` and `stop` from code-point offsets to code-unit indices
 * with `utf16.point2unit`, then slices with the native
 * String.prototype.substring. If `utf16.point2unit` returns null, indexing
 * that result throws a TypeError.
 *
 * @param {string} string - the string to slice
 * @param {number} start - code-point offset of the first character
 * @param {number} [stop] - code-point offset just past the last character
 * @returns {string} the extracted portion of `string`
 */
utf16.substring = function(string, start, stop) {
    var bounds = utf16.point2unit(string, [start, stop]);
    var first = bounds[0];
    var last = bounds[1];
    return string.substring(first, last);
};
163
/**
 * Instance form of `utf16.charAt`, applied to the wrapped `this.string`.
 *
 * @param {number} index - code-point offset
 * @returns {utf16} a new wrapper around the result of `utf16.charAt`
 */
utf16.prototype.charAt = function(index) {
    var piece = utf16.charAt(this.string, index);
    return new utf16(piece);
};
167
/**
 * Instance form of `utf16.charCodeAt`, applied to the wrapped `this.string`.
 *
 * @param {number} index - code-point offset
 * @returns {number} whatever `utf16.charCodeAt` returns for that offset
 */
utf16.prototype.charCodeAt = function(index) {
    var wrapped = this.string;
    return utf16.charCodeAt(wrapped, index);
};
171
/**
 * Instance form of `utf16.substr`, applied to the wrapped `this.string`.
 *
 * @param {number} start - code-point offset to start at
 * @param {number} [length] - number of code points to take
 * @returns {utf16} a new wrapper around the result of `utf16.substr`
 */
utf16.prototype.substr = function(start, length) {
    var piece = utf16.substr(this.string, start, length);
    return new utf16(piece);
};
175
/**
 * Instance form of `utf16.substring`, applied to the wrapped `this.string`.
 *
 * @param {number} start - code-point offset of the first character
 * @param {number} [end] - code-point offset just past the last character
 * @returns {utf16} a new wrapper around the result of `utf16.substring`
 */
utf16.prototype.substring = function(start, end) {
    var piece = utf16.substring(this.string, start, end);
    return new utf16(piece);
};
179
/**
 * Returns the wrapped native string (`this.string`).
 *
 * @returns {string} the value stored on `this.string`
 */
utf16.prototype.toString = function() {
    var wrapped = this.string;
    return wrapped;
};
183
184 return utf16;
185
186 });