]> git.saurik.com Git - utf16js.git/blob - utf16.js
87f9b910e870700fa55c9fb027cf507344750834
[utf16js.git] / utf16.js
1 if (typeof define !== 'function') { var define = require('amdefine')(module) }
2
3 define(function(require) {
4
/**
 * Decode a UTF-16 string into Unicode code points.
 *
 * @param {string} string - Text to decode; it is read one UTF-16 code unit
 *     at a time via charCodeAt.
 * @param {number[]} [points] - Optional array to append to. A fresh array is
 *     created when the argument is omitted.
 * @returns {?number[]} `points`, with one entry appended per decoded code
 *     point, or `null` when the string contains an unpaired surrogate. On a
 *     `null` return, entries decoded before the bad unit have already been
 *     appended to `points`.
 */
var decode = function(string, points) {
    if (typeof points === "undefined")
        points = [];

    for (var i = 0, e = string.length; i != e; ++i) {
        var unit = string.charCodeAt(i);
        var part = unit & 0xfc00;
        if (part == 0xdc00)
            // Low surrogate that was not preceded by a high surrogate.
            return null;
        else if (part != 0xd800)
            points.push(unit);
        else if (++i == e)
            // High surrogate is the last unit of the string.
            return null;
        else {
            var next = string.charCodeAt(i);
            if ((next & 0xfc00) != 0xdc00)
                return null;
            // The 20-bit payload must be ADDED to 0x10000: the high half
            // shifted left by 10 can itself occupy bit 16, so OR-ing would
            // collapse every code point >= U+20000 onto the wrong value.
            points.push(0x10000 + ((unit & 0x03ff) << 10 | next & 0x03ff));
        }
    }

    return points;
};
28
/**
 * Encode a sequence of Unicode code points as a UTF-16 string.
 *
 * @param {ArrayLike<number>} points - Code points to encode. Only `length`
 *     and indexed access are used, so an `arguments` object works too.
 * @returns {string} The encoded string. Values below 0x10000 are emitted as
 *     a single code unit; all others are emitted as a surrogate pair.
 */
var encode = function(points) {
    var units = [];
    for (var i = 0, e = points.length; i != e; ++i) {
        var point = points[i];
        if (point < 0x10000)
            units.push(point);
        else {
            point -= 0x10000;
            units.push(0xd800 | (0xffc00 & point) >> 10, 0xdc00 | 0x03ff & point);
        }
    }

    // Function.prototype.apply spreads the array into call arguments, and
    // engines cap how many arguments one call may take. Convert in bounded
    // slices so long inputs do not raise a RangeError.
    var limit = 0x8000;
    if (units.length <= limit)
        return String.fromCharCode.apply(String, units);

    var pieces = [];
    for (var at = 0; at < units.length; at += limit)
        pieces.push(String.fromCharCode.apply(String, units.slice(at, at + limit)));
    return pieces.join("");
};
41
/**
 * Translate offsets between UTF-16 code-unit indices and code-point indices.
 *
 * @param {boolean} mode - When truthy, entries of `after` are code-unit
 *     indices and are converted to code-point indices; when falsy, the
 *     conversion runs the other way.
 * @param {string} string - Text the offsets refer to.
 * @param {Array} after - Offsets to translate. On success the array is
 *     overwritten in place with the translated values. Entries that match no
 *     boundary (for example `undefined`, negative values, or a unit index in
 *     the middle of a surrogate pair) are left as they were. Entries beyond
 *     the end of the string are shifted by the same amount as the end-of-string
 *     offset, so they stay beyond the end after translation.
 * @returns {?Array} `after`, or `null` when the string contains an unpaired
 *     surrogate. `after` is not modified when `null` is returned.
 */
var adjust = function(mode, string, after) {
    var before = after.slice(); // untranslated inputs; nulled once resolved
    var result = after.slice(); // translated outputs, committed only on success
    var offset = 0, index = 0;  // offset counts code points, index counts code units

    // Resolve every pending input that equals the current source position.
    var check = function() {
        var from = mode ? index : offset;
        var to = mode ? offset : index;

        for (var o = 0, l = before.length; o != l; ++o)
            if (before[o] == from) {
                before[o] = null;
                result[o] = to;
            }
    };

    for (var e = string.length; index != e; ++index) {
        check();

        var part = string.charCodeAt(index) & 0xfc00;

        if (part == 0xdc00)
            return null;
        if (part == 0xd800) {
            // A high surrogate must be followed by a low surrogate.
            if (++index == e)
                return null;
            if ((string.charCodeAt(index) & 0xfc00) != 0xdc00)
                return null;
        }

        ++offset;
    }

    check();

    // Inputs past the end of the string were never visited above. Leaving
    // them untranslated would let a code-point offset be used as a code-unit
    // offset (or vice versa), which can land inside a surrogate pair.
    var last = mode ? index : offset;
    var mapped = mode ? offset : index;
    var i, n = before.length;
    for (i = 0; i != n; ++i)
        if (before[i] != null && before[i] > last)
            result[i] = mapped + (before[i] - last);

    for (i = 0; i != n; ++i)
        after[i] = result[i];
    return after;
};
80
/**
 * Convert code-point offsets into UTF-16 code-unit offsets.
 *
 * @param {string} string - Text the offsets refer to.
 * @param {Array} offsets - Code-point offsets; handed to adjust(), which
 *     rewrites the array in place.
 * @returns {?Array} Whatever adjust() returns: the translated `offsets`
 *     array, or `null` when the string has an unpaired surrogate.
 */
var point2unit = function(string, offsets) {
    var unitsToPoints = false; // adjust() mode flag: false maps points to units
    return adjust(unitsToPoints, string, offsets);
};
84
/**
 * Convert UTF-16 code-unit offsets into code-point offsets.
 *
 * @param {string} string - Text the offsets refer to.
 * @param {Array} offsets - Code-unit offsets; handed to adjust(), which
 *     rewrites the array in place.
 * @returns {?Array} Whatever adjust() returns: the translated `offsets`
 *     array, or `null` when the string has an unpaired surrogate.
 */
var unit2point = function(string, offsets) {
    var unitsToPoints = true; // adjust() mode flag: true maps units to points
    return adjust(unitsToPoints, string, offsets);
};
88
/**
 * Return the single code point found at a code-point offset.
 *
 * @param {string} string - Text to read from.
 * @param {number} offset - Code-point index of the wanted character.
 * @returns {string} The result of substring() over the one-code-point range
 *     starting at `offset`.
 */
var charAt = function(string, offset) {
    var next = offset + 1;
    return substring(string, offset, next);
};
92
/**
 * Return the numeric code point found at a code-point offset.
 *
 * @param {string} string - Text to read from.
 * @param {number} offset - Code-point index of the wanted character.
 * @returns {number} The code point. When charAt() yields anything other than
 *     a two-unit string, the first unit's charCodeAt(0) is returned as is
 *     (which is NaN for an empty result).
 */
var charCodeAt = function(string, offset) {
    var ch = charAt(string, offset);
    if (ch.length != 2)
        return ch.charCodeAt(0);
    // The 20-bit surrogate payload must be ADDED to 0x10000: the high half
    // shifted left by 10 can itself occupy bit 16, so OR-ing would return the
    // wrong value for every code point >= U+20000.
    return 0x10000 + ((ch.charCodeAt(0) & 0x03ff) << 10 | ch.charCodeAt(1) & 0x03ff);
};
99
/**
 * Count the code points in a string.
 *
 * @param {string} string - Text to measure.
 * @returns {number} The code-point offset that unit2point() reports for the
 *     string's code-unit length.
 * @throws {TypeError} When unit2point() returns `null`, because the result
 *     is indexed without a check.
 */
var length = function(string) {
    var unitCount = string.length;
    var translated = unit2point(string, [unitCount]);
    return translated[0];
};
103
/**
 * Extract `length` code points beginning at code-point offset `start`.
 *
 * @param {string} string - Text to slice.
 * @param {number} start - Code-point index of the first character.
 * @param {number} [length] - Number of code points to take. When omitted,
 *     substring() is called with an undefined end.
 * @returns {string} The result of substring() for the computed range.
 */
var substr = function(string, start, length) {
    // No length given: leave the end of the range undefined.
    if (typeof length === "undefined")
        return substring(string, start);
    return substring(string, start, start + length);
};
110
/**
 * Extract the text between two code-point offsets.
 *
 * @param {string} string - Text to slice.
 * @param {number} start - Code-point index where the slice begins.
 * @param {number} [stop] - Code-point index where the slice ends.
 * @returns {string} `string.substring()` applied to the code-unit offsets
 *     that point2unit() produced for `start` and `stop`.
 * @throws {TypeError} When point2unit() returns `null`, because the result
 *     is indexed without a check.
 */
var substring = function(string, start, stop) {
    var units = point2unit(string, [start, stop]);
    var first = units[0];
    var last = units[1];
    return string.substring(first, last);
};
115
/**
 * Wrapper around a native string whose `length` and accessor methods work in
 * code points instead of UTF-16 code units.
 *
 * @constructor
 * @param {string} string - Native string to wrap; kept on `this.string`.
 */
var Unicode = function(string) {
    this.string = string;
    // Code-point count as computed by the module-level length() helper.
    this.length = length(string);
};

/**
 * Build a native string from code points given as individual arguments.
 *
 * @returns {string} The result of encode() applied to the argument list.
 */
Unicode.fromCharCode = function() {
    var codePoints = arguments;
    return encode(codePoints);
};

/**
 * @param {number} index - Code-point index.
 * @returns {Unicode} A new wrapper around charAt(this.string, index).
 */
Unicode.prototype.charAt = function(index) {
    var piece = charAt(this.string, index);
    return new Unicode(piece);
};

/**
 * @param {number} index - Code-point index.
 * @returns {number} The value of charCodeAt(this.string, index).
 */
Unicode.prototype.charCodeAt = function(index) {
    var source = this.string;
    return charCodeAt(source, index);
};

/**
 * @param {number} start - Code-point index of the first character.
 * @param {number} [length] - Number of code points to take.
 * @returns {Unicode} A new wrapper around substr(this.string, start, length).
 */
Unicode.prototype.substr = function(start, length) {
    var piece = substr(this.string, start, length);
    return new Unicode(piece);
};

/**
 * @param {number} start - Code-point index where the slice begins.
 * @param {number} [end] - Code-point index where the slice ends.
 * @returns {Unicode} A new wrapper around substring(this.string, start, end).
 */
Unicode.prototype.substring = function(start, end) {
    var piece = substring(this.string, start, end);
    return new Unicode(piece);
};

/**
 * @returns {string} The wrapped native string.
 */
Unicode.prototype.toString = function() {
    var wrapped = this.string;
    return wrapped;
};
144
145 return {
146 decode: decode,
147 encode: encode,
148
149 point2unit: point2unit,
150 unit2point: unit2point,
151
152 charAt: charAt,
153 charCodeAt: charCodeAt,
154 length: length,
155 substr: substr,
156 substring: substring,
157
158 Unicode: Unicode,
159 };
160
161 });