// git.saurik.com Git - utf16js.git/blob - utf16.js
// blob b76fc714864106a53a8b96fc1e7ec75c281a14a6
// [utf16js.git] / utf16.js
// AMD bootstrap: when no global `define` function exists (e.g. under plain
// Node.js), obtain one from the `amdefine` package bound to this module.
// The statement is kept verbatim in the one-line form the amdefine README
// documents; only the explanatory comment was added.
if (typeof define !== 'function') { var define = require('amdefine')(module) }

define(function(require) {

    // Upper bound on the number of code units handed to a single
    // String.fromCharCode.apply() call, so that very long inputs do not hit
    // the engine's limit on the number of call arguments.
    var ENCODE_CHUNK = 0x8000;

    // True when `unit` is a high (leading) surrogate, 0xD800..0xDBFF.
    var isHighSurrogate = function(unit) {
        return (unit & 0xfc00) === 0xd800;
    };

    // True when `unit` is a low (trailing) surrogate, 0xDC00..0xDFFF.
    var isLowSurrogate = function(unit) {
        return (unit & 0xfc00) === 0xdc00;
    };

    // Combines a surrogate pair into its code point. The 20 payload bits are
    // ADDED to 0x10000: OR-ing them in would lose bit 16 of the payload and
    // mis-decode e.g. U+20000 as U+10000.
    var combineSurrogates = function(high, low) {
        return 0x10000 + ((high & 0x03ff) << 10 | low & 0x03ff);
    };

    // Converts `value` to an integer code point offset within [0, length]:
    // NaN and negative values become 0, values past the end become `length`,
    // fractions are rounded down.
    var clampOffset = function(value, length) {
        value = Math.floor(Number(value));
        if (!(value > 0))
            return 0;
        return value < length ? value : length;
    };

    /**
     * Wraps a JavaScript string so that its length and indexing are expressed
     * in Unicode code points instead of UTF-16 code units.
     *
     * @param {string} string - the string to wrap; stored as `this.string`.
     * @throws {TypeError} if `string` contains an unpaired surrogate
     *     (raised by utf16.strlen).
     */
    var utf16 = function(string) {
        this.string = string;
        this.length = utf16.strlen(string);
    };

    /**
     * Decodes a string into an array of code points.
     *
     * @param {string} string - UTF-16 text to decode.
     * @param {number[]} [points] - array to append to; a new array is created
     *     when omitted.
     * @returns {?number[]} `points`, or null when `string` contains an
     *     unpaired surrogate. In the null case code points decoded before the
     *     error have already been appended to `points`.
     */
    utf16.decode = function(string, points) {
        if (typeof points === "undefined")
            points = [];

        for (var i = 0, e = string.length; i !== e; ++i) {
            var unit = string.charCodeAt(i);

            // A low surrogate with no preceding high surrogate.
            if (isLowSurrogate(unit))
                return null;

            if (!isHighSurrogate(unit)) {
                points.push(unit);
                continue;
            }

            // A high surrogate must be followed by a low surrogate.
            if (++i === e)
                return null;
            var next = string.charCodeAt(i);
            if (!isLowSurrogate(next))
                return null;

            points.push(combineSurrogates(unit, next));
        }

        return points;
    };

    /**
     * Encodes a sequence of code points as a string.
     *
     * Values below 0x10000 are emitted as a single code unit; all other
     * values are emitted as a surrogate pair. Values are not range-checked.
     *
     * @param {ArrayLike<number>} points - code points (array or `arguments`).
     * @returns {string} the encoded string.
     */
    utf16.encode = function(points) {
        var result = "";
        var units = [];

        for (var i = 0, e = points.length; i < e; ++i) {
            var point = points[i];
            if (point < 0x10000)
                units.push(point);
            else {
                point -= 0x10000;
                units.push(0xd800 | (0xffc00 & point) >> 10, 0xdc00 | 0x03ff & point);
            }

            // Flush periodically so apply() never receives a huge argument
            // list. Splitting between the halves of a pair is harmless
            // because the pieces are simply concatenated.
            if (units.length >= ENCODE_CHUNK) {
                result += String.fromCharCode.apply(String, units);
                units = [];
            }
        }

        return result + String.fromCharCode.apply(String, units);
    };

    /**
     * Code point counterpart of String.fromCharCode.
     *
     * @param {...number} points - code points, passed as individual arguments.
     * @returns {utf16} a wrapper around the encoded string.
     */
    utf16.fromCharCode = function() {
        return new utf16(utf16.encode(arguments));
    };

    /**
     * Translates offsets between code unit and code point coordinates.
     *
     * The string is walked once; each time the walk reaches a character
     * boundary, every offset equal to the current position in the source
     * coordinate system is replaced by the position in the target system.
     *
     * @param {boolean} mode - true: code unit offsets -> code point offsets;
     *     false: code point offsets -> code unit offsets.
     * @param {string} string - the text the offsets refer to.
     * @param {Array} after - offsets to translate. This array is modified in
     *     place and is also the return value. Entries that never coincide
     *     with a character boundary (past the end, non-numeric, or pointing
     *     between the halves of a surrogate pair) are left unchanged.
     * @returns {?Array} `after`, or null when `string` contains an unpaired
     *     surrogate (entries matched before the error have already been
     *     rewritten).
     */
    utf16.adjust = function(mode, string, after) {
        // Untranslated copy: matching is done against this copy so an
        // already-translated value can never be matched a second time.
        var before = after.slice();
        var offset = 0, index = 0;

        var check = function() {
            var from = mode ? index : offset;
            var to = mode ? offset : index;

            for (var o = 0, l = after.length; o !== l; ++o)
                // Loose equality is intentional: it lets numeric strings
                // match, while the null sentinel below matches nothing.
                if (before[o] == from) {
                    before[o] = null;
                    after[o] = to;
                }
        };

        for (var e = string.length; index !== e; ++index) {
            check();

            var unit = string.charCodeAt(index);

            if (isLowSurrogate(unit))
                return null;

            if (isHighSurrogate(unit)) {
                // Consume the low half of the pair as part of this character.
                if (++index === e)
                    return null;
                if (!isLowSurrogate(string.charCodeAt(index)))
                    return null;
            }

            ++offset;
        }

        // Offsets that point exactly at the end of the string.
        check();
        return after;
    };

    /**
     * Translates code point offsets into code unit offsets.
     * See utf16.adjust for the handling of `offsets` and the return value.
     */
    utf16.point2unit = function(string, offsets) {
        return utf16.adjust(false, string, offsets);
    };

    /**
     * Translates code unit offsets into code point offsets.
     * See utf16.adjust for the handling of `offsets` and the return value.
     */
    utf16.unit2point = function(string, offsets) {
        return utf16.adjust(true, string, offsets);
    };

    /**
     * Returns the character at code point offset `offset` as a string of one
     * or two code units, or "" when the offset is out of range.
     */
    utf16.charAt = function(string, offset) {
        return utf16.substring(string, offset, offset + 1);
    };

    /**
     * Returns the code point at code point offset `offset`, or NaN when the
     * offset is out of range.
     */
    utf16.charCodeAt = function(string, offset) {
        var character = utf16.charAt(string, offset);
        if (character.length !== 2)
            return character.charCodeAt(0);
        return combineSurrogates(character.charCodeAt(0), character.charCodeAt(1));
    };

    /**
     * Returns the length of `string` in code points.
     *
     * @throws {TypeError} if `string` contains an unpaired surrogate.
     */
    utf16.strlen = function(string) {
        var translated = utf16.unit2point(string, [string.length]);
        if (translated === null)
            throw new TypeError("ill-formed UTF-16 string: unpaired surrogate");
        return translated[0];
    };

    /**
     * Returns `length` code points of `string` starting at code point offset
     * `start`, or everything from `start` when `length` is omitted.
     * Implemented as substring(start, start + length), so a negative `start`
     * is treated as 0 rather than as an offset from the end.
     */
    utf16.substr = function(string, start, length) {
        var stop;
        if (typeof length !== "undefined")
            stop = start + length;
        return utf16.substring(string, start, stop);
    };

    /**
     * Code point counterpart of String.prototype.substring.
     *
     * @param {string} string - the text to slice.
     * @param {number} start - code point offset of one end of the range.
     * @param {number} [stop] - code point offset of the other end; defaults
     *     to the end of the string.
     * @returns {string} the selected text.
     * @throws {TypeError} if `string` contains an unpaired surrogate.
     */
    utf16.substring = function(string, start, stop) {
        // Offsets are normalised into [0, length] before translation.
        // Otherwise an out-of-range code point offset would be passed through
        // untranslated and used as a code unit offset, which can cut a
        // surrogate pair in half.
        var length = utf16.strlen(string);
        var range = utf16.point2unit(string, [
            clampOffset(start, length),
            typeof stop === "undefined" ? length : clampOffset(stop, length)
        ]);
        return string.substring(range[0], range[1]);
    };

    /** Instance form of utf16.charAt; the result is wrapped in a utf16. */
    utf16.prototype.charAt = function(index) {
        return new utf16(utf16.charAt(this.string, index));
    };

    /** Instance form of utf16.charCodeAt. */
    utf16.prototype.charCodeAt = function(index) {
        return utf16.charCodeAt(this.string, index);
    };

    /** Instance form of utf16.substr; the result is wrapped in a utf16. */
    utf16.prototype.substr = function(start, length) {
        return new utf16(utf16.substr(this.string, start, length));
    };

    /** Instance form of utf16.substring; the result is wrapped in a utf16. */
    utf16.prototype.substring = function(start, end) {
        return new utf16(utf16.substring(this.string, start, end));
    };

    /** Returns the wrapped JavaScript string. */
    utf16.prototype.toString = function() {
        return this.string;
    };

    return utf16;

});