X-Git-Url: https://git.saurik.com/utf16js.git/blobdiff_plain/5951f915abb80a3b18a89fed5ffbcb3c7dcf956e..7267a723a4383894c065276e2e43cc7b386d8f48:/utf16.js diff --git a/utf16.js b/utf16.js index 89322ec..b76fc71 100644 --- a/utf16.js +++ b/utf16.js @@ -2,31 +2,39 @@ if (typeof define !== 'function') { var define = require('amdefine')(module) } define(function(require) { -var decode = function(array, string) { +var utf16 = function(string) { + this.string = string; + this.length = utf16.strlen(string); +}; + +utf16.decode = function(string, points) { + if (typeof points === "undefined") + points = []; + for (var i = 0, e = string.length; i != e; ++i) { var unit = string.charCodeAt(i); var part = unit & 0xfc00; if (part == 0xdc00) return null; else if (part != 0xd800) - array.push(unit); + points.push(unit); else if (++i == e) return null; else { var next = string.charCodeAt(i); if ((next & 0xfc00) != 0xdc00) return null; - array.push(0x10000 | (unit & 0x03ff) << 10 | next & 0x03ff); + points.push(0x10000 | (unit & 0x03ff) << 10 | next & 0x03ff); } } - return array; + return points; }; -var encode = function(array) { +utf16.encode = function(points) { var units = []; - for (var i = 0, e = array.length; i != e; ++i) { - var point = array[i]; + for (var i = 0, e = points.length; i != e; ++i) { + var point = points[i]; if (point < 0x10000) units.push(point); else { @@ -36,9 +44,104 @@ var encode = function(array) { } return String.fromCharCode.apply(String, units); }; -return { - decode: decode, - encode: encode, +utf16.fromCharCode = function() { + return new utf16(utf16.encode(arguments), arguments.length); +}; + +utf16.adjust = function(mode, string, after) { + var before = after.slice(); + var offset = 0, index = 0; + + var check = function() { + var from = mode ? index : offset; + var to = mode ? offset : index; + + for (var o = 0, l = after.length; o != l; ++o) + if (before[o] == from) { + before[o] = null; + after[o] = to; + } + }; + + for (var e = string.length; index != e; ++index) { + check(); + + var unit = string.charCodeAt(index); + var part = unit & 0xfc00; + + if (part == 0xdc00) + return null; + else if (part != 0xd800); + else if (++index == e) + return null; + else { + var next = string.charCodeAt(index); + if ((next & 0xfc00) != 0xdc00) + return null; + } + + ++offset; + } + + check(); + return after; +}; + +utf16.point2unit = function(string, offsets) { + return utf16.adjust(false, string, offsets); }; +utf16.unit2point = function(string, offsets) { + return utf16.adjust(true, string, offsets); +}; + +utf16.charAt = function(string, offset) { + return utf16.substring(string, offset, offset + 1); +}; + +utf16.charCodeAt = function(string, offset) { + var char = utf16.charAt(string, offset); + if (char.length != 2) + return char.charCodeAt(0); + return 0x10000 | (char.charCodeAt(0) & 0x03ff) << 10 | char.charCodeAt(1) & 0x03ff +}; + +utf16.strlen = function(string) { + return utf16.unit2point(string, [string.length])[0]; +}; + +utf16.substr = function(string, start, length) { + var stop; + if (typeof length !== "undefined") + stop = start + length; + return utf16.substring(string, start, stop); +}; + +utf16.substring = function(string, start, stop) { + var range = utf16.point2unit(string, [start, stop]); + return string.substring(range[0], range[1]); +}; + +utf16.prototype.charAt = function(index) { + return new utf16(utf16.charAt(this.string, index)); +}; + +utf16.prototype.charCodeAt = function(index) { + return utf16.charCodeAt(this.string, index); +}; + +utf16.prototype.substr = function(start, length) { + return new utf16(utf16.substr(this.string, start, length)); +}; + +utf16.prototype.substring = function(start, end) { + return new utf16(utf16.substring(this.string, start, end)); +}; + +utf16.prototype.toString = function() { + return this.string; +}; + +return utf16; + });