/* utf16js - easy interfaces for Unicode compliance
 * Copyright (C) 2012 Jay Freeman (saurik)
*/

/* Modified BSD License {{{ */
/*
 * Redistribution and use in source and binary
 * forms, with or without modification, are permitted
 * provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the
 *    above copyright notice, this list of conditions
 *    and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the
 *    above copyright notice, this list of conditions
 *    and the following disclaimer in the documentation
 *    and/or other materials provided with the
 *    distribution.
 * 3. The name of the author may not be used to endorse
 *    or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* }}} */

if (typeof define !== 'function') { var define = require('amdefine')(module) }
define(function(require) {

/* Wrapper around a native string whose `length` counts code points
 * instead of UTF-16 code units.
 *
 * The constructor calls utf16.strlen(), which throws a TypeError when the
 * string contains an unpaired surrogate (see utf16.adjust). */
var utf16 = function(string) {
    this.string = string;
    this.length = utf16.strlen(string);
};

/* Decode a native string into an array of code points.
 *
 * string: the UTF-16 string to decode.
 * points: optional array to append to; a new array is created if omitted.
 *
 * Returns `points`, or null if the string contains an unpaired surrogate
 * (code points decoded before the error have already been appended). */
utf16.decode = function(string, points) {
    if (typeof points === "undefined")
        points = [];

    for (var i = 0, e = string.length; i != e; ++i) {
        var unit = string.charCodeAt(i);
        var part = unit & 0xfc00;

        if (part === 0xdc00)
            // a low surrogate with no preceding high surrogate
            return null;
        else if (part !== 0xd800)
            points.push(unit);
        else if (++i == e)
            // a high surrogate at the very end of the string
            return null;
        else {
            var next = string.charCodeAt(i);
            if ((next & 0xfc00) !== 0xdc00)
                return null;

            // The 20 payload bits must be *added* to 0x10000: for code
            // points >= U+20000 the payload itself has bit 16 set, so a
            // bitwise OR would silently drop the offset.
            points.push(0x10000 + ((unit & 0x03ff) << 10 | next & 0x03ff));
        }
    }

    return points;
};

/* Encode an array (or array-like, e.g. `arguments`) of code points into a
 * native UTF-16 string. */
utf16.encode = function(points) {
    var units = [];

    for (var i = 0, e = points.length; i != e; ++i) {
        var point = points[i];
        if (point < 0x10000)
            units.push(point);
        else {
            // NOTE(review): the original body of this branch falls in a gap
            // between two diff hunks and was not visible; this is the
            // standard UTF-16 surrogate split - confirm against upstream.
            point -= 0x10000;
            units.push(0xd800 | point >> 10);
            units.push(0xdc00 | point & 0x03ff);
        }
    }

    // Convert in slices so that very long inputs do not exceed the
    // engine's limit on the number of arguments passed through apply().
    var string = "";
    for (var o = 0, l = units.length; o < l; o += 0x8000)
        string += String.fromCharCode.apply(String, units.slice(o, o + 0x8000));
    return string;
};

/* Build a utf16 object from code points given as individual arguments. */
utf16.fromCharCode = function() {
    return new utf16(utf16.encode(arguments));
};

/* Translate offsets between code-point and code-unit coordinates.
 *
 * mode:   true maps code-unit offsets to code-point offsets,
 *         false maps code-point offsets to code-unit offsets.
 * string: the string the offsets refer to.
 * after:  array of offsets; it is rewritten IN PLACE and also returned.
 *
 * Entries that match no position reached by the scan (for example values
 * past the end, undefined, or a unit offset that falls between the two
 * halves of a surrogate pair) are left untouched.
 *
 * Returns `after`, or null if the string contains an unpaired surrogate
 * (in which case `after` may already be partially rewritten). */
utf16.adjust = function(mode, string, after) {
    // Snapshot of the requested offsets; an entry is cleared once it has
    // been translated so that its translated value is never matched again.
    var before = after.slice();
    var offset = 0, index = 0;

    var check = function() {
        var from = mode ? index : offset;
        var to = mode ? offset : index;

        for (var o = 0, l = after.length; o != l; ++o)
            // loose equality is kept from the original comparison
            if (before[o] == from) {
                before[o] = null;
                after[o] = to;
            }
    };

    for (var e = string.length; index != e; ++index) {
        check();

        var unit = string.charCodeAt(index);
        var part = unit & 0xfc00;

        if (part === 0xdc00)
            return null;

        if (part === 0xd800) {
            // step over the low half of the pair, validating it
            if (++index == e)
                return null;

            var next = string.charCodeAt(index);
            if ((next & 0xfc00) !== 0xdc00)
                return null;
        }

        ++offset;
    }

    // one more pass so that offsets equal to the total length are mapped
    check();
    return after;
};

/* Map code-point offsets to code-unit offsets (see utf16.adjust). */
utf16.point2unit = function(string, offsets) {
    return utf16.adjust(false, string, offsets);
};

/* Map code-unit offsets to code-point offsets (see utf16.adjust). */
utf16.unit2point = function(string, offsets) {
    return utf16.adjust(true, string, offsets);
};

/* Return the character (one or two code units) at a code-point offset. */
utf16.charAt = function(string, offset) {
    return utf16.substring(string, offset, offset + 1);
};

/* Return the code point at a code-point offset. */
utf16.charCodeAt = function(string, offset) {
    var chr = utf16.charAt(string, offset);
    if (chr.length !== 2)
        return chr.charCodeAt(0);

    // Addition, not OR: see the matching comment in utf16.decode.
    return 0x10000 + ((chr.charCodeAt(0) & 0x03ff) << 10 | chr.charCodeAt(1) & 0x03ff);
};

/* Return the length of a string in code points.
 *
 * Throws a TypeError if the string contains an unpaired surrogate, because
 * utf16.unit2point() returns null in that case. */
utf16.strlen = function(string) {
    return utf16.unit2point(string, [string.length])[0];
};

/* Like String.prototype.substr, with start/length in code points. */
utf16.substr = function(string, start, length) {
    var stop;
    if (typeof length !== "undefined")
        stop = start + length;
    return utf16.substring(string, start, stop);
};

/* Like String.prototype.substring, with start/stop in code points.
 *
 * Throws a TypeError if the string contains an unpaired surrogate, because
 * utf16.point2unit() returns null in that case. */
utf16.substring = function(string, start, stop) {
    var range = utf16.point2unit(string, [start, stop]);
    return string.substring(range[0], range[1]);
};

utf16.prototype.charAt = function(index) {
    return new utf16(utf16.charAt(this.string, index));
};

utf16.prototype.charCodeAt = function(index) {
    return utf16.charCodeAt(this.string, index);
};

utf16.prototype.substr = function(start, length) {
    return new utf16(utf16.substr(this.string, start, length));
};

utf16.prototype.substring = function(start, end) {
    return new utf16(utf16.substring(this.string, start, end));
};

utf16.prototype.toString = function() {
    return this.string;
};

return utf16;

});