- // mask and value of lead byte's most significant bits, by length:
- static const unsigned char leadMarkerMask[] = { 0x80, 0xE0, 0xF0, 0xF8 };
- static const unsigned char leadMarkerVal[] = { 0x00, 0xC0, 0xE0, 0xF0 };
+ // Char. number range | UTF-8 octet sequence
+ // (hexadecimal) | (binary)
+ // ----------------------+----------------------------------------
+ // 0000 0000 - 0000 007F | 0xxxxxxx
+ // 0000 0080 - 0000 07FF | 110xxxxx 10xxxxxx
+ // 0000 0800 - 0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+ // 0001 0000 - 0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ //
+ // Code point value is stored in bits marked with 'x',
+ // lowest-order bit of the value on the right side in the diagram
+ // above. (from RFC 3629)