+ size_t len = 0;
+
+ while (*psz && ((!buf) || (len<n))) {
+ unsigned char cc=*psz++, fc=cc;
+ unsigned cnt;
+ for (cnt=0; fc&0x80; cnt++) fc<<=1;
+ if (!cnt) {
+ // plain ASCII char
+ if (buf) *buf++=cc;
+ len++;
+ } else {
+ cnt--;
+ if (!cnt) {
+ // invalid UTF-8 sequence
+ return (size_t)-1;
+ } else {
+ unsigned ocnt=cnt-1;
+ unsigned long res=cc&(0x3f>>cnt);
+ while (cnt--) {
+ cc = *psz++;
+ if ((cc&0xC0)!=0x80) {
+ // invalid UTF-8 sequence
+ return (size_t)-1;
+ }
+ res=(res<<6)|(cc&0x3f);
+ }
+ if (res<=utf8_max[ocnt]) {
+ // illegal UTF-8 encoding
+ return (size_t)-1;
+ }
+ if (buf) *buf++=res;
+ len++;
+ }
+ }
+ }
+ if (buf && (len<n)) *buf = 0;
+ return len;