- /*
- * Every error-handling path should return a value
- * that needs count bytes, so that UTF8_GET_CHAR_SAFE() works correctly.
- *
- * Starting with Unicode 3.0.1, non-shortest forms are illegal.
- * Starting with Unicode 3.2, surrogate code points must not be
- * encoded in UTF-8, and there are no irregular sequences any more.
- *
- * U8_ macros (new in ICU 2.4) return negative values for error conditions.
- */
-
- /* correct sequence - all trail bytes have (b7..b6)==(10)? */
- /* illegal is also set if count>=4 */
- if(illegal || (c)<utf8_minLegal[count] || (U_IS_SURROGATE(c) && strict!=-2)) {
- /* error handling */
- uint8_t errorCount=count;
- /* don't go beyond this sequence */
- i=*pi;
- while(count>0 && U8_IS_TRAIL(s[i])) {
- ++(i);
- --count;
- }
- if(strict>=0) {
- c=utf8_errorValue[errorCount-count];
- } else {
- c=U_SENTINEL;
- }
- } else if((strict)>0 && U_IS_UNICODE_NONCHAR(c)) {
- /* strict: forbid non-characters like U+fffe */
- c=utf8_errorValue[count];
- }
- } else /* too few bytes left */ {
- /* error handling */
- int32_t i0=i;
- /* don't just set (i)=(length) in case there is an illegal sequence */
- while((i)<(length) && U8_IS_TRAIL(s[i])) {
- ++(i);
- }
- if(strict>=0) {
- c=utf8_errorValue[i-i0];
- } else {
- c=U_SENTINEL;
- }