- for(offset=0; offset<(int32_t)sizeof(input); offset++){
- setOffset=offset;
- UTF8_SET_CHAR_START_UNSAFE(input, setOffset);
- if(setOffset != start_unsafe[i]){
- log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);
- }
-
- setOffset=offset;
- U8_SET_CP_START_UNSAFE(input, setOffset);
- if(setOffset != start_unsafe[i]){
- log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_unsafe[i], setOffset);
- }
-
- setOffset=offset;
- UTF8_SET_CHAR_START_SAFE(input, 0, setOffset);
- if(setOffset != start_safe[i]){
- log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
- }
-
- setOffset=offset;
- U8_SET_CP_START(input, 0, setOffset);
- if(setOffset != start_safe[i]){
- log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
- }
-
- if (offset != 0) { /* Can't have it go off the end of the array */
- setOffset=offset;
- UTF8_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
- if(setOffset != limit_unsafe[i]){
- log_err("ERROR: UTF8_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[i], setOffset);
- }
-
- setOffset=offset;
- U8_SET_CP_LIMIT_UNSAFE(input, setOffset);
- if(setOffset != limit_unsafe[i]){
- log_err("ERROR: U8_SET_CP_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_unsafe[i], setOffset);
- }
- }
-
- setOffset=offset;
- UTF8_SET_CHAR_LIMIT_SAFE(input,0, setOffset, sizeof(input));
- if(setOffset != limit_safe[i]){
- log_err("ERROR: UTF8_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
- }
-
- setOffset=offset;
- U8_SET_CP_LIMIT(input,0, setOffset, sizeof(input));
- if(setOffset != limit_safe[i]){
- log_err("ERROR: U8_SET_CP_LIMIT failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
- }
-
- i++;
+ for(offset=0; offset<=UPRV_LENGTHOF(input); offset++){
+ if (offset<UPRV_LENGTHOF(input)){
+#if !U_HIDE_OBSOLETE_UTF_OLD_H
+ setOffset=offset;
+ UTF8_SET_CHAR_START_SAFE(input, 0, setOffset);
+ if(setOffset != start_safe[i]){
+ log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
+ }
+#endif
+ setOffset=offset;
+ U8_SET_CP_START(input, 0, setOffset);
+ if(setOffset != start_safe[i]){
+ log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
+ }
+ }
+#if !U_HIDE_OBSOLETE_UTF_OLD_H
+ setOffset=offset;
+ UTF8_SET_CHAR_LIMIT_SAFE(input,0, setOffset, sizeof(input));
+ if(setOffset != limit_safe[i]){
+ log_err("ERROR: UTF8_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
+ }
+#endif
+ setOffset=offset;
+ U8_SET_CP_LIMIT(input,0, setOffset, sizeof(input));
+ if(setOffset != limit_safe[i]){
+ log_err("ERROR: U8_SET_CP_LIMIT failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
+ }
+
+ i++;
+ }
+}
+
+/*
+ * Checks the unchecked ("unsafe") UTF-8 boundary macros against hand-written
+ * expectation tables: for each offset into input[], the offset produced by
+ * the macro must equal start_unsafe[offset] or limit_unsafe[offset].
+ * Every mismatch is reported through log_err(); nothing is returned.
+ *
+ * The obsolete UTF8_SET_CHAR_*_UNSAFE spellings are exercised only when
+ * U_HIDE_OBSOLETE_UTF_OLD_H is 0, and are held to the same expectations as
+ * their U8_SET_CP_*_UNSAFE counterparts.
+ *
+ * log_err() is called printf-style, so the int32_t/int16_t values are cast to
+ * int and printed with %d; this keeps the variadic arguments matched to their
+ * conversion specifiers on platforms where long is wider than int.
+ */
+static void TestSetCharUnsafe() {
+    static const uint8_t input[]
+        = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x80, 0x80, 0x00 };
+    /* Expected offsets after the *_START_UNSAFE macros, one entry per input byte. */
+    static const int16_t start_unsafe[]
+        = {0, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9, 12, 12, 12, 15 };
+    /* Expected offsets after the *_LIMIT_UNSAFE macros; one entry more than
+       input[] because offset == length is tested too. Entry 0 is never
+       compared, since offset 0 is skipped below. */
+    static const int16_t limit_unsafe[]
+        = {0, 1, 4, 4, 4, 5, 6, 7, 9, 9, 10, 10, 10, 15, 15, 15, 16 };
+
+    uint32_t i=0;   /* index into the expectation tables; advances in step with offset */
+    int32_t offset=0, setOffset=0;
+    /* "<=" so that the limit macros are also checked at offset == length. */
+    for(offset=0; offset<=UPRV_LENGTHOF(input); offset++){
+        /* The start macros are only exercised for offsets that index a byte of input[]. */
+        if (offset<UPRV_LENGTHOF(input)){
+#if !U_HIDE_OBSOLETE_UTF_OLD_H
+            setOffset=offset;
+            UTF8_SET_CHAR_START_UNSAFE(input, setOffset);
+            if(setOffset != start_unsafe[i]){
+                log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%d. Expected:%d Got:%d\n",
+                        (int)offset, (int)start_unsafe[i], (int)setOffset);
+            }
+#endif
+            setOffset=offset;
+            U8_SET_CP_START_UNSAFE(input, setOffset);
+            if(setOffset != start_unsafe[i]){
+                log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%d. Expected:%d Got:%d\n",
+                        (int)offset, (int)start_unsafe[i], (int)setOffset);
+            }
+        }
+
+        /* NOTE(review): presumably the limit macros step back one byte before
+           moving forward, so offset 0 would read before input[0] - confirm
+           against the macro definitions. */
+        if (offset != 0) { /* Can't have it go off the end of the array */
+#if !U_HIDE_OBSOLETE_UTF_OLD_H
+            setOffset=offset;
+            UTF8_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
+            if(setOffset != limit_unsafe[i]){
+                log_err("ERROR: UTF8_SET_CHAR_LIMIT_UNSAFE failed for offset=%d. Expected:%d Got:%d\n",
+                        (int)offset, (int)limit_unsafe[i], (int)setOffset);
+            }
+#endif
+            setOffset=offset;
+            U8_SET_CP_LIMIT_UNSAFE(input, setOffset);
+            if(setOffset != limit_unsafe[i]){
+                log_err("ERROR: U8_SET_CP_LIMIT_UNSAFE failed for offset=%d. Expected:%d Got:%d\n",
+                        (int)offset, (int)limit_unsafe[i], (int)setOffset);
+            }
+        }
+
+        i++;
+    }
+}
+
+static void TestTruncateIfIncomplete() {
+ // Difference from U8_SET_CP_START():
+ // U8_TRUNCATE_IF_INCOMPLETE() does not look at s[length].
+ // Therefore, if the last byte is a lead byte, then this macro truncates
+ // even if the byte at the input index cannot continue a valid sequence
+ // (including when that is not a trail byte).
+ // On the other hand, if the last byte is a trail byte, then the two macros behave the same.
+ static const struct {
+ const char *s;
+ int32_t expected;
+ } cases[] = {
+ { "", 0 },
+ { "a", 1 },
+ { "\x80", 1 },
+ { "\xC1", 1 },
+ { "\xC2", 0 },
+ { "\xE0", 0 },
+ { "\xF4", 0 },
+ { "\xF5", 1 },
+ { "\x80\x80", 2 },
+ { "\xC2\xA0", 2 },
+ { "\xE0\x9F", 2 },
+ { "\xE0\xA0", 0 },
+ { "\xED\x9F", 0 },
+ { "\xED\xA0", 2 },
+ { "\xF0\x8F", 2 },
+ { "\xF0\x90", 0 },
+ { "\xF4\x8F", 0 },
+ { "\xF4\x90", 2 },
+ { "\xF5\x80", 2 },
+ { "\x80\x80\x80", 3 },
+ { "\xC2\xA0\x80", 3 },
+ { "\xE0\xA0\x80", 3 },
+ { "\xF0\x8F\x80", 3 },
+ { "\xF0\x90\x80", 0 },
+ { "\xF4\x8F\x80", 0 },
+ { "\xF4\x90\x80", 3 },
+ { "\xF5\x80\x80", 3 },
+ { "\x80\x80\x80\x80", 4 },
+ { "\xC2\xA0\x80\x80", 4 },
+ { "\xE0\xA0\x80\x80", 4 },
+ { "\xF0\x90\x80\x80", 4 },
+ { "\xF5\x80\x80\x80", 4 }
+ };
+ int32_t i;
+ for (i = 0; i < UPRV_LENGTHOF(cases); ++i) {
+ const char *s = cases[i].s;
+ int32_t expected = cases[i].expected;
+ int32_t length = (int32_t)strlen(s);
+ int32_t adjusted = length;
+ U8_TRUNCATE_IF_INCOMPLETE(s, 0, adjusted);
+ if (adjusted != expected) {
+ log_err("ERROR: U8_TRUNCATE_IF_INCOMPLETE failed for i=%d, length=%d. Expected:%d Got:%d\n",
+ (int)i, (int)length, (int)expected, (int)adjusted);
+ }