|
@@ -4012,6 +4012,53 @@ test_util_string_is_C_identifier(void *ptr)
|
|
|
;
|
|
|
}
|
|
|
|
|
|
+static void
|
|
|
+test_util_string_is_utf8(void *ptr)
|
|
|
+{
|
|
|
+ (void)ptr;
|
|
|
+
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8(NULL, 0));
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8("", 1));
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8("\uFEFF", 3));
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8("\uFFFE", 3));
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8("ascii\x7f\n", 7));
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8("Risqu\u00e9=1", 9));
|
|
|
+
|
|
|
+ // Validate exactly 'len' bytes.
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\0\x80", 2));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("Risqu\u00e9=1", 6));
|
|
|
+
|
|
|
+ // Reject sequences with missing bytes.
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\x80", 1));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xc2", 1));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xc2 ", 2));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xe1\x80", 2));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xe1\x80 ", 3));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xf1\x80\x80", 3));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xf1\x80\x80 ", 4));
|
|
|
+
|
|
|
+ // Reject encodings that are overly long.
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xc1\xbf", 2));
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8("\xc2\x80", 2));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xe0\x9f\xbf", 3));
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8("\xe0\xa0\x80", 3));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xf0\x8f\xbf\xbf", 4));
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8("\xf0\x90\x80\x80", 4));
|
|
|
+
|
|
|
+ // Reject UTF-16 surrogate halves.
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8("\xed\x9f\xbf", 3));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xed\xa0\x80", 3));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xed\xbf\xbf", 3));
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8("\xee\x80\x80", 3));
|
|
|
+
|
|
|
+ // The maximum legal codepoint, 10FFFF.
|
|
|
+ tt_int_op(1, OP_EQ, string_is_utf8("\xf4\x8f\xbf\xbf", 4));
|
|
|
+ tt_int_op(0, OP_EQ, string_is_utf8("\xf4\x90\x80\x80", 4));
|
|
|
+
|
|
|
+ done:
|
|
|
+ ;
|
|
|
+}
|
|
|
+
|
|
|
static void
|
|
|
test_util_asprintf(void *ptr)
|
|
|
{
|
|
@@ -6409,6 +6456,7 @@ struct testcase_t util_tests[] = {
|
|
|
UTIL_TEST(clamp_double_to_int64, 0),
|
|
|
UTIL_TEST(find_str_at_start_of_line, 0),
|
|
|
UTIL_TEST(string_is_C_identifier, 0),
|
|
|
+ UTIL_TEST(string_is_utf8, 0),
|
|
|
UTIL_TEST(asprintf, 0),
|
|
|
UTIL_TEST(listdir, 0),
|
|
|
UTIL_TEST(parent_dir, 0),
|