utf8: add unit test for g_utf8_make_valid
[glib.git] / glib / tests / utf8-pointer.c
blob6fa4b4c0ef1efd901216cfc08cc70d109d6fbe7e
1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
25 #include <string.h>
26 #include <glib.h>
28 /* Test conversions between offsets and pointers */
30 static void test_utf8 (gconstpointer d)
32 gint num_chars;
33 const gchar **p;
34 gint i, j;
35 const gchar *string = d;
37 g_assert (g_utf8_validate (string, -1, NULL));
39 num_chars = g_utf8_strlen (string, -1);
41 p = (const gchar **) g_malloc (num_chars * sizeof (gchar *));
43 p[0] = string;
44 for (i = 1; i < num_chars; i++)
45 p[i] = g_utf8_next_char (p[i-1]);
47 for (i = 0; i < num_chars; i++)
48 for (j = 0; j < num_chars; j++)
50 g_assert (g_utf8_offset_to_pointer (p[i], j - i) == p[j]);
51 g_assert (g_utf8_pointer_to_offset (p[i], p[j]) == j - i);
54 g_free (p);
57 gchar *longline = "asdasdas dsaf asfd as fdasdf asfd asdf as dfas dfasdf a"
58 "asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdççççççççças ffsd asfd as fdASASASAs As"
59 "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd"
60 "asd fasdf asdf asdf asd fasfd as fdaèèèèèèè òòòòòòòòòòòòsfd asdf as fdas ffsd asfd as fdASASASAs D"
61 "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfgùùùùùùùùùùùùùù sdfg sdf gsdfg sdfg sd"
62 "asd fasdf asdf asdf asd fasfd as fdasfd asd@@@@@@@f as fdas ffsd asfd as fdASASASAs D "
63 "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdf€€€€€€€€€€€€€€€€€€g sdfg sdfg sdf gsdfg sdfg sd"
64 "asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdas ffsd asfd as fdASASASAs D"
65 "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd\n\nlalala\n";
67 static void
68 test_length (void)
70 g_assert (g_utf8_strlen ("1234", -1) == 4);
71 g_assert (g_utf8_strlen ("1234", 0) == 0);
72 g_assert (g_utf8_strlen ("1234", 1) == 1);
73 g_assert (g_utf8_strlen ("1234", 2) == 2);
74 g_assert (g_utf8_strlen ("1234", 3) == 3);
75 g_assert (g_utf8_strlen ("1234", 4) == 4);
76 g_assert (g_utf8_strlen ("1234", 5) == 4);
78 g_assert (g_utf8_strlen (longline, -1) == 762);
79 g_assert (g_utf8_strlen (longline, strlen (longline)) == 762);
80 g_assert (g_utf8_strlen (longline, 1024) == 762);
82 g_assert (g_utf8_strlen (NULL, 0) == 0);
84 g_assert (g_utf8_strlen ("a\340\250\201c", -1) == 3);
85 g_assert (g_utf8_strlen ("a\340\250\201c", 1) == 1);
86 g_assert (g_utf8_strlen ("a\340\250\201c", 2) == 1);
87 g_assert (g_utf8_strlen ("a\340\250\201c", 3) == 1);
88 g_assert (g_utf8_strlen ("a\340\250\201c", 4) == 2);
89 g_assert (g_utf8_strlen ("a\340\250\201c", 5) == 3);
92 static void
93 test_find (void)
95 /* U+0B0B Oriya Letter Vocalic R (\340\254\213)
96 * U+10900 Phoenician Letter Alf (\360\220\244\200)
97 * U+0041 Latin Capital Letter A (\101)
98 * U+1EB6 Latin Capital Letter A With Breve And Dot Below (\341\272\266)
100 const gchar *str = "\340\254\213\360\220\244\200\101\341\272\266\0\101";
101 const gchar *p = str + strlen (str);
102 const gchar *q;
104 q = g_utf8_find_prev_char (str, p);
105 g_assert (q == str + 8);
106 q = g_utf8_find_prev_char (str, q);
107 g_assert (q == str + 7);
108 q = g_utf8_find_prev_char (str, q);
109 g_assert (q == str + 3);
110 q = g_utf8_find_prev_char (str, q);
111 g_assert (q == str);
112 q = g_utf8_find_prev_char (str, q);
113 g_assert (q == NULL);
115 p = str + 2;
116 q = g_utf8_find_next_char (p, NULL);
117 g_assert (q == str + 3);
118 q = g_utf8_find_next_char (q, NULL);
119 g_assert (q == str + 7);
121 q = g_utf8_find_next_char (p, str + 6);
122 g_assert (q == str + 3);
123 q = g_utf8_find_next_char (q, str + 6);
124 g_assert (q == NULL);
126 q = g_utf8_find_next_char (str, str);
127 g_assert (q == NULL);
129 q = g_utf8_find_next_char (str + strlen (str), NULL);
130 g_assert (q == str + strlen (str) + 1);
133 int main (int argc, char *argv[])
135 g_test_init (&argc, &argv, NULL);
137 g_test_add_data_func ("/utf8/offsets", longline, test_utf8);
138 g_test_add_func ("/utf8/lengths", test_length);
139 g_test_add_func ("/utf8/find", test_find);
141 return g_test_run ();