/* Word breaks in UTF-8 strings.
   Copyright (C) 2009-2024 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2009.

   This file is free software.
   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   You can redistribute it and/or modify it under either
     - the terms of the GNU Lesser General Public License as published
       by the Free Software Foundation, either version 3, or (at your
       option) any later version, or
     - the terms of the GNU General Public License as published by the
       Free Software Foundation; either version 2, or (at your option)
       any later version, or
     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License and the GNU General Public License
   for more details.

   You should have received a copy of the GNU Lesser General Public
   License and of the GNU General Public License along with this
   program.  If not, see <https://www.gnu.org/licenses/>.  */
#include "uniwbrk/wbrktable.h"

#define FUNC u8_wordbreaks
#define U_MBTOUC_UNSAFE u8_mbtouc_unsafe
#include "u-wordbreaks.h"

/* Standard headers used by the stand-alone test driver (read_file, main).  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Read the contents of an input stream, and return it, terminated with a NUL
   byte.

   STREAM is read until end-of-file.  The result is a freshly allocated
   buffer that the caller must release with free().  Because the result
   carries no separate length, a NUL byte embedded in the input hides the
   rest of the data from string functions such as strlen().

   This function never returns NULL: if memory is exhausted or the stream
   reports a read error, it prints a diagnostic to stderr and terminates
   the process with exit status 1.  */
char *
read_file (FILE *stream)
{
  /* Number of bytes requested from the stream per fread() call.  */
  enum { CHUNK_SIZE = 4096 };
  char *buf = NULL;
  size_t alloc = 0;   /* bytes allocated at buf */
  size_t size = 0;    /* bytes of input stored in buf so far */
  char *trimmed;

  for (;;)
    {
      size_t count;

      /* Make sure there is room for one more full chunk.  Grow by 50% so
         that the number of reallocations stays logarithmic in the input
         size, but never by less than one chunk.  */
      if (alloc - size < CHUNK_SIZE)
        {
          size_t new_alloc = alloc + alloc / 2;
          char *new_buf;

          if (new_alloc < size + CHUNK_SIZE)
            new_alloc = size + CHUNK_SIZE;
          /* Keep the old pointer until realloc() is known to have
             succeeded; on failure the old block is still valid.  */
          new_buf = realloc (buf, new_alloc);
          if (new_buf == NULL)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          buf = new_buf;
          alloc = new_alloc;
        }

      count = fread (buf + size, 1, CHUNK_SIZE, stream);
      size += count;

      /* fread() returns a short count only at end-of-file or on error.  */
      if (count < CHUNK_SIZE)
        {
          if (ferror (stream))
            {
              perror ("fread");
              exit (1);
            }
          break;
        }
    }

  /* The loop reserved CHUNK_SIZE bytes before the last read and that read
     came up short, so size < alloc here and the terminator fits.  */
  buf[size] = '\0';

  /* Give back the unused tail.  If shrinking fails, the larger block is
     still valid and already terminated, so just return it.  */
  trimmed = realloc (buf, size + 1);
  if (trimmed != NULL)
    buf = trimmed;

  return buf;
}
/* Test driver: display all the word breaks in the text read from standard
   input.

   The input is copied to standard output byte for byte.  Before every byte
   whose entry in the array filled by u8_wordbreaks() is 1, the character
   U+2027 (UTF-8 bytes E2 80 A7) is written as a visible marker.

   The program takes no command-line arguments.  It returns 0 after
   processing the input and 1 if any argument was given.  On memory
   exhaustion it prints "out of memory" to stderr and exits with status 1.  */
int
main (int argc, char * argv[])
{
  char *input;
  size_t length;
  char *breaks;
  size_t i;

  (void) argv;
  if (argc != 1)
    return 1;

  /* Display all the word breaks in the input string.  */
  input = read_file (stdin);
  length = strlen (input);

  /* One result byte per input byte.  Request at least one byte so that a
     NULL result always means failure, even for empty input (malloc (0)
     is allowed to return NULL).  */
  breaks = malloc (length > 0 ? length : 1);
  if (breaks == NULL)
    {
      fprintf (stderr, "out of memory\n");
      exit (1);
    }

  u8_wordbreaks ((uint8_t *) input, length, breaks);

  for (i = 0; i < length; i++)
    {
      switch (breaks[i])
        {
        case 1:
          /* U+2027 in UTF-8 encoding */
          putc (0xe2, stdout); putc (0x80, stdout); putc (0xa7, stdout);
          break;
        case 0:
          break;
        default:
          /* Only 0 and 1 are expected here; treat anything else as an
             internal error.  */
          abort ();
        }
      putc (input[i], stdout);
    }

  free (breaks);
  free (input);

  return 0;
}