regen pidl all: rm epan/dissectors/pidl/*-stamp; pushd epan/dissectors/pidl/ && make...
[wireshark-sm.git] / wsutil / unicode-utils.h
blobd8a9a9bfcd7715a21048ee339031fee132d9b204
1 /* unicode-utils.h
2 * Unicode utility definitions
4 * Wireshark - Network traffic analyzer
5 * By Gerald Combs <gerald@wireshark.org>
6 * Copyright 2006 Gerald Combs
8 * SPDX-License-Identifier: GPL-2.0-or-later
9 */
11 #ifndef __UNICODEUTIL_H__
12 #define __UNICODEUTIL_H__
14 #include <wireshark.h>
16 #ifdef _WIN32
17 #include <windows.h>
18 #include <tchar.h>
19 #include <wchar.h>
20 #endif
/**
 * @file
 * Unicode convenience routines.
 */
27 #ifdef __cplusplus
28 extern "C" {
29 #endif
/*
 * Compile-time switch for the UTF-8 validation macros below:
 * true when WS_DEBUG_UTF_8 is defined, false otherwise.
 */
#ifndef WS_DEBUG_UTF_8
#define DEBUG_UTF_8_ENABLED false
#else
#define DEBUG_UTF_8_ENABLED true
#endif
/*
 * When DEBUG_UTF_8_ENABLED is true and str is not NULL, run
 * g_utf8_validate() on (str, len) and, if validation fails, hand the
 * string, its length and the end pointer reported by the validator to
 * ws_log_utf8().
 *
 * Notes:
 *  - `level` is accepted but not used by this expansion.
 *  - `str` and `len` may be evaluated more than once; do not pass
 *    expressions with side effects.
 */
#define _CHECK_UTF_8(level, str, len) \
    do { \
        if (DEBUG_UTF_8_ENABLED) { \
            const char *ws_utf8_bad_pos_; \
            if ((str) != NULL && \
                    !g_utf8_validate(str, len, &ws_utf8_bad_pos_)) { \
                ws_log_utf8(str, len, ws_utf8_bad_pos_); \
            } \
        } \
    } while (0)
/* UTF-8 validity check of (str, len), tagged with LOG_LEVEL_DEBUG. */
#define WS_UTF_8_CHECK(str, len) _CHECK_UTF_8(LOG_LEVEL_DEBUG, str, len)
/* UTF-8 validity check of (str, len), tagged with LOG_LEVEL_ECHO. */
#define WS_UTF_8_DEBUG_HERE(str, len) _CHECK_UTF_8(LOG_LEVEL_ECHO, str, len)
52 WSUTIL_EXPORT
53 const int ws_utf8_seqlen[256];
/** Given the first byte in an UTF-8 encoded code point,
 * return the length of the multibyte sequence, or *ZERO*
 * if the byte is invalid as the first byte in a multibyte
 * sequence.
 *
 * The argument is converted to unsigned char before it is used as the
 * table index, so a plain (possibly signed) char holding a byte value
 * of 0x80 or above cannot produce a negative index.
 */
#define ws_utf8_char_len(ch)  (ws_utf8_seqlen[(unsigned char)(ch)])
63 * Given a wmem scope, a pointer, and a length, treat the string of bytes
64 * referred to by the pointer and length as a UTF-8 string, and return a
65 * pointer to a UTF-8 string, allocated using the wmem scope, with all
66 * ill-formed sequences replaced with the Unicode REPLACEMENT CHARACTER
67 * according to the recommended "best practices" given in the Unicode
68 * Standard and specified by W3C/WHATWG.
70 WS_DLL_PUBLIC uint8_t *
71 ws_utf8_make_valid(wmem_allocator_t *scope, const uint8_t *ptr, ssize_t length);
74 * Same as ws_utf8_make_valid() but returns a wmem_strbuf_t.
76 WS_DLL_PUBLIC wmem_strbuf_t *
77 ws_utf8_make_valid_strbuf(wmem_allocator_t *scope, const uint8_t *ptr, ssize_t length);
79 #ifdef _WIN32
81 /** Given a UTF-8 string, convert it to UTF-16. This is meant to be used
82 * to convert between GTK+ 2.x (UTF-8) to Windows (UTF-16).
84 * @param utf8str The string to convert. May be NULL.
85 * @return The string converted to UTF-16. If utf8str is NULL, returns
86 * NULL. The return value should NOT be freed by the caller.
88 WS_DLL_PUBLIC
89 const wchar_t * utf_8to16(const char *utf8str);
91 /** Create a UTF-16 string (in place) according to the format string.
93 * @param utf16buf The buffer to return the UTF-16 string in.
94 * @param utf16buf_len The size of the 'utf16buf' parameter
95 * @param fmt A standard printf() format string
97 WS_DLL_PUBLIC
98 void utf_8to16_snprintf(TCHAR *utf16buf, int utf16buf_len, const char* fmt, ...)
99 G_GNUC_PRINTF(3, 4);
101 /** Given a UTF-16 string, convert it to UTF-8. This is meant to be used
102 * to convert between GTK+ 2.x (UTF-8) to Windows (UTF-16).
104 * @param utf16str The string to convert. May be NULL.
105 * @return The string converted to UTF-8. If utf16str is NULL, returns
106 * NULL. The return value should NOT be freed by the caller.
108 WS_DLL_PUBLIC
109 char * utf_16to8(const wchar_t *utf16str);
111 /** Convert the supplied program argument list from UTF-16 to UTF-8
112 * return a pointer to the array of UTF-8 arguments. This is intended
113 * to be used to normalize command line arguments at program startup.
115 * @param argc The number of arguments.
116 * @param argv The argument values (vector).
118 WS_DLL_PUBLIC
119 char **arg_list_utf_16to8(int argc, wchar_t *wc_argv[]);
121 #endif /* _WIN32 */
/*
 * defines for helping with UTF-16 surrogate pairs
 *
 * IS_LEAD_SURROGATE(u)   - true when u is in [0xd800, 0xdc00).
 * IS_TRAIL_SURROGATE(u)  - true when u is in [0xdc00, 0xe000).
 * SURROGATE_VALUE(l, t)  - combine a lead/trail pair into one value:
 *                          ten bits from each half, offset by 0x10000.
 *
 * The two predicates evaluate their argument twice; do not pass
 * expressions with side effects.
 */

#define IS_LEAD_SURROGATE(uchar2) \
    ((uchar2) >= 0xd800 && (uchar2) < 0xdc00)
#define IS_TRAIL_SURROGATE(uchar2) \
    ((uchar2) >= 0xdc00 && (uchar2) < 0xe000)
#define SURROGATE_VALUE(lead, trail) \
    (((((lead) - 0xd800) << 10) | ((trail) - 0xdc00)) + 0x10000)
#ifdef __cplusplus
} /* extern "C" */
#endif
138 #endif /* __UNICODEUTIL_H__ */