utf8: add unit test for g_utf8_make_valid
[glib.git] / gio / gconverter.c
blob88b22c16b8c6ebdd7a0fcf151bffb9f008f08a4f
1 /* GIO - GLib Input, Output and Streaming Library
3 * Copyright (C) 2009 Red Hat, Inc.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General
16 * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 * Author: Alexander Larsson <alexl@redhat.com>
21 #include "config.h"
22 #include "gconverter.h"
23 #include "glibintl.h"
26 /**
27 * SECTION:gconverter
28 * @short_description: Data conversion interface
29 * @include: gio/gio.h
30 * @see_also: #GInputStream, #GOutputStream
32 * #GConverter is implemented by objects that convert
33 * binary data in various ways. The conversion can be
34 * stateful and may fail at any place.
36 * Some example conversions are: character set conversion,
37 * compression, decompression and regular expression
38 * replace.
40 * Since: 2.24
41 **/
44 typedef GConverterIface GConverterInterface;
45 G_DEFINE_INTERFACE (GConverter, g_converter, G_TYPE_OBJECT)
47 static void
48 g_converter_default_init (GConverterInterface *iface)
52 /**
53 * g_converter_convert:
54 * @converter: a #GConverter.
55 * @inbuf: (array length=inbuf_size) (element-type guint8): the buffer
56 * containing the data to convert.
57 * @inbuf_size: the number of bytes in @inbuf
58 * @outbuf: (element-type guint8) (array length=outbuf_size): a buffer to write
59 * converted data in.
60 * @outbuf_size: the number of bytes in @outbuf, must be at least one
61 * @flags: a #GConverterFlags controlling the conversion details
62 * @bytes_read: (out): will be set to the number of bytes read from @inbuf on success
63 * @bytes_written: (out): will be set to the number of bytes written to @outbuf on success
64 * @error: location to store the error occurring, or %NULL to ignore
66 * This is the main operation used when converting data. It is to be called
67 * multiple times in a loop, and each time it will do some work, i.e.
68 * producing some output (in @outbuf) or consuming some input (from @inbuf) or
69 * both. If its not possible to do any work an error is returned.
71 * Note that a single call may not consume all input (or any input at all).
72 * Also a call may produce output even if given no input, due to state stored
73 * in the converter producing output.
75 * If any data was either produced or consumed, and then an error happens, then
76 * only the successful conversion is reported and the error is returned on the
77 * next call.
79 * A full conversion loop involves calling this method repeatedly, each time
80 * giving it new input and space output space. When there is no more input
81 * data after the data in @inbuf, the flag %G_CONVERTER_INPUT_AT_END must be set.
82 * The loop will be (unless some error happens) returning %G_CONVERTER_CONVERTED
83 * each time until all data is consumed and all output is produced, then
84 * %G_CONVERTER_FINISHED is returned instead. Note, that %G_CONVERTER_FINISHED
85 * may be returned even if %G_CONVERTER_INPUT_AT_END is not set, for instance
86 * in a decompression converter where the end of data is detectable from the
87 * data (and there might even be other data after the end of the compressed data).
89 * When some data has successfully been converted @bytes_read and is set to
90 * the number of bytes read from @inbuf, and @bytes_written is set to indicate
91 * how many bytes was written to @outbuf. If there are more data to output
92 * or consume (i.e. unless the %G_CONVERTER_INPUT_AT_END is specified) then
93 * %G_CONVERTER_CONVERTED is returned, and if no more data is to be output
94 * then %G_CONVERTER_FINISHED is returned.
96 * On error %G_CONVERTER_ERROR is returned and @error is set accordingly.
97 * Some errors need special handling:
99 * %G_IO_ERROR_NO_SPACE is returned if there is not enough space
100 * to write the resulting converted data, the application should
101 * call the function again with a larger @outbuf to continue.
103 * %G_IO_ERROR_PARTIAL_INPUT is returned if there is not enough
104 * input to fully determine what the conversion should produce,
105 * and the %G_CONVERTER_INPUT_AT_END flag is not set. This happens for
106 * example with an incomplete multibyte sequence when converting text,
107 * or when a regexp matches up to the end of the input (and may match
108 * further input). It may also happen when @inbuf_size is zero and
109 * there is no more data to produce.
111 * When this happens the application should read more input and then
112 * call the function again. If further input shows that there is no
113 * more data call the function again with the same data but with
114 * the %G_CONVERTER_INPUT_AT_END flag set. This may cause the conversion
115 * to finish as e.g. in the regexp match case (or, to fail again with
116 * %G_IO_ERROR_PARTIAL_INPUT in e.g. a charset conversion where the
117 * input is actually partial).
119 * After g_converter_convert() has returned %G_CONVERTER_FINISHED the
120 * converter object is in an invalid state where its not allowed
121 * to call g_converter_convert() anymore. At this time you can only
122 * free the object or call g_converter_reset() to reset it to the
123 * initial state.
125 * If the flag %G_CONVERTER_FLUSH is set then conversion is modified
126 * to try to write out all internal state to the output. The application
127 * has to call the function multiple times with the flag set, and when
128 * the available input has been consumed and all internal state has
129 * been produced then %G_CONVERTER_FLUSHED (or %G_CONVERTER_FINISHED if
130 * really at the end) is returned instead of %G_CONVERTER_CONVERTED.
131 * This is somewhat similar to what happens at the end of the input stream,
132 * but done in the middle of the data.
134 * This has different meanings for different conversions. For instance
135 * in a compression converter it would mean that we flush all the
136 * compression state into output such that if you uncompress the
137 * compressed data you get back all the input data. Doing this may
138 * make the final file larger due to padding though. Another example
139 * is a regexp conversion, where if you at the end of the flushed data
140 * have a match, but there is also a potential longer match. In the
141 * non-flushed case we would ask for more input, but when flushing we
142 * treat this as the end of input and do the match.
144 * Flushing is not always possible (like if a charset converter flushes
145 * at a partial multibyte sequence). Converters are supposed to try
146 * to produce as much output as possible and then return an error
147 * (typically %G_IO_ERROR_PARTIAL_INPUT).
149 * Returns: a #GConverterResult, %G_CONVERTER_ERROR on error.
151 * Since: 2.24
153 GConverterResult
154 g_converter_convert (GConverter *converter,
155 const void *inbuf,
156 gsize inbuf_size,
157 void *outbuf,
158 gsize outbuf_size,
159 GConverterFlags flags,
160 gsize *bytes_read,
161 gsize *bytes_written,
162 GError **error)
164 GConverterIface *iface;
166 g_return_val_if_fail (G_IS_CONVERTER (converter), G_CONVERTER_ERROR);
167 g_return_val_if_fail (outbuf_size > 0, G_CONVERTER_ERROR);
169 *bytes_read = 0;
170 *bytes_written = 0;
172 iface = G_CONVERTER_GET_IFACE (converter);
174 return (* iface->convert) (converter,
175 inbuf, inbuf_size,
176 outbuf, outbuf_size,
177 flags,
178 bytes_read, bytes_written, error);
182 * g_converter_reset:
183 * @converter: a #GConverter.
185 * Resets all internal state in the converter, making it behave
186 * as if it was just created. If the converter has any internal
187 * state that would produce output then that output is lost.
189 * Since: 2.24
191 void
192 g_converter_reset (GConverter *converter)
194 GConverterIface *iface;
196 g_return_if_fail (G_IS_CONVERTER (converter));
198 iface = G_CONVERTER_GET_IFACE (converter);
200 (* iface->reset) (converter);