openat: don’t close (-1)
[gnulib.git] / lib / readtokens.c
blob14e4b7dac17b3122861a43446ee0e1fadf3913b9
1 /* readtokens.c -- Functions for reading tokens from an input stream.
3 Copyright (C) 1990-1991, 1999-2004, 2006, 2009-2024 Free Software
4 Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>.
19 Written by Jim Meyering. */
21 /* This almost supersedes xreadline stuff -- using delim="\n"
22 gives the same functionality, except that these functions
23 would never return empty lines. */
25 #include <config.h>
27 #include "readtokens.h"
29 #include <limits.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
34 #include "xalloc.h"
36 #if USE_UNLOCKED_IO
37 # include "unlocked-io.h"
38 #endif
40 /* Initialize a tokenbuffer. */
42 void
43 init_tokenbuffer (token_buffer *tokenbuffer)
45 tokenbuffer->size = 0;
46 tokenbuffer->buffer = NULL;
/* The unit in which bitsets are stored, and the number of bits that
   one such unit holds.  */
typedef size_t word;
enum { bits_per_word = CHAR_BIT * sizeof (word) };
52 static bool
53 get_nth_bit (size_t n, word const *bitset)
55 return bitset[n / bits_per_word] >> n % bits_per_word & 1;
58 static void
59 set_nth_bit (size_t n, word *bitset)
61 size_t one = 1;
62 bitset[n / bits_per_word] |= one << n % bits_per_word;
65 /* Read a token from STREAM into TOKENBUFFER.
66 A token is delimited by any of the N_DELIM bytes in DELIM.
67 Upon return, the token is in tokenbuffer->buffer and
68 has a trailing '\0' instead of any original delimiter.
69 The function value is the length of the token not including
70 the final '\0'. Upon EOF (i.e. on the call after the last
71 token is read) or error, return -1 without modifying tokenbuffer.
72 The EOF and error conditions may be distinguished in the caller
73 by testing ferror (STREAM).
75 This function works properly on lines containing NUL bytes
76 and on files that do not end with a delimiter. */
78 size_t
79 readtoken (FILE *stream,
80 const char *delim,
81 size_t n_delim,
82 token_buffer *tokenbuffer)
84 int c;
85 idx_t i;
86 word isdelim[(UCHAR_MAX + bits_per_word) / bits_per_word];
88 memset (isdelim, 0, sizeof isdelim);
89 for (i = 0; i < n_delim; i++)
91 unsigned char ch = delim[i];
92 set_nth_bit (ch, isdelim);
95 /* skip over any leading delimiters */
96 for (c = getc (stream); c >= 0 && get_nth_bit (c, isdelim); c = getc (stream))
98 /* empty */
101 char *p = tokenbuffer->buffer;
102 idx_t n = tokenbuffer->size;
103 i = 0;
104 for (;;)
106 if (c < 0 && i == 0)
107 return -1;
109 if (i == n)
110 p = xpalloc (p, &n, 1, -1, sizeof *p);
112 if (c < 0)
114 p[i] = 0;
115 break;
117 if (get_nth_bit (c, isdelim))
119 p[i] = 0;
120 break;
122 p[i++] = c;
123 c = getc (stream);
126 tokenbuffer->buffer = p;
127 tokenbuffer->size = n;
128 return i;
131 /* Build a NULL-terminated array of pointers to tokens
132 read from STREAM. Return the number of tokens read.
133 All storage is obtained through calls to xmalloc-like functions.
135 %%% Question: is it worth it to do a single
136 %%% realloc() of 'tokens' just before returning? */
138 size_t
139 readtokens (FILE *stream,
140 size_t projected_n_tokens,
141 const char *delim,
142 size_t n_delim,
143 char ***tokens_out,
144 size_t **token_lengths)
146 token_buffer tb, *token = &tb;
147 char **tokens;
148 size_t *lengths;
149 idx_t sz, n_tokens;
151 if (projected_n_tokens == 0)
152 projected_n_tokens = 64;
153 else
154 projected_n_tokens++; /* add one for trailing NULL pointer */
156 sz = projected_n_tokens;
157 tokens = xnmalloc (sz, sizeof *tokens);
158 lengths = xnmalloc (sz, sizeof *lengths);
160 n_tokens = 0;
161 init_tokenbuffer (token);
162 for (;;)
164 char *tmp;
165 size_t token_length = readtoken (stream, delim, n_delim, token);
166 if (n_tokens >= sz)
168 tokens = xpalloc (tokens, &sz, 1, -1, sizeof *tokens);
169 lengths = xreallocarray (lengths, sz, sizeof *lengths);
172 if (token_length == (size_t) -1)
174 /* don't increment n_tokens for NULL entry */
175 tokens[n_tokens] = NULL;
176 lengths[n_tokens] = 0;
177 break;
179 tmp = xnmalloc (token_length + 1, sizeof *tmp);
180 lengths[n_tokens] = token_length;
181 tokens[n_tokens] = memcpy (tmp, token->buffer, token_length + 1);
182 n_tokens++;
185 free (token->buffer);
186 *tokens_out = tokens;
187 if (token_lengths != NULL)
188 *token_lengths = lengths;
189 else
190 free (lengths);
191 return n_tokens;