factor: eliminate print_uuint recursion
[coreutils.git] / gl / lib / randperm.c
blob14a3045241c4e9cbf165c1d178bd34e4559a3dc1
1 /* Generate random permutations.
3 Copyright (C) 2006-2024 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Written by Paul Eggert. */
20 #include <config.h>
22 #include "randperm.h"
24 #include <limits.h>
25 #include <stdbit.h>
26 #include <stdckdint.h>
27 #include <stdint.h>
28 #include <stdlib.h>
30 #include "attribute.h"
31 #include "hash.h"
32 #include "xalloc.h"
/* Return an upper bound on the number of random bytes needed to
   generate the first H elements of a random permutation of N
   elements.  H must not exceed N.  The result saturates at SIZE_MAX
   when the bound cannot be represented as a size_t.  */

size_t
randperm_bound (size_t h, size_t n)
{
  /* Upper bound on number of bits needed to generate the first number
     of the permutation.  */
  unsigned int lg_n = stdc_bit_width (n) + 1;

  /* Upper bound on number of bits needed to generate the first H
     elements.  */
  uintmax_t ar;
  if (ckd_mul (&ar, lg_n, h))
    return SIZE_MAX;

  /* Convert the bit count to a byte count, rounding up.  The count is
     held in a uintmax_t, which may be wider than size_t, so clamp it
     rather than letting the conversion to the return type wrap.  */
  uintmax_t bound = ar / CHAR_BIT + (ar % CHAR_BIT != 0);
  return bound < SIZE_MAX ? bound : SIZE_MAX;
}
/* Exchange the contents of V[I] and V[J].  */

static void
swap (size_t *v, size_t i, size_t j)
{
  size_t *first = &v[i];
  size_t *second = &v[j];
  size_t saved = *first;

  *first = *second;
  *second = saved;
}
/* Structures and functions for a sparse_map abstract data type.  It
   has the same effect as swapping elements I and J of an array V with
   swap(), but uses less memory when the number of permutations
   performed is significantly less than the size of the input.  */

struct sparse_ent_
{
  size_t index;  /* Position in the conceptual array; the lookup key.  */
  size_t val;    /* Value currently held at that position.  */
};
77 static size_t
78 sparse_hash_ (void const *x, size_t table_size)
80 struct sparse_ent_ const *ent = x;
81 return ent->index % table_size;
84 static bool
85 sparse_cmp_ (void const *x, void const *y)
87 struct sparse_ent_ const *ent1 = x;
88 struct sparse_ent_ const *ent2 = y;
89 return ent1->index == ent2->index;
92 typedef Hash_table sparse_map;
94 /* Initialize the structure for the sparse map,
95 when a best guess as to the number of entries
96 specified with SIZE_HINT. */
98 static sparse_map *
99 sparse_new (size_t size_hint)
101 return hash_initialize (size_hint, nullptr, sparse_hash_, sparse_cmp_, free);
104 /* Swap the values for I and J. If a value is not already present
105 then assume it's equal to the index. Update the value for
106 index I in array V. */
108 static void
109 sparse_swap (sparse_map *sv, size_t *v, size_t i, size_t j)
111 struct sparse_ent_ *v1 = hash_remove (sv, &(struct sparse_ent_) {i,0});
112 struct sparse_ent_ *v2 = hash_remove (sv, &(struct sparse_ent_) {j,0});
114 /* FIXME: reduce the frequency of these mallocs. */
115 if (!v1)
117 v1 = xmalloc (sizeof *v1);
118 v1->index = v1->val = i;
120 if (!v2)
122 v2 = xmalloc (sizeof *v2);
123 v2->index = v2->val = j;
126 size_t t = v1->val;
127 v1->val = v2->val;
128 v2->val = t;
129 if (!hash_insert (sv, v1))
130 xalloc_die ();
131 if (!hash_insert (sv, v2))
132 xalloc_die ();
134 v[i] = v1->val;
137 static void
138 sparse_free (sparse_map *sv)
140 hash_free (sv);
144 /* From R, allocate and return a malloc'd array of the first H elements
145 of a random permutation of N elements. H must not exceed N.
146 Return nullptr if H is zero. */
148 size_t *
149 randperm_new (struct randint_source *r, size_t h, size_t n)
151 size_t *v;
153 switch (h)
155 case 0:
156 v = nullptr;
157 break;
159 case 1:
160 v = xmalloc (sizeof *v);
161 v[0] = randint_choose (r, n);
162 break;
164 default:
166 /* The algorithm is essentially the same in both
167 the sparse and non sparse case. In the sparse case we use
168 a hash to implement sparse storage for the set of n numbers
169 we're shuffling. When to use the sparse method was
170 determined with the help of this script:
172 #!/bin/sh
173 for n in $(seq 2 32); do
174 for h in $(seq 2 32); do
175 test $h -gt $n && continue
176 for s in o n; do
177 test $s = o && shuf=shuf || shuf=./shuf
178 num=$(env time -f "$s:${h},${n} = %e,%M" \
179 $shuf -i0-$((2**$n-2)) -n$((2**$h-2)) | wc -l)
180 test $num = $((2**$h-2)) || echo "$s:${h},${n} = failed" >&2
181 done
182 done
183 done
185 This showed that if sparseness = n/h, then:
187 sparseness = 128 => .125 mem used, and about same speed
188 sparseness = 64 => .25 mem used, but 1.5 times slower
189 sparseness = 32 => .5 mem used, but 2 times slower
191 Also the memory usage was only significant when n > 128Ki
193 bool sparse = (n >= (128 * 1024)) && (n / h >= 32);
195 size_t i;
196 sparse_map *sv;
198 if (sparse)
200 sv = sparse_new (h * 2);
201 if (sv == nullptr)
202 xalloc_die ();
203 v = xnmalloc (h, sizeof *v);
205 else
207 sv = nullptr; /* To placate GCC's -Wuninitialized. */
208 v = xnmalloc (n, sizeof *v);
209 for (i = 0; i < n; i++)
210 v[i] = i;
213 for (i = 0; i < h; i++)
215 size_t j = i + randint_choose (r, n - i);
216 if (sparse)
217 sparse_swap (sv, v, i, j);
218 else
219 swap (v, i, j);
222 if (sparse)
223 sparse_free (sv);
224 else
225 v = xnrealloc (v, h, sizeof *v);
227 break;
230 return v;