2 * Bitset - Efficient bit sets
3 * Copyright (C) 2003 Nicolas Cannasse
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version,
9 * with the special exception on linking described in file LICENSE.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 external bcreate
: int -> intern
= "caml_create_string"
24 external fast_get
: intern
-> int -> int = "%string_unsafe_get"
25 external fast_set
: intern
-> int -> int -> unit = "%string_unsafe_set"
26 external fast_bool
: int -> bool = "%identity"
27 external fast_blit
: intern
-> int -> intern
-> int -> int -> unit = "caml_blit_string" "noalloc"
28 external fast_fill
: intern
-> int -> int -> int -> unit = "caml_fill_string" "noalloc"
29 external fast_length
: intern
-> int = "%string_length"
32 assert (ndx
>= 0 && ndx
< fast_length s
);
36 assert (ndx
>= 0 && ndx
< fast_length s
);
39 let bblit src srcoff dst dstoff len
=
40 assert (srcoff
>= 0 && dstoff
>= 0 && len
>= 0);
41 fast_blit src srcoff dst dstoff len
43 let bfill dst start len c
=
44 assert (start
>= 0 && len
>= 0);
45 fast_fill dst start len c
47 exception Negative_index
of string
50 mutable data
: intern
;
54 let error fname
= raise
(Negative_index fname
)
62 let int_size = 7 (* value used to round up index *)
63 let log_int_size = 3 (* number of shifts *)
66 if n
< 0 then error "create";
67 let size = (n
+int_size) lsr log_int_size in
68 let b = bcreate
size in
76 let b = bcreate t
.len
in
77 bblit t
.data
0 b 0 t
.len
;
86 if x
< 0 then error "set";
87 let pos = x
lsr log_int_size and delta
= x
land int_size in
89 if pos >= size then begin
90 let b = bcreate
(pos+1) in
91 bblit t
.data
0 b 0 size;
92 bfill b size (pos - size + 1) 0;
96 bset t
.data
pos ((bget t
.data
pos) lor (1 lsl delta
))
99 if x
< 0 then error "unset";
100 let pos = x
lsr log_int_size and delta
= x
land int_size in
102 bset t
.data
pos ((bget t
.data
pos) land (0xFF lxor (1 lsl delta
)))
105 if x
< 0 then error "toggle";
106 let pos = x
lsr log_int_size and delta
= x
land int_size in
108 if pos >= size then begin
109 let b = bcreate
(pos+1) in
110 bblit t
.data
0 b 0 size;
111 bfill b size (pos - size + 1) 0;
115 bset t
.data
pos ((bget t
.data
pos) lxor (1 lsl delta
))
122 if x
< 0 then error "is_set";
123 let pos = x
lsr log_int_size and delta
= x
land int_size in
126 fast_bool
(((bget t
.data
pos) lsr delta
) land 1)
131 exception Break_int
of int
133 (* Find highest set element or raise Not_found *)
135 (* Find highest set bit in a byte. Does not work with zero. *)
139 if b land (1 lsl n
) = 0 then
146 for i
= n downto 0 do
147 let byte = bget buf i
in
148 if byte <> 0 then raise
(Break_int
((i
lsl log_int_size)+(byte_msb byte)))
153 | _
-> raise Not_found
156 let some_msb b = try Some
(find_msb b) with Not_found
-> None
in
157 match (some_msb t1
, some_msb t2
) with
158 (None
, Some _
) -> -1 (* 0-y -> -1 *)
159 | (Some _
, None
) -> 1 (* x-0 -> 1 *)
160 | (None
, None
) -> 0 (* 0-0 -> 0 *)
161 | (Some a
, Some
b) -> (* x-y *)
166 (* MSBs differ, we need to scan arrays until we find a
168 let ndx = a
lsr log_int_size in
169 assert (ndx < t1
.len
&& ndx < t2
.len
);
171 for i
= ndx downto 0 do
172 let b1 = bget t1
.data i
173 and b2
= bget t2
.data i
in
174 if b1 <> b2
then raise
(Break_int
(compare b1 b2
))
184 let partial_count t x
=
188 else if fast_bool
(x
land 1) then
189 1 + (nbits (x
lsr 1))
194 let pos = x
lsr log_int_size and delta
= x
land int_size in
199 let x = bget t
.data
n in
200 loop (n+1) (acc
+ nbits x)
205 loop (pos+1) (nbits ((bget t
.data
pos) lsr delta
))
210 (* Find the first set bit in the bit array *)
211 let find_first_set b n =
212 (* TODO there are many ways to speed this up. Lookup table would be
213 one way to speed this up. *)
217 if b land (1 lsl n) <> 0 then n else loop (n+1) in
221 let rec find_bit byte_ndx bit_offs
=
222 if byte_ndx
>= b.len
then
225 let byte = (bget buf byte_ndx
) lsr bit_offs
in
227 find_bit (byte_ndx
+ 1) 0
229 Some
((find_lsb byte) + (byte_ndx
lsl log_int_size) + bit_offs
) in
230 find_bit (n lsr log_int_size) (n land int_size)
236 match find_first_set t !cur with
241 raise Enum.No_more_elements in
244 ~count:(fun () -> partial_count t !cur)
245 ~clone:(fun () -> make !cur)
249 let raw_create size =
250 let b = bcreate
size in
252 { data
= b; len
= size }
255 let max_size = max a
.len
b.len
in
256 let d = raw_create max_size in
257 let sl = min a
.len
b.len
in
260 (* Note: rest of the array is set to zero automatically *)
262 bset d.data i
((bget abuf i
) land (bget bbuf i
))
266 (* Note: rest of the array is handled automatically correct, since we
267 took a copy of the bigger set. *)
269 let d = if a
.len
> b.len
then copy a
else copy b in
270 let sl = min a
.len
b.len
in
274 bset d.data i
((bget abuf i
) lor (bget bbuf i
))
279 let maxlen = max a
.len
b.len
in
280 let buf = bcreate
maxlen in
281 bblit a
.data
0 buf 0 a
.len
;
282 let sl = min a
.len
b.len
in
286 bset buf i
((bget abuf i
) land (lnot
(bget bbuf i
)))
288 { data
= buf; len
= maxlen }
291 let maxlen = max a
.len
b.len
in
292 let buf = bcreate
maxlen in
293 (* Copy larger (assumes missing bits are zero) *)
294 bblit (if a
.len
> b.len
then a
.data
else b.data
) 0 buf 0 maxlen;
295 let sl = min a
.len
b.len
in
299 bset buf i
((bget abuf i
) lxor (bget bbuf i
))
301 { data
= buf; len
= maxlen }
303 (* TODO the following set operations can be made faster if you do the
304 set operation in-place instead of taking a copy. But be careful
305 when the sizes of the bitvector strings differ. *)
307 let d = inter t t'
in
311 let differentiate t t'
=
317 let d = union t t'
in
321 let differentiate_sym t t'
=
322 let d = sym_diff t t'
in