build: skip checking for networks source directories
[mldonkey.git] / src / utils / lib / levenshtein.ml
blob2874d680ee1564cad1052315eae1ec0ad44b0290
1 (* Levenshtein distance between two arrays of elements
2 It uses O(C.length s1) space.
3 The function can be curried on last argument to factorize array
4 allocation costs.
6 Complexity O(C.length s1 * C.length s2)
7 *)
9 type levenshtein_costs = {
10 insert_cost : int;
11 delete_cost : int;
12 replace_cost : int;
15 module type Chain = sig
16 type t
17 type element
18 val length : t -> int
19 val get : t -> int -> element
20 val equal : element -> element -> bool
21 end
23 module Make (C : Chain) = struct
25 let distance lc =
26 fun s1 ->
27 let l1 = C.length s1 in
28 let current_row = Array.make (l1 + 1) 0 in
29 let next_row = Array.make (l1 + 1) 0 in
30 fun s2 ->
31 let l2 = C.length s2 in
32 (* invariant:
33 matrix.(a).(b) =
34 levenshtein.distance lc (String.sub s1 0 a) (String.sub s2 0 b)
36 current_row.(i) = matrix.(i).(j)
37 next_row.(i) = matrix.(i).(j+1) *)
38 current_row.(0) <- 0;
39 for i = 1 to l1 do
40 current_row.(i) <- current_row.(i - 1) + lc.delete_cost
41 done;
42 let min_3int a b c : int =
43 let min = if a <= b then a else b in
44 if min <= c then min else c in
45 let rec aux j current_row next_row =
46 if j = l2 then current_row.(l1)
47 else
48 let c2 = C.get s2 j in
49 next_row.(0) <- current_row.(0) + lc.insert_cost;
50 for i = 1 to l1 do
51 let i1 = i - 1 in
52 next_row.(i) <-
53 min_3int
54 (next_row.(i1) + lc.delete_cost)
55 (current_row.(i) + lc.insert_cost)
56 (if C.equal (C.get s1 i1) c2 then current_row.(i1)
57 else current_row.(i1) + lc.replace_cost)
58 done;
59 aux (j + 1) next_row current_row in (* swap rows *)
60 aux 0 current_row next_row
61 end
63 module ChainString = struct
64 type t = string
65 type element = char
66 let length = String.length
67 let get = String.get
68 let equal c1 c2 = c1 = c2
69 end
71 module ForString = Make(ChainString)
73 module ChainWords = struct
74 type t = string array
75 type element = string
76 let length = Array.length
77 let get = Array.get
78 let equal s1 s2 = String.compare s1 s2 = 0
79 end
81 module ForWords = Make(ChainWords)