Seppo.Social -> Seppo.mro.name
[Seppo.git] / test / t_tag.ml
blob68fbf197b748427205297406f4b84b23b62bc899
1 open Seppo_lib
(* Checks [Tag.diff] on two empty lists and on two partially overlapping,
   sorted string lists. *)
let test_diff () =
  let join = String.concat " " in
  (* Two empty inputs must produce three empty result lists. *)
  (match Tag.diff String.compare [] [] with
   | [], [], [] -> ()
   | _ -> assert false);
  (* With [b d f] against [a d e] the first component holds the common
     element, the second the elements only in the second list and the third
     the elements only in the first list. *)
  let shared, only_second, only_first =
    Tag.diff String.compare [ "b"; "d"; "f" ] [ "a"; "d"; "e" ]
  in
  Assrt.equals_string __LOC__ "d" (join shared);
  Assrt.equals_string __LOC__ "a e" (join only_second);
  Assrt.equals_string __LOC__ "b f" (join only_first);
  assert true
(* [assert_cat msg exp res] compares the tag list [res] with the plain strings
   in [exp]: first the lengths, then both lists joined with single spaces.
   [msg] labels both checks. *)
let assert_cat msg exp res =
  let join = String.concat " " in
  let names = List.map (fun (Tag.Tag name) -> name) res in
  Assrt.equals_int msg (List.length exp) (List.length names);
  Assrt.equals_string msg (join exp) (join names)
(* Two tags that differ only in case and diacritics must end up under a single
   map key, with the first tag added being the stored value.
   See https://v2.ocaml.org/api/Map.Make.html *)
let test_map () =
  let tags = Tag.(Tmap.(add_tag_list [ Tag "#Ä"; Tag "#à" ] empty)) in
  Assrt.equals_int __LOC__ 1 (Tag.Tmap.cardinal tags);
  (* The lookup uses the folded key; Option.get raises if it is absent. *)
  let (Tag.Tag stored) = Option.get (Tag.Tmap.find_opt "#a" tags) in
  Assrt.equals_string __LOC__ "#Ä" stored;
  assert true
(* Example from Uuseg. Disabled: the call in the test runner at the bottom of
   this file is commented out as well.
let test_segment_word () =
  let ret = Tag.utf_8_segments `Word "Uhu" in
  ret |> List.length |> Assrt.equals_int __LOC__ 1;
  let ret = Tag.utf_8_segments `Word "Hello World!" in
  ret |> List.length |> Assrt.equals_int __LOC__ 4;
  let ret = Tag.utf_8_segments `Word "Hello, #World!" in
  ret |> List.length |> Assrt.equals_int __LOC__ 6;
  let ret = Tag.utf_8_segments `Word "Hello, #🌍World!" in
  ret |> List.length |> Assrt.equals_int __LOC__ 7;
  let ret = Tag.utf_8_segments `Word "Hello, #🌍🎉World!" in
  ret |> List.length |> Assrt.equals_int __LOC__ 8;
  assert true
*)
(* Exercises [Tag.of_string] on short phrases and on two real-world posts and
   compares the extracted tags, in order, with the expected names. *)
let test_of_string () =
  (* [check msg expected text] extracts the tags of [text] and compares them
     with [expected]. *)
  let check msg expected text =
    assert_cat msg expected (Tag.of_string text)
  in
  check "of_string 1" [ "#World" ] "Hello, #World!";
  (* A hash glued to a preceding word is expected to yield no tag, while one
     after a blank or a full stop does. *)
  check "of_string 1.1" [] "abc#def";
  check "of_string 1.2" [ "#def" ] "abc #def";
  check "of_string 1.3" [ "#def" ] "abc.#def";
  check "of_string 1.4" [ "#@uu"; "#gh" ] "ab #@uu @cd@ef #gh ij";
  (* The emoji tag is expected to be left out of the result. *)
  check "of_string 2" [ "#Uhu"; "#So" ] "H #Uhu un #🐫 d #So ja";
  check "of_string 3" [ "#Uhu"; "#So" ] "H #Uhu un 🐫 d #So ja";
  (* TODO the flag: *)
  check "of_string 4"
    [ "#†"; "#RIP"; "#AD2021" ]
    "#† #RIP #AD2021 📻 🇦🇹 Hannes Leopoldseder ist tot - ooe.ORF.at\n\n\
     via https://twitter.com/wasbuxton/status/1361797119871508485\n\
     via https://twitter.com/jnd1er\n\
     auch https://ooe.orf.at/stories/3090120/";
  check "of_string 5"
    [ "#†"; "#RIP"; "#AD2021"; "#objc" ]
    "#† #RIP #AD2021 Dr. Brad J. Cox Ph.D. Obituary - Manassas, VA | SCNow\n\n\
     \"… Dr. Brad J. Cox Ph. DDr. Brad Cox, Ph. D of Manassas, Virginia, died \
     on January\n\
     2, 2021 at his residence. Dr. Cox was a computer scientist known mostly for\n\
     creating the Objective – C programming language …\"\n\n\
     via https://news.ycombinator.com/item?id=25876767 #objc";
  assert true
(* A zero-width space (U+200B) inside a word is expected to end the tag, so
   only the part before it is reported. The character is written as an
   explicit escape because the raw character is invisible and easily lost when
   the file is edited or copied. *)
let test_zero_width_space () =
  Tag.of_string "Hello, #World\u{200B}s!"
  |> assert_cat "zero_width_space 1" [ "#World" ];
  assert true
(* Runs [Tag.sift_channel] over the fixture file t_tag.1.txt (relative to the
   current working directory) and checks the outcome.
   Raises Sys_error if the fixture cannot be opened. *)
let test_sift_channel () =
  let ic = open_in "t_tag.1.txt" in
  (* Close the channel even if sifting raises, so a failing run does not leak
     the file descriptor. *)
  let ret =
    Fun.protect
      ~finally:(fun () -> close_in ic)
      (fun () -> Tag.sift_channel ic)
  in
  (match ret with
   | Ok v -> assert_cat "comp" [ "#World" ] v
   (* NOTE(review): an Error result only passes if its payload equals the
      string below; confirm that this is the intended expectation. *)
   | Error v -> Assrt.equals_string __LOC__ "#Hello" v);
  assert true
83 (* https://codeberg.org/mro/ShaarliGo/src/branch/master/t_tags.go#L57 *)
(* Checks that [Tag.fold] lower-cases a tag and strips the diacritics covered
   by the samples below, while leaving other characters as they are. *)
let test_fold () =
  (* [tst msg exp src] folds the tag [src] and compares the result to [exp]. *)
  let tst msg exp src =
    Tag.Tag src |> Tag.fold |> Assrt.equals_string msg exp
  in
  tst "fold 1" "hallo wyrld!" "Hälló wÿrld!";
  tst "fold 2" "demaiziere" "DeMaizière";
  tst "fold 3" "cegłowski!" "Cegłowski!";
  (* Each case carries its own label so a failure identifies the sample. *)
  tst "fold 4" "iieeean" "íìéèêäñ";
  assert true
(* Checks [Tag.normalise] for a short text, a long text and an explicit tag
   list, starting from an empty tag map. The result is the short text, the
   long text and the resulting tag list. *)
let test_normalize () =
  (* Case 1: the explicit tag missing from both texts is expected on a new
     line at the end of the long text. *)
  let short, long, tags =
    Tag.normalise "#Uhu" "Aha\n#more" [ Tag "#Uhu"; Tag "#less" ] Tag.Tmap.empty
  in
  Assrt.equals_string __LOC__ "#Uhu" short;
  Assrt.equals_string __LOC__ "Aha\n#more\n#less" long;
  assert_cat "comp" [ "#Uhu"; "#less"; "#more" ] tags;
  (* Case 2: tags in the texts are cut at the slash and the full stop, so both
     explicit tags are expected to be appended. *)
  let short, long, tags =
    Tag.normalise "#Uh/u" "Aha\n#mo.re" [ Tag "#Uhu"; Tag "#less" ] Tag.Tmap.empty
  in
  Assrt.equals_string __LOC__ "#Uh/u" short;
  Assrt.equals_string __LOC__ "Aha\n#mo.re\n#Uhu #less" long;
  assert_cat "comp" [ "#Uh"; "#Uhu"; "#less"; "#mo" ] tags;
  assert true
(* Checks [Tag.cdb_normalise] against a freshly created cdb file under tmp/
   (relative to the current working directory). *)
let test_cdb () =
  let fn = "tmp/tag.cdb" in
  (* Start from a clean file; a missing file is the only tolerated error. *)
  (try Unix.unlink fn with Unix.Unix_error (Unix.ENOENT, "unlink", _) -> ());
  File.touch fn;
  let db = Mapcdb.add_string "#a" "#Ä" (Mapcdb.Cdb fn) in
  (* Adding an entry must hand back a database with the same path. *)
  let (Mapcdb.Cdb path) = db in
  Assrt.equals_string __LOC__ fn path;
  let short, long, tags = Tag.cdb_normalise "uhu #á" "aha #B" [] db in
  Assrt.equals_string __LOC__ "uhu #á" short;
  Assrt.equals_string __LOC__ "aha #B" long;
  Assrt.equals_int __LOC__ 2 (List.length tags);
  (* The entry added up front must be unchanged and the new tag must be found
     under its folded key. Option.get raises if a key is absent. *)
  let lookup key = Option.get (Mapcdb.find_string_opt key db) in
  Assrt.equals_string __LOC__ "#Ä" (lookup "#a");
  Assrt.equals_string __LOC__ "#B" (lookup "#b");
  assert true
(* Test runner: changes into the test directory, so the relative paths used by
   the tests resolve there, then runs every test in a fixed order. *)
let () =
  Unix.chdir "../../../test/";
  (* test_segment_word is disabled. *)
  List.iter
    (fun run -> run ())
    [
      test_map;
      test_diff;
      test_of_string;
      test_zero_width_space;
      test_sift_channel;
      test_fold;
      test_normalize;
      test_cdb;
    ];
  assert true