Git/spar/: New commits
[sunny256-utils.git] / unichar
blob b740ce11b56bf9decf28f3205fa19b4a3915f45f
#!/bin/sh

#==============================================================================
# unichar
# File ID: 23f515ba-5d47-11df-bcb0-90e6ba3022ac
#
# Create a searchable SQLite database of all Unicode characters.
#
# Author: Øyvind A. Holm <sunny@sunbase.org>
# License: GNU General Public License version 2 or later.
#==============================================================================
# Program identity, used as the prefix of every message and by --version.
progname=unichar
VERSION=0.5.0

# Locations and external programs:
#   db       SQLite database that is created by --init and queried by searches
#   guipdf   PDF opened by -g/--gui
#   uc_url   file downloaded by --init --get
#   pdfprog  viewer command used by -g/--gui
db="$HOME/.unichar.sqlite"
guipdf="$HOME/bin/Lib/unicode/current/charts/CodeCharts.pdf"
uc_url="ftp://unicode.org/Public/UNIDATA/UnicodeData.txt"
pdfprog=evince

# Option defaults. Flags are 0/1, opt_init holds the --init directory (empty
# means "not requested"), opt_quiet/opt_verbose are repeat counters.
opt_convert=0
opt_get=0
opt_gui=0
opt_help=0
opt_init=''
opt_quiet=0
opt_verbose=0
# Parse leading command-line options into the opt_* variables. Parsing stops
# at "--", at the first argument that does not start with "-", or at an empty
# argument; whatever remains in "$@" afterwards is used as search strings.
# An argument starting with "-" that is not listed below is a fatal error.
while test $# -gt 0; do
	case "$1" in
	-c|--convert) opt_convert=1; shift ;;
	--get) opt_get=1; shift ;;
	-g|--gui) opt_gui=1; shift ;;
	-h|--help) opt_help=1; shift ;;
	--init)
		# Without this guard "shift 2" fails when --init is the last
		# argument: bash then never consumes it and loops forever.
		if test $# -lt 2; then
			echo "$progname: --init: Missing directory argument" >&2
			exit 1
		fi
		opt_init=$2; shift 2 ;;
	-q|--quiet) opt_quiet=$((opt_quiet + 1)); shift ;;
	-v|--verbose) opt_verbose=$((opt_verbose + 1)); shift ;;
	--version) echo "$progname $VERSION"; exit 0 ;;
	--) shift; break ;;
	-*)
		echo "$progname: $1: Unknown option" >&2
		exit 1 ;;
	*) break ;;
	esac
done

# Effective verbosity is the number of -v minus the number of -q.
opt_verbose=$((opt_verbose - opt_quiet))
# Print the usage text and exit 0 when -h/--help was given. With a positive
# verbosity level, a blank line and the program name/version are printed first.
# The here-document delimiter is unquoted, so $progname, $pdfprog and $guipdf
# are expanded inside the text.
# NOTE(review): the listing this was recovered from carries no output line for
# the "latin ring | wc -l" example and no indentation inside the help text;
# neither has been invented here -- confirm against the upstream file.
if test "$opt_help" = "1"; then
	test "$opt_verbose" -gt 0 && { echo; echo "$progname $VERSION"; }
	cat <<END

Search for Unicode characters, different arguments are ANDed against
each other.

Usage:

$progname --init directory_with_Unicode_files [--get]
Create the database and optionally download the necessary files from
unicode.org .

$progname search_string [search_string [...]]
Search through the database after Unicode characters.

$progname -c
See description for -c/--convert.

Options:

-c, --convert
Read $progname output (search result) from stdin and convert it into
proper UTF-8.
--get
Used together with --init. Automatically download the necessary
files from ftp://unicode.org/Public/UNIDATA/ into directory
specified with --init. At the moment only UnicodeData.txt is used.
-g, --gui
Display the Unicode character table with $pdfprog(1).
Location: $guipdf
-h, --help
Show this help.
--init DIRECTORY
Initialise a new database from UnicodeData.txt in DIRECTORY.
-q, --quiet
Be more quiet. Can be repeated to increase silence.
-v, --verbose
Increase level of verbosity. Can be repeated.
--version
Print version information.

For example (with Unicode 8.0.0):

$ $progname latin | wc -l
1492

$ $progname latin ring | wc -l

$ $progname die face
2680;DIE FACE-1;So;0;ON;;;;;N;;;;;
2681;DIE FACE-2;So;0;ON;;;;;N;;;;;
2682;DIE FACE-3;So;0;ON;;;;;N;;;;;
2683;DIE FACE-4;So;0;ON;;;;;N;;;;;
2684;DIE FACE-5;So;0;ON;;;;;N;;;;;
2685;DIE FACE-6;So;0;ON;;;;;N;;;;;

$ $progname fac angr
1F620;ANGRY FACE;So;0;ON;;;;;N;;;;;

$ $progname latin above ring | $progname -c
ÅåŮůǺǻẘẙ

END
	exit 0
fi
# --init DIRECTORY: build the database from DIRECTORY/UnicodeData.txt and exit.
#
# Steps:
#   1. Refuse to run if $db already exists.
#   2. With --get, create DIRECTORY if needed and download $uc_url into it,
#      using GNU Wget if present, otherwise curl.
#   3. Verify that DIRECTORY and UnicodeData.txt are usable.
#   4. Create the one-column table "chars", index it and import the file.
# Exit status: 0 after a successful import, 1 on any failure.
if test -n "$opt_init"; then
	if test -e "$db"; then
		echo "$progname: $db: SQLite database already exists" >&2
		echo "$progname: Please remove it and try again" >&2
		exit 1
	fi

	if test "$opt_get" = "1"; then
		mkdir -p "$opt_init"
		cd "$opt_init" || {
			printf '%s\n' "$progname: Cannot change directory to '$opt_init'" >&2
			exit 1
		}
		# stderr of the probes is discarded so that a missing program
		# does not print "command not found" before the fallback runs.
		if wget --version 2>/dev/null | grep -q "GNU Wget"; then
			wget "$uc_url" || {
				echo "$progname: Error during download from $uc_url" >&2
				exit 1
			}
		elif curl --version 2>/dev/null | grep -q '^curl [0-9]'; then
			curl "$uc_url" -o UnicodeData.txt || {
				echo "$progname: Error during download from $uc_url" >&2
				exit 1
			}
		else
			echo "$progname: Unable to find a suitable download program" >&2
			echo "$progname: (searched for wget and curl)" >&2
			exit 1
		fi
		# Return to the starting directory; $opt_init may be relative.
		cd - >/dev/null
	fi

	if test ! -d "$opt_init/."; then
		printf '%s\n' "$progname: $opt_init: Non-existent or unavailable directory" >&2
		exit 1
	fi

	ucdata="$opt_init/UnicodeData.txt"

	if test ! -r "$ucdata"; then
		printf '%s\n' "$progname: $ucdata not found or is not readable" >&2
		exit 1
	fi

	# The path is embedded in single quotes inside the sqlite3 dot-command,
	# so a directory name containing a single quote is not supported.
	# NOTE(review): storing each line whole in column "s" presumably relies
	# on UnicodeData.txt containing none of sqlite3's import separator
	# characters -- confirm.
	if sqlite3 "$db" "CREATE TABLE chars (s varchar);" &&
	   sqlite3 "$db" "CREATE INDEX idx_chars_s ON chars (s);" &&
	   sqlite3 "$db" ".import '$ucdata' chars"; then
		printf '%s\n' "$progname: $ucdata successfully imported into $db" >&2
		exit 0
	else
		echo "$progname: Something went wrong during import into $db" >&2
		exit 1
	fi
fi
# -c/--convert: read search results from stdin, keep the first ';'-separated
# field of every line and pipe it through "fromhex -u", then exit 0.
# NOTE(review): fromhex is an external command not defined in this file;
# presumably "-u" turns hexadecimal code points into UTF-8 -- confirm.
if test "$opt_convert" = "1"; then
	# Convert from unichar output to actual UTF-8
	cut -f 1 -d ';' | fromhex -u
	exit 0
fi
# -g/--gui: start the PDF viewer on the code chart file in the background and
# exit 0 immediately, without waiting for the viewer.
if test "$opt_gui" = "1"; then
	# $pdfprog is expanded unquoted, so its value is word-split and may
	# carry arguments in addition to the command name.
	$pdfprog "$guipdf" &
	exit 0
fi
# Default action: search the database. Every remaining command-line argument
# becomes one "s LIKE '%term%'" condition and all conditions are ANDed.
# Exits 1 if the database is not readable or no search string was given.
if test ! -r "$db"; then
	printf '%s\n' "$progname: Database $db not found," >&2
	echo "you need to run '$progname --init'" >&2
	exit 1
fi

unset and_str search_str
for f in "$@"; do
	# Double every single quote so the term cannot terminate the SQL
	# string literal it is placed in. LIKE wildcards (% and _) in the term
	# are passed through unchanged.
	f_sql=$(printf '%s\n' "$f" | sed "s/'/''/g")
	search_str="$search_str$and_str s LIKE '%$f_sql%'"
	and_str=" AND"
done
if test -z "$search_str"; then
	echo "$progname: No search string specified" >&2
	exit 1
fi
sqlite3 "$db" "SELECT * FROM chars WHERE $search_str"
# vim: set ts=8 sw=8 sts=8 noet fo+=w tw=79 fenc=UTF-8 :