Rename *ll* and *ul* to ll and ul in in-interval
[maxima.git] / share / contrib / unicodedata / unicodedata.el
blobbf3d86bafcd3d0578ea65d8e7f6463c6bdf2e151
1 ;; -*- Mode: emacs-lisp; lexical-binding: t -*- ;;
2 ;; Copyright Leo Butler (leo.butler@member.fsf.org) 2015
3 ;; Released under terms of GPLv3+
4 ;;
5 ;; Generate the unicodedata-txt.lisp file from UnicodeData.txt
6 ;;
7 ;; Batch mode:
8 ;; emacs -nw -q --eval "(progn (byte-compile-file \"unicodedata.el\" t) (unicode-init) (unicode-print-lisp))"
9 ;;
11 (defun unicode->list (file)
12 (interactive "sFile? ")
13 (cl-labels ((string->number (x)
14 (string-to-number x 16))
15 (char (n)
16 (if (characterp n)
17 (format "%c" n)
18 n))
19 (assemble-list (x)
20 (let ((n (string->number (car x))))
21 (cons (car x) (cons n (cons (char n) (cdr x)))))))
22 (let ((u (save-excursion
23 (or (get-buffer file) (find-file-literally file))
24 (buffer-substring-no-properties (point-min) (point-max)))))
25 (mapcar #'assemble-list
26 (mapcar (lambda(s) (split-string s ";" nil nil))
27 (split-string u "\n" t))))))
29 (defun unicode-hashtable->list (ht)
30 (let (l)
31 (maphash #'(lambda(k v)
32 (push (list k v) l))
33 ht)
34 l))
36 (eval-when (compile load)
37 (defvar unicode-code-value-hex)
38 (defvar unicode-code-value)
39 (defvar unicode-code-character)
40 (defvar unicode-character-name)
41 (defvar unicode-general-category))
43 (defun unicode-find (predicate)
44 "Creates a function that applies PREDICATE(K V) to the key K and value V of each element
45 in a hashtable HT. Analogous to CL-REMOVE-IF-NOT."
46 (let ((this `(lambda (ht)
47 (let ((oht (make-hash-table :test #'eq)))
48 (maphash #'(lambda (k v)
49 (if (funcall #',predicate k v)
50 (puthash k v oht))) ht)
51 oht))))
52 (cl-coerce this 'function)))
54 (defun unicode->hashtable (file)
55 (interactive "sFile? ")
56 (let ((ht (make-hash-table :size 29215 :test #'equal))
57 (l (unicode->list file)))
58 (cl-loop for c in l
59 for h = (funcall unicode-code-value-hex c)
60 for v = (funcall unicode-code-value c)
61 for ch = (funcall unicode-character-name c)
62 for g = (funcall unicode-general-category c)
63 for gc = (gethash g ht)
65 (puthash h c ht)
66 (puthash v c ht)
67 (puthash ch c ht)
68 (puthash g (push c gc) ht))
69 ht))
71 (eval-when (compile load)
72 (defvar unicode-data-hashtable ) ;; (unicode->hashtable unicode-data-txt-buffer))
73 (defvar unicode-data-list ) ;; (unicode->list unicode-data-txt-buffer))
74 (defvar unicode-data-txt-url "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt"
75 "URL of UnicodeData.txt file.")
76 (defvar unicode-data-txt-buffer "UnicodeData.txt" "Name of buffer created by UNICODEDATA.TXT.GET."))
78 (defun unicodedata.txt.get (&optional url proceed further-callbacks)
79 (interactive "sURL? ")
80 (setq url (or url unicode-data-txt-url))
81 (let* ((transform (lambda (buffer)
82 (with-current-buffer buffer
83 (setq unicode-data-hashtable (unicode->hashtable (buffer-name)))
84 (message "Done processing %s." unicode-data-txt-buffer))))
85 (callback (lambda (status)
86 (unless status
87 (search-forward-regexp "^0000;" (point-max))
88 (forward-line -1)
89 (kill-whole-line (- (1+ (line-number-at-pos))))
90 (kill-line)
91 (rename-buffer unicode-data-txt-buffer)
92 (funcall transform unicode-data-txt-buffer)
93 (cl-loop for f in further-callbacks do (funcall f))))))
94 (cond ((find-file-literally unicode-data-txt-buffer)
95 (funcall transform unicode-data-txt-buffer))
96 ((or proceed (y-or-n-p "Retrieve UNICODE DATA online? "))
97 (url-retrieve url callback))
99 (message "Stopped.")))))
101 (defun unicode-search-character-name (regex &optional ht)
102 (interactive)
103 (let ((ht (or ht unicode-data-hashtable))
104 (l))
105 (cl-loop for k being the hash-keys of ht
106 if (and (stringp k) (string-match regex k))
107 do (push (gethash k ht) l))
109 (defun unicode-search-value (n &optional ht)
110 (interactive)
111 (let ((ht (or ht unicode-data-hashtable)))
112 (gethash n ht)))
113 (defun unicode-search-value-hex (n &optional ht)
114 (interactive)
115 (unicode-search-value (string-to-number n 16) ht))
116 (defun unicode-search-category (c &optional ht)
117 (interactive)
118 (let ((ht (or ht unicode-data-hashtable)))
119 (gethash c ht)))
121 ;; see http://www.unicode.org/Public//3.0-Update1/UnicodeData-3.0.1.html#General%20Category
122 (defun unicode-math-characters ()
123 (interactive)
124 (let ((ht (make-hash-table :size 8000 :test #'eq))
125 (l (append (unicode-search-category "Sm") ;; math symbol
126 (unicode-search-category "Mn") ;; non-spacing mark
127 ;;(unicode-search-category "Me") ;; enclosing mark
128 (unicode-search-category "No") ;; number other
129 (unicode-search-category "Nl") ;; number letter
130 (unicode-search-category "Nd") ;; number decimal digit
131 (unicode-search-category "Pc") ;; number decimal digit
132 (unicode-search-category "Pc") ;; Punctuation, Connector
133 (unicode-search-category "Pd") ;; Punctuation, Dash
134 (unicode-search-category "Ps") ;; Punctuation, Open
135 (unicode-search-category "Pe") ;; Punctuation, Close
136 (unicode-search-category "Pi") ;; Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
137 (unicode-search-category "Pf") ;; Punctuation, Final quote (may behave like Ps or Pe depending on usage)
138 ;;(unicode-search-category "Po") ;; Punctuation, Other
139 (unicode-search-character-name (regexp-opt '("APL FUNCTIONAL SYMBOL" "MATHEMATICAL" "GREEK" "LATIN"))))))
140 (cl-loop for i in l
141 do (puthash (funcall unicode-code-value i) i ht))
142 ht))
144 (eval-when (compile load)
145 (defvar unicode-math-characters nil) ;; (unicode-math-characters))
146 (defvar unicode-data-output-file "unicodedata-txt.lisp"))
148 (defun unicode-print-lisp (&optional data-file ht)
149 (interactive)
150 (setq ht (or ht (unicode-math-characters))
151 data-file (or data-file unicode-data-output-file))
152 (if (get-buffer data-file) (kill-buffer data-file))
153 (if (file-exists-p data-file) (delete-file data-file))
154 (let (s)
155 (with-temp-buffer
156 (let* ((standard-output (current-buffer))
157 (coding-system-for-read 'utf-8-unix)
158 (coding-system-for-write 'utf-8-unix)
159 (print-as-symbol (lambda (x) (if (equal "" x) (format " nil") (format " |%s|" x))))
160 (print-as-hex (lambda (x) (if (equal "" x) (format " nil") (progn (with-output-to-string (princ " #x")(princ x))))))
161 (print-car (lambda (x)
162 (princ "'(#x")(princ (car x))))
163 (print-rest (lambda (x)
164 (insert (concat
165 (funcall print-as-symbol (nth 2 x))
166 (funcall print-as-symbol (nth 3 x))
167 (funcall print-as-symbol (nth 4 x))
168 (funcall print-as-symbol (nth 5 x))
169 (funcall print-as-symbol (nth 6 x))
170 (funcall print-as-symbol (nth 7 x))
171 (funcall print-as-symbol (nth 8 x))
172 (funcall print-as-symbol (nth 9 x))
173 (funcall print-as-symbol (nth 10 x))
174 (funcall print-as-symbol (nth 11 x))
175 (funcall print-as-symbol (nth 12 x))
176 (funcall print-as-symbol (nth 13 x))
177 (funcall print-as-hex (nth 14 x))
178 (funcall print-as-hex (nth 15 x))
179 ")\n"))))
180 (printer (lambda (x)
181 (funcall print-car x) (funcall print-rest x))))
182 (insert ";; -*- mode:lisp; coding: utf-8 -*-\n")
183 (insert (format ";; Derived from %s\n#.(list\n" unicode-data-txt-url))
184 (mapc printer (cl-loop for k being the hash-keys of ht
185 for v = (gethash k ht)
186 if (and (numberp k) (> k 127))
187 collect v))
188 (insert ")\n")
189 (setq s (buffer-substring-no-properties (point-min) (point-max)))))
190 (with-current-buffer (find-file-literally data-file)
191 (insert (encode-coding-string s 'utf-8-unix))
192 (basic-save-buffer)
193 (kill-buffer))))
195 (defun unicode-init ()
196 (interactive)
197 (mapc #'(lambda(x) (set (car x) `(lambda (l) (nth ,(cadr x) l))))
198 '((unicode-code-value-hex 0)
199 (unicode-code-value 1)
200 (unicode-code-character 2)
201 (unicode-character-name 3)
202 (unicode-general-category 4)))
203 (unicodedata.txt.get nil t)
204 (unicode->hashtable unicode-data-txt-buffer)
207 ;; end of unicodedata.el ;;