Rename *ll* and *ul* to ll and ul in in-interval
[maxima.git] / share / contrib / unicodedata / unicodedata.lisp
blob710dc13ed66d0f89be4626a89aba9868b2684bfb
1 ;; -*- mode: lisp; coding: utf-8 -*-
2 ;; Copyright Leo Butler (leo.butler@member.fsf.org) 2015
3 ;; Released under the terms of GPLv3+
5 #|
7 Rewrite Maxima's alphabetp function to handle wide-characters.
8 Add the ability for user to alter the table of alphabetic characters at runtime.
10 To do:
11 -detect if unicode characters are characters
15 (defpackage :unicodedata
16 (:use :common-lisp :maxima)
17 (:import-from :maxima merror alphabetp *alphabet* $file_search1 $file_search_lisp $done $all mfuncall mlist)
20 (in-package :unicodedata)
22 (let ((unicode-data-hashtable (make-hash-table :test #'eql))
23 (alphabetp-hashtable (make-hash-table :test #'equal)))
26 (labels ((lookup (i &optional (unicode-data-hashtable unicode-data-hashtable)) (symbol-name (car (gethash i unicode-data-hashtable))))
27 (description (x) (symbol-name (second x)))
28 (create-selector (regexp)
29 (cond ((stringp regexp)
30 (let ((s (string-downcase regexp)))
31 (lambda (x) (search s (string-downcase x)))))
32 ((eql regexp '$all)
33 (lambda (x) (declare (ignore x)) t))
34 ((null regexp)
35 (lambda (x) (declare (ignore x)) nil))
37 (merror "regexp must be a string, the symbol `all' or empty."))))
38 (create-adder (lookup-char)
39 (labels ((this (regexp append)
40 (let ((selector (create-selector regexp)))
41 (unless append (clrhash alphabetp-hashtable))
42 (maphash (lambda (k v)
43 (if (and k (funcall selector (funcall lookup-char v)))
44 (maxima::$set_alpha_char k)))
45 unicode-data-hashtable))
46 '$done))
47 #'this))
48 #-sbcl (char-sym (x) (first x))
49 (category (x) (symbol-name (third x))))
51 (let ((stack '()))
52 (defun unicode-alphabetp (c)
53 (cond ((< (char-code c) 128.) ;; this character is ascii and must be non-alphabetic
54 (setf stack '())) ;; there are no characters on the stack nor known to be part of a wide-character
55 ((null stack) ;; len=0
56 (push c stack)
57 (if (gethash stack alphabetp-hashtable)
58 stack
59 (setf stack '())))
60 (t ;; 1 or more characters in stack
61 (push c stack)
62 (cond ((gethash stack alphabetp-hashtable) ;; c is part of wide-character
63 stack)
65 (setf stack '()) ;; c is not end of wide-character
66 (unicode-alphabetp c)))))) ;; but it may start new one.
67 ;; redefine alphabetp from src/nparse.lisp
68 ;; to use unicode-alphabetp
69 (defun alphabetp (n)
70 (and (characterp n)
71 (cond ((or (alpha-char-p n)
72 (member n *alphabet*))
73 (setf stack '())
76 (unicode-alphabetp n))))))
78 (defun maxima::$set_alpha_char (char-sym)
79 "A user-level function to add a wide character to the hashtable of
80 known alphabetical characters."
81 (let ((char-sym-list (coerce (cond ((stringp char-sym) char-sym)
82 ((symbolp char-sym) (symbol-name char-sym))
83 ((and (integerp char-sym) (< 127. char-sym) (> 917999. char-sym))
84 #+(or sbcl clisp)
85 (format nil "~c" (code-char char-sym))
86 #-(or sbcl clisp)
87 (lookup char-sym unicode-data-hashtable))
88 (t (merror "first argument must be a string, symbol or integer")))
89 'list)))
90 (do ((x (reverse char-sym-list) (cdr x))) ((null x) '$done)
91 (setf (gethash x alphabetp-hashtable)
92 #-t (push (list char-sym-list (gethash char-sym unicode-data-hashtable)) (gethash x alphabetp-hashtable))
93 #+t t))))
95 (defun maxima::$unicode_init (&optional (regexp nil) file)
96 (let ((data-file (mfuncall '$file_search1 (or file "unicodedata-txt.lisp") '((mlist) $file_search_lisp))))
97 (loop for (n char-sym description category) in (with-open-file (instr data-file :direction :input) (read instr t nil nil))
99 (setf (gethash n unicode-data-hashtable) (list char-sym description category)))
100 (if regexp (maxima::$unicode_add regexp))))
103 (defun maxima::$unicode_add_category (category &optional append)
104 (funcall (create-adder #'category) category append))
106 (defun maxima::$unicode_add (&optional (regexp nil) (append nil))
107 "Select the wide characters via a MAXIMA-NREGEX regexp. If
108 REGEXP is the symbol `all', this is equivalent to \".\"; if
109 REGEXP is NIL, then no matches are made, i.e. the hash
110 table UNICODE-DATA-HASHTABLE is emptied. Example:
111 unicode_add(\"greek .+ letter [^ ]+$\");"
112 (cond ((and (listp regexp) (not (null regexp)))
113 (let ((r (loop for regex in (cdr regexp)
114 for append = append then t
115 collect (maxima::$unicode_add regex append))))
116 (cons '(mlist) r)))
118 (let ((n (hash-table-count alphabetp-hashtable)))
119 (funcall (create-adder #'description) regexp append)
120 (list '(mlist) regexp (- (hash-table-count alphabetp-hashtable) n))))))
122 (defun print-hashtable (&optional (ht unicode-data-hashtable))
123 (let ((*print-base* 16.)
124 (*print-readably* t)
125 (*print-radix* t))
126 (maphash (lambda(k v) (format t "(~a ~{~s ~})~%" k (if (listp v) v (list v)))) ht)))
127 (defun print-alphabetp-hashtable ()
128 (print-hashtable alphabetp-hashtable))))
130 ; end of unicodedata.lisp