1 ;; -*- mode: lisp; coding: utf-8 -*-
2 ;; Copyright Leo Butler (leo.butler@member.fsf.org) 2015
3 ;; Released under the terms of GPLv3+
7 Rewrite Maxima
's alphabetp function to handle wide-characters.
8 Add the ability for user to alter the table of alphabetic characters at runtime.
11 -detect if unicode characters are characters
15 (defpackage :unicodedata
16 (:use
:common-lisp
:maxima
)
17 (:import-from
:maxima merror alphabetp
*alphabet
* $file_search1 $file_search_lisp $done $all mfuncall mlist
)
20 (in-package :unicodedata
)
22 (let ((unicode-data-hashtable (make-hash-table :test
#'eql
))
23 (alphabetp-hashtable (make-hash-table :test
#'equal
)))
26 (labels ((lookup (i &optional
(unicode-data-hashtable unicode-data-hashtable
)) (symbol-name (car (gethash i unicode-data-hashtable
))))
27 (description (x) (symbol-name (second x
)))
28 (create-selector (regexp)
29 (cond ((stringp regexp
)
30 (let ((s (string-downcase regexp
)))
31 (lambda (x) (search s
(string-downcase x
)))))
33 (lambda (x) (declare (ignore x
)) t
))
35 (lambda (x) (declare (ignore x
)) nil
))
37 (merror "regexp must be a string, the symbol `all' or empty."))))
38 (create-adder (lookup-char)
39 (labels ((this (regexp append
)
40 (let ((selector (create-selector regexp
)))
41 (unless append
(clrhash alphabetp-hashtable
))
42 (maphash (lambda (k v
)
43 (if (and k
(funcall selector
(funcall lookup-char v
)))
44 (maxima::$set_alpha_char k
)))
45 unicode-data-hashtable
))
48 #-sbcl
(char-sym (x) (first x
))
49 (category (x) (symbol-name (third x
))))
52 (defun unicode-alphabetp (c)
53 (cond ((< (char-code c
) 128.
) ;; this character is ascii and must be non-alphabetic
54 (setf stack
'())) ;; there are no characters on the stack nor known to be part of a wide-character
55 ((null stack
) ;; len=0
57 (if (gethash stack alphabetp-hashtable
)
60 (t ;; 1 or more characters in stack
62 (cond ((gethash stack alphabetp-hashtable
) ;; c is part of wide-character
65 (setf stack
'()) ;; c is not end of wide-character
66 (unicode-alphabetp c
)))))) ;; but it may start new one.
67 ;; redefine alphabetp from src/nparse.lisp
68 ;; to use unicode-alphabetp
71 (cond ((or (alpha-char-p n
)
72 (member n
*alphabet
*))
76 (unicode-alphabetp n
))))))
78 (defun maxima::$set_alpha_char
(char-sym)
79 "A user-level function to add a wide character to the hashtable of
80 known alphabetical characters."
81 (let ((char-sym-list (coerce (cond ((stringp char-sym
) char-sym
)
82 ((symbolp char-sym
) (symbol-name char-sym
))
83 ((and (integerp char-sym
) (< 127. char-sym
) (> 917999. char-sym
))
85 (format nil
"~c" (code-char char-sym
))
87 (lookup char-sym unicode-data-hashtable
))
88 (t (merror "first argument must be a string, symbol or integer")))
90 (do ((x (reverse char-sym-list
) (cdr x
))) ((null x
) '$done
)
91 (setf (gethash x alphabetp-hashtable
)
92 #-t
(push (list char-sym-list
(gethash char-sym unicode-data-hashtable
)) (gethash x alphabetp-hashtable
))
95 (defun maxima::$unicode_init
(&optional
(regexp nil
) file
)
96 (let ((data-file (mfuncall '$file_search1
(or file
"unicodedata-txt.lisp") '((mlist) $file_search_lisp
))))
97 (loop for
(n char-sym description category
) in
(with-open-file (instr data-file
:direction
:input
) (read instr t nil nil
))
99 (setf (gethash n unicode-data-hashtable
) (list char-sym description category
)))
100 (if regexp
(maxima::$unicode_add regexp
))))
103 (defun maxima::$unicode_add_category
(category &optional append
)
104 (funcall (create-adder #'category
) category append
))
106 (defun maxima::$unicode_add
(&optional
(regexp nil
) (append nil
))
107 "Select the wide characters via a MAXIMA-NREGEX regexp. If
108 REGEXP is the symbol `all', this is equivalent to \".\"; if
109 REGEXP is NIL, then no matches are made, i.e. the hash
110 table UNICODE-DATA-HASHTABLE is emptied. Example:
111 unicode_add(\"greek .+ letter [^ ]+$\");"
112 (cond ((and (listp regexp
) (not (null regexp
)))
113 (let ((r (loop for regex in
(cdr regexp
)
114 for append
= append then t
115 collect
(maxima::$unicode_add regex append
))))
118 (let ((n (hash-table-count alphabetp-hashtable
)))
119 (funcall (create-adder #'description
) regexp append
)
120 (list '(mlist) regexp
(- (hash-table-count alphabetp-hashtable
) n
))))))
122 (defun print-hashtable (&optional
(ht unicode-data-hashtable
))
123 (let ((*print-base
* 16.
)
126 (maphash (lambda(k v
) (format t
"(~a ~{~s ~})~%" k
(if (listp v
) v
(list v
)))) ht
)))
127 (defun print-alphabetp-hashtable ()
128 (print-hashtable alphabetp-hashtable
))))
130 ; end of unicodedata.lisp