From 801b7d97c81e22ecf6e531536e4bb3aff8446aab Mon Sep 17 00:00:00 2001
From: Robert Dodier
Date: Thu, 10 Nov 2022 09:19:07 -0800
Subject: [PATCH] Remove ZERO WIDTH NO-BREAK SPACE from list of Unicode space
 characters for parser, since it is used by document processors to hint at
 word breaks for typesetting, and does not represent a word boundary.
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Thanks to Gunter Königsmann for pointing it out.
---
 src/nparse.lisp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/nparse.lisp b/src/nparse.lisp
index f6dc50358..31aecac81 100644
--- a/src/nparse.lisp
+++ b/src/nparse.lisp
@@ -20,7 +20,8 @@
   #-(or unicode sb-unicode openmcl-unicode-strings abcl (and allegro ics)) nil
   #+(or unicode sb-unicode openmcl-unicode-strings abcl (and allegro ics))
     ;; Adapted from the list given by: https://jkorpela.fi/chars/spaces.html
-    ;; omitting SPACE, OGHAM SPACE MARK, MONGOLIAN VOWEL SEPARATOR, and IDEOGRAPHIC SPACE.
+    ;; omitting SPACE, OGHAM SPACE MARK, MONGOLIAN VOWEL SEPARATOR, IDEOGRAPHIC SPACE,
+    ;; and ZERO WIDTH NO-BREAK SPACE.
     '(
       #.(code-char #x00A0) ;; NO-BREAK SPACE
       #.(code-char #x2000) ;; EN QUAD
@@ -37,7 +38,6 @@
       #.(code-char #x200B) ;; ZERO WIDTH SPACE
       #.(code-char #x202F) ;; NARROW NO-BREAK SPACE
       #.(code-char #x205F) ;; MEDIUM MATHEMATICAL SPACE
-      #.(code-char #xFEFF) ;; ZERO WIDTH NO-BREAK SPACE
       ))
 
 (defmvar *whitespace-chars* (append *ascii-space-chars-for-maxima* *unicode-space-chars-for-maxima*))
-- 
2.11.4.GIT