1 % \iffalse meta-comment
3 % Copyright 2013-2016 Javier Bezos and any individual authors
4 % listed elsewhere in this file. All rights reserved.
6 % This file is part of the Babel system.
7 % --------------------------------------
9 % It may be distributed and/or modified under the
10 % conditions of the LaTeX Project Public License, either version 1.3
11 % of this license or (at your option) any later version.
12 % The latest version of this license is in
13 % http://www.latex-project.org/lppl.txt
14 % and version 1.3 or later is part of all distributions of LaTeX
15 % version 2003/12/01 or later.
17 % This work has the LPPL maintenance status "maintained".
19 % The Current Maintainer of this work is Javier Bezos.
21 % The list of all files belonging to the Babel system is
22 % given in the file `manifest.bbl'. See also `legal.bbl' for additional
25 % The list of derived (unpacked) files belonging to the distribution
26 % and covered by LPPL is defined by the unpacking scripts (with
27 % extension .ins) which are part of the distribution.
33 \ProvidesFile{bbunicode.dtx}
34 [2016/02/01 v1.1a Babel hooks for Unicode engines]
37 %% File `bbunicode.dtx'
38 %% Babel package for LaTeX version 2e
39 %% Copyright (C) 2013-2016
43 \documentclass{ltxdoc}
44 \font\manual=logo10 % font used for the METAFONT logo, etc.
45 \newcommand*\MF{{\manual META}\-{\manual FONT}}
46 \newcommand*{\babel}{\textsf{babel}}
47 \newcommand*{\langvar}{$\langle \it lang \rangle$}
48 \newcommand*{\note}[1]{}
49 \newcommand*{\pkg}[1]{\textsf{#1}}
50 \newcommand*{\Lopt}[1]{\textsf{#1}}
51 \newcommand*{\file}[1]{\texttt{#1}}
53 \DocInput{bbunicode.dtx}
58 % \GetFileInfo{bbunicode.dtx}
61 % \section{Tentative font handling}
63 % A general solution is far from trivial:
65 % \item |\addfontfeature| only sets it for the current family and it's
66 % not very efficient, and
67 % \item |\defaultfontfeatures| requires redefining the font (and the
68 % options aren't ``orthogonal'').
74 \bbl@for\bbl@tempa{#1}{%
75 \edef\bbl@tempb{\noexpand\bbl@FSstore{\bbl@tempa}}
76 \bbl@tempb{rm}\rmdefault\bbl@save@rmdefault
77 \bbl@tempb{sf}\sfdefault\bbl@save@sfdefault
78 \bbl@tempb{tt}\ttdefault\bbl@save@ttdefault}}
79 \def\bbl@FSstore#1#2#3#4{%
80 \bbl@csarg\edef{#2default#1}{#3}%
81 \expandafter\addto\csname extras#1\endcsname{%
84 \edef#3{\csname bbl@#2default#1\endcsname}%
85 \fontfamily{#3}\selectfont
87 \edef#3{\csname bbl@#2default#1\endcsname}%
89 \expandafter\addto\csname noextras#1\endcsname{%
91 \fontfamily{#4}\selectfont
94 \let\bbl@langfeatures\@empty
95 \def\babelFSfeatures{%
96 \let\bbl@ori@fontspec\fontspec
97 \renewcommand\fontspec[1][]{%
98 \bbl@ori@fontspec[\bbl@langfeatures##1]}
99 \let\babelFSfeatures\bbl@FSfeatures
% \bbl@FSfeatures{<lang>}{<features>}: appends code to \extras<lang> that
% first saves the current \bbl@langfeatures with \babel@save and then
% redefines it as `<features>,' (note the trailing comma). The patched
% \fontspec puts \bbl@langfeatures directly in front of the user's optional
% argument, so the comma keeps the two option lists separated.
101 \def\bbl@FSfeatures#1#2{%
102 \expandafter\addto\csname extras#1\endcsname{%
103 \babel@save\bbl@langfeatures
104 \edef\bbl@langfeatures{#2,}}}
107 % \section{Hooks for XeTeX and LuaTeX}
111 % Unfortunately, the current encoding cannot be retrieved and
112 % therefore it is always reset to |utf8|, which seems a sensible
115 % \LaTeX{} sets many ``codes'' just before loading
116 % \verb|hyphen.cfg|. That is not a problem in luatex, but in xetex
117 % they must be reset to the proper value. Most of the work is done in
118 % \textsf{xe(la)tex.ini}, so here we just ``undo'' some of the
119 % changes done by \LaTeX. Anyway, for consistency Lua\TeX{} also
120 % resets the catcodes.
121 % \changes{bbunicode~1.0c}{2014/03/10}{Reset ``codes'' set by \cs{LaTeX}
122 % to what xetex expects. Used also in luatex.}
123 % \changes{bbunicode~1.0f}{2015/12/06}{This block was assigned to
124 % xetex, even in luatex. Fixed here and below.}
126 %<<*Restore Unicode catcodes before loading patterns>>
128 % Reset chars "80-"BF (the loop below stops before "C0) to category "other", no case mapping:
% Make @ a letter (catcode 11), as needed to write \count@, and start the
% counter at 128.
129 \catcode`\@=11 \count@=128
% Loop while \count@ < 192. Every assignment is \global: \uccode and \lccode
% are cleared, \catcode becomes 12 and \sfcode becomes 1000.
130 \loop\ifnum\count@<192
131 \global\uccode\count@=0 \global\lccode\count@=0
132 \global\catcode\count@=12 \global\sfcode\count@=1000
133 \advance\count@ by 1 \repeat
136 \global\uccode"##1=0 \global\lccode"##1=0
137 \global\catcode"##1=12 \global\sfcode"##1=1000 }%
% \L <code> <upper> <lower> : the three arguments are space-delimited and
% are read as hexadecimal numbers (each one is used after a " prefix).
% Globally makes <code> a letter (catcode 11) and sets its \uccode to
% <upper> and its \lccode to <lower>. The doubled ## shows that this text is
% meant to sit inside another macro's replacement text.
139 \def\L ##1 ##2 ##3 {\global\catcode"##1=11
140 \global\uccode"##1="##2
141 \global\lccode"##1="##3
142 % Uppercase letters have sfcode=999 (a character is treated as uppercase
% here when it differs from its own lowercase mapping):
143 \ifnum"##1="##3 \else \global\sfcode"##1=999 \fi }%
144 % Letter without case mappings: \l <code> maps the character to itself,
% so the \sfcode test in \L never fires for it.
145 \def\l ##1 {\L ##1 ##1 ##1 }%
155 %<</Restore Unicode catcodes before loading patterns>>
% Sets \BabelStringsDefault to `unicode'. NOTE(review): presumably the
% default strings-encoding label babel uses under this engine -- confirm
% against babel's core.
162 \def\BabelStringsDefault{unicode}
% \xebbl@stop starts out as a no-op. The encodedcommands hook redefines it
% to switch \XeTeXinputencoding back to "utf8", and the stopcommands hook
% resets it to \relax.
163 \let\xebbl@stop\relax
164 \AddBabelHook{xetex}{encodedcommands}{%
166 \ifx\bbl@tempa\@empty
167 \XeTeXinputencoding"bytes"%
169 \XeTeXinputencoding"#1"%
171 \def\xebbl@stop{\XeTeXinputencoding"utf8"}}
172 \AddBabelHook{xetex}{stopcommands}{%
174 \let\xebbl@stop\relax}
% On the loadkernel event, run the shared fragment that restores the
% Unicode catcodes. <@...@> is a placeholder naming the block delimited by
% %<<*...>> and %<</...>> elsewhere in this file.
% NOTE(review): presumably substituted when the file is unpacked -- confirm
% against the build scripts.
175 \AddBabelHook{xetex}{loadkernel}{%
176 <@Restore Unicode catcodes before loading patterns@>}
180 % \subsection{LuaTeX}
182 % The new loader for luatex is based solely on |language.dat|, which is
183 % read on the fly. The code shouldn't be executed when the format is
184 % built, so we check if |\bbl@get@enc| is defined. Then comes a
185 % simplified version of the loader in |hyphen.cfg| (without the
186 % hyphenmins stuff, which is under the direct control of \babel). A
187 % language has been loaded if |bbl@hyphendata@<num>| exists. The names
188 % |\l@<language>| are defined and take some value from the beginning
189 % because all ldf files assume this for the corresponding language to be
190 % considered valid. Of course, there is room for improvements.
191 % \changes{bbunicode~1.0b}{2013/04/22}{luatex-hyphen is loaded
192 % with require. Changes supplied by \'{E}lie Roux.}
193 % \changes{bbunicode~1.0c}{2014/03/10}{Defined hook for
194 % `initiateactive', to fetch the next token and continue only if
196 % \changes{bbunicode~1.0d}{2014/03/21}{Removed the `misfeature' for
198 % \changes{bbunicode~1.0e}{2015/05/10}{Use brackets instead of
199 % \cs{luaescapestring}}
200 % \changes{bbunicode~1.0e}{2015/07/26}{Added function addpatterns and
201 % modified the patterns hook.}
202 % \changes{bbunicode~1.1a}{2016/01/26}{New hyphenation loader for luatex.}
206 \ifx\bbl@get@enc\@undefined
207 \def\bbl@process@line#1#2 #3 #4 {%
209 \bbl@process@synonym{#2}%
211 \bbl@process@language{#1#2}{#3}{#4}%
214 \def\bbl@process@language#1#2#3{%
216 {\expandafter\addlanguage\csname l@#1\endcsname
217 \expandafter\language\csname l@#1\endcsname
219 \edef\bbl@languages{%
220 \bbl@languages\bbl@elt{#1}{\the\language}{#2}{#3}}}%
222 \def\bbl@process@synonym#1{%
224 {\expandafter\chardef\csname l@#1\endcsname\last@language
226 \edef\bbl@languages{%
227 \bbl@languages\bbl@elt{#1}{\the\last@language}{}{}}}%
229 \ifnum\last@language>\z@
230 \bbl@warning{Wrong or old hyphenation setup. Please, rebuild\\%
231 the format. I'll try to fix it for this run.\\%
233 \def\bbl@elt#1#2#3#4{%
235 \noexpand\bbl@elt{#1}{#2}{#3}{#4}%
237 \edef\bbl@languages{\bbl@languages}%
239 \ifnum\l@english=\z@\else
240 \bbl@warning{Wrong hyphenation setup. The 0th language must\\%
241 be `english'. Reported}%
243 \@namedef{bbl@hyphendata@0}{{hyphen.tex}{}}%
244 \openin1=language.dat
246 \bbl@warning{I couldn't find language.dat. No additional\\%
247 patterns loaded. Reported}%
253 \if T\ifeof1F\fi T\relax
254 \ifx\bbl@line\@empty\else
255 \edef\bbl@line{\bbl@line\space\space\space}%
256 \expandafter\bbl@process@line\bbl@line\relax
% \bbl@get@enc<a>:<b>:<rest>\@@@ -- delimited-argument parser. Stores the
% text between the first and the second colon in \bbl@hyph@enc and discards
% the other two parts.
% NOTE(review): presumably fed a `language:encoding' name (cf. the
% "dutch:OT1" example in the patterns hook), padded with extra colons so
% that the delimiters always match -- confirm at the call site.
260 \def\bbl@get@enc#1:#2:#3\@@@{\def\bbl@hyph@enc{#2}}
261 \def\bbl@luapatterns#1#2{%
267 \ifx\bbl@tempa\@empty\else
278 function Babel.bytes(line)
279 return line:gsub("(.)",
280 function (chr) return unicode.utf8.char(string.byte(chr)) end)
282 function Babel.begin_process_input()
283 if luatexbase and luatexbase.add_to_callback then
284 luatexbase.add_to_callback('process_input_buffer',
285 Babel.bytes,'Babel.bytes')
287 Babel.callback = callback.find('process_input_buffer')
288 callback.register('process_input_buffer',Babel.bytes)
291 function Babel.end_process_input ()
292 if luatexbase and luatexbase.remove_from_callback then
293 luatexbase.remove_from_callback('process_input_buffer','Babel.bytes')
295 callback.register('process_input_buffer',Babel.callback)
298 function Babel.addpatterns(pp, lg)
299 local lg = lang.new(lg)
300 local pats = lang.patterns(lg) or ''
301 lang.clear_patterns(lg)
302 for p in pp:gmatch('[^%s]+') do
304 for i in string.utfcharacters(p:gsub('%d', '')) do
305 ss = ss .. '%d?' .. i
307 ss = ss:gsub('^%%d%?%.', '%%.') .. '%d?'
308 ss = ss:gsub('%.%%d%?$', '%%.')
309 pats, n = pats:gsub('%s' .. ss .. '%s', ' ' .. p .. ' ')
312 [[\string\csname\space bbl@info\endcsname{New pattern: ]]
314 pats = pats .. ' ' .. p
317 [[\string\csname\space bbl@info\endcsname{Renew pattern: ]]
321 lang.patterns(lg, pats)
% Sets \BabelStringsDefault to `unicode'. NOTE(review): presumably the
% default strings-encoding label babel uses under this engine -- confirm
% against babel's core.
325 \def\BabelStringsDefault{unicode}
% \luabbl@stop starts out as a no-op; the stopcommands hook resets it to
% \relax again.
326 \let\luabbl@stop\relax
327 \AddBabelHook{luatex}{encodedcommands}{%
328 \def\bbl@tempa{utf8}\def\bbl@tempb{#1}%
329 \ifx\bbl@tempa\bbl@tempb\else
330 \directlua{Babel.begin_process_input()}%
332 \directlua{Babel.end_process_input()}}%
334 \AddBabelHook{luatex}{stopcommands}{%
336 \let\luabbl@stop\relax}
337 \AddBabelHook{luatex}{patterns}{%
338 \@ifundefined{bbl@hyphendata@\the\language}%
339 {\def\bbl@elt##1##2##3##4{%
342 \ifx\bbl@tempb\@empty\else % if not synonymous
343 \def\bbl@tempc{{##3}{##4}}%
345 \def\bbl@tempb{#2}% eg, spanish, dutch:OT1, etc.
346 \ifx\bbl@tempa\bbl@tempb
347 \bbl@csarg\edef{hyphendata@##2}{\bbl@tempc}%
350 \@ifundefined{bbl@hyphendata@\the\language}%
351 {\bbl@info{No hyphenation patterns were set for\\%
352 language ‘#2’. Reported}}%
353 {\expandafter\expandafter\expandafter\bbl@luapatterns
354 \csname bbl@hyphendata@\the\language\endcsname}}{}%
355 \@ifundefined{bbl@patterns@}{}{%
357 \@expandtwoargs\in@{,\number\language,}{,\bbl@pttnlist}%
359 \ifx\bbl@patterns@\@empty\else
360 \directlua{ Babel.addpatterns(
361 [[\bbl@patterns@]], \number\language) }%
363 \@ifundefined{bbl@patterns@#1}%
365 {\directlua{ Babel.addpatterns(
366 [[\space\csname bbl@patterns@#1\endcsname]],
367 \number\language) }}%
368 \xdef\bbl@pttnlist{\bbl@pttnlist\number\language,}%
% On the everylanguage event, redefine \process@language (three arguments)
% so that its only effect is to turn \process@line into a macro that
% swallows its arguments and does nothing.
% NOTE(review): presumably this stops the kernel loader from processing the
% remaining language.dat lines, since luatex loads patterns on the fly --
% confirm.
371 \AddBabelHook{luatex}{everylanguage}{%
372 \def\process@language##1##2##3{%
373 \def\process@line####1####2 ####3 ####4 {}}}
376 % \begin{macro}{\babelpatterns}
378 % This macro adds patterns. Two macros are used to store them:
379 % |\bbl@patterns@| for the global ones and |\bbl@patterns@<lang>|
380 % for language ones. We make sure there is a space between words
381 % when multiple commands are used.
382 % \changes{bbunicode~1.0e}{2015/07/26}{Macro \cs{babelpatterns} added}
385 \@onlypreamble\babelpatterns
387 \newcommand\babelpatterns[2][\@empty]{%
388 \ifx\bbl@patterns@\relax
389 \let\bbl@patterns@\@empty
391 \ifx\bbl@pttnlist\@empty\else
393 You must not intermingle \string\selectlanguage\space and\\%
394 \string\babelpatterns\space or some patterns will not\\%
395 be taken into account. Reported}%
398 \protected@edef\bbl@patterns@{\bbl@patterns@\space#2}%
400 \edef\bbl@tempb{\zap@space#1 \@empty}%
401 \bbl@for\bbl@tempa\bbl@tempb{%
402 \bbl@fixname\bbl@tempa
403 \bbl@iflanguage\bbl@tempa{%
404 \bbl@csarg\protected@edef{patterns@\bbl@tempa}{%
405 \@ifundefined{bbl@patterns@\bbl@tempa}%
407 {\csname bbl@patterns@\bbl@tempa\endcsname\space}%
% On the loadkernel event, run the shared fragment that restores the
% Unicode catcodes. <@...@> is a placeholder naming the block delimited by
% %<<*...>> and %<</...>> elsewhere in this file.
% NOTE(review): presumably substituted when the file is unpacked -- confirm
% against the build scripts.
416 \AddBabelHook{luatex}{loadkernel}{%
417 <@Restore Unicode catcodes before loading patterns@>}
425 %% {Upper-case \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z
426 %% Lower-case \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z
427 %% Digits \0\1\2\3\4\5\6\7\8\9
428 %% Exclamation \! Double quote \" Hash (number) \#
429 %% Dollar \$ Percent \% Ampersand \&
430 %% Acute accent \' Left paren \( Right paren \)
431 %% Asterisk \* Plus \+ Comma \,
432 %% Minus \- Point \. Solidus \/
433 %% Colon \: Semicolon \; Less than \<
434 %% Equals \= Greater than \> Question mark \?
435 %% Commercial at \@ Left bracket \[ Backslash \\
436 %% Right bracket \] Circumflex \^ Underscore \_
437 %% Grave accent \` Left brace \{ Vertical bar \|
438 %% Right brace \} Tilde \~}