Non-word characters don't terminate tag names.
[mediawiki.git] / includes / normal / UtfNormalDefines.php
blobb07e3399ad1c00b4aa6144370c5430f5faea13ed
<?php
/**
 * Some constant definitions for the unicode normalization module.
 *
 * Note: these constants must all be resolvable at compile time by HipHop,
 * since this file will not be executed during request startup for a compiled
 * MediaWiki.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup UtfNormal
 */

# Code point constants for Unicode normalization. The Hangul values are the
# SBase/LBase/VBase/TBase and LCount/VCount/TCount/NCount constants of the
# Unicode Standard's Hangul syllable (de)composition algorithm.

# First and last precomposed Hangul syllable.
define( 'UNICODE_HANGUL_FIRST', 0xac00 );
define( 'UNICODE_HANGUL_LAST', 0xd7a3 );

# Base code points of the leading-consonant (L), vowel (V) and
# trailing-consonant (T) jamo ranges. TBASE is one below the first real
# trailing consonant, so that T index 0 stands for "no trailing consonant".
define( 'UNICODE_HANGUL_LBASE', 0x1100 );
define( 'UNICODE_HANGUL_VBASE', 0x1161 );
define( 'UNICODE_HANGUL_TBASE', 0x11a7 );

# Number of entries in each jamo range; NCOUNT is the number of syllables
# sharing one leading consonant (21 * 28 = 588).
define( 'UNICODE_HANGUL_LCOUNT', 19 );
define( 'UNICODE_HANGUL_VCOUNT', 21 );
define( 'UNICODE_HANGUL_TCOUNT', 28 );
define( 'UNICODE_HANGUL_NCOUNT', UNICODE_HANGUL_VCOUNT * UNICODE_HANGUL_TCOUNT );

# Last code point (inclusive) of each jamo range.
define( 'UNICODE_HANGUL_LEND', UNICODE_HANGUL_LBASE + UNICODE_HANGUL_LCOUNT - 1 );
define( 'UNICODE_HANGUL_VEND', UNICODE_HANGUL_VBASE + UNICODE_HANGUL_VCOUNT - 1 );
define( 'UNICODE_HANGUL_TEND', UNICODE_HANGUL_TBASE + UNICODE_HANGUL_TCOUNT - 1 );

# UTF-16 surrogate range, the highest valid code point, and U+FFFD
# REPLACEMENT CHARACTER.
define( 'UNICODE_SURROGATE_FIRST', 0xd800 );
define( 'UNICODE_SURROGATE_LAST', 0xdfff );
define( 'UNICODE_MAX', 0x10ffff );
define( 'UNICODE_REPLACEMENT', 0xfffd );

# UTF-8 byte strings for the code points named in the trailing comments.
# They are written as literals rather than computed with codepointToUtf8()
# so that they stay resolvable at compile time.

# First and last precomposed Hangul syllable.
define( 'UTF8_HANGUL_FIRST', "\xea\xb0\x80" /*codepointToUtf8( UNICODE_HANGUL_FIRST )*/ );
define( 'UTF8_HANGUL_LAST', "\xed\x9e\xa3" /*codepointToUtf8( UNICODE_HANGUL_LAST )*/ );

# Base of the leading-consonant, vowel and trailing-consonant jamo ranges.
define( 'UTF8_HANGUL_LBASE', "\xe1\x84\x80" /*codepointToUtf8( UNICODE_HANGUL_LBASE )*/ );
define( 'UTF8_HANGUL_VBASE', "\xe1\x85\xa1" /*codepointToUtf8( UNICODE_HANGUL_VBASE )*/ );
define( 'UTF8_HANGUL_TBASE', "\xe1\x86\xa7" /*codepointToUtf8( UNICODE_HANGUL_TBASE )*/ );

# Last code point (inclusive) of each jamo range.
define( 'UTF8_HANGUL_LEND', "\xe1\x84\x92" /*codepointToUtf8( UNICODE_HANGUL_LEND )*/ );
define( 'UTF8_HANGUL_VEND', "\xe1\x85\xb5" /*codepointToUtf8( UNICODE_HANGUL_VEND )*/ );
define( 'UTF8_HANGUL_TEND', "\xe1\x87\x82" /*codepointToUtf8( UNICODE_HANGUL_TEND )*/ );

# Surrogate range bounds, highest valid code point, and U+FFFD.
define( 'UTF8_SURROGATE_FIRST', "\xed\xa0\x80" /*codepointToUtf8( UNICODE_SURROGATE_FIRST )*/ );
define( 'UTF8_SURROGATE_LAST', "\xed\xbf\xbf" /*codepointToUtf8( UNICODE_SURROGATE_LAST )*/ );
define( 'UTF8_MAX', "\xf4\x8f\xbf\xbf" /*codepointToUtf8( UNICODE_MAX )*/ );
define( 'UTF8_REPLACEMENT', "\xef\xbf\xbd" /*codepointToUtf8( UNICODE_REPLACEMENT )*/ );
# Disabled alternative definition, presumably kept as a debugging aid that
# makes replaced sequences show up as '!' -- TODO confirm before removing.
#define( 'UTF8_REPLACEMENT', '!' );

# Highest overlong (non-shortest-form) UTF-8 sequence of each length:
#   A: 2 bytes encoding U+007F
#   B: 3 bytes encoding U+07FF
#   C: 4 bytes encoding U+FFFF
define( 'UTF8_OVERLONG_A', "\xc1\xbf" );
define( 'UTF8_OVERLONG_B', "\xe0\x9f\xbf" );
define( 'UTF8_OVERLONG_C', "\xf0\x8f\xbf\xbf" );

# These two ranges are illegal: the noncharacters U+FDD0..U+FDEF and
# U+FFFE..U+FFFF. The constants are the UTF-8 forms of the range bounds.
define( 'UTF8_FDD0', "\xef\xb7\x90" /*codepointToUtf8( 0xfdd0 )*/ );
define( 'UTF8_FDEF', "\xef\xb7\xaf" /*codepointToUtf8( 0xfdef )*/ );
define( 'UTF8_FFFE', "\xef\xbf\xbe" /*codepointToUtf8( 0xfffe )*/ );
define( 'UTF8_FFFF', "\xef\xbf\xbf" /*codepointToUtf8( 0xffff )*/ );

# Boolean markers; presumably they distinguish a sequence's lead byte (HEAD)
# from its continuation bytes (TAIL) -- verify against the callers.
define( 'UTF8_HEAD', false );
define( 'UTF8_TAIL', true );