2 class CollationTest
extends MediaWikiLangTestCase
{
/**
 * Prepare the fixture for every test in this class.
 *
 * All tests here are skipped when wfDl( 'intl' ) reports that the
 * intl extension is not available.
 */
protected function setUp() {
	// Chain to the parent fixture first so MediaWikiLangTestCase can do
	// its own preparation before any test in this class runs.
	parent::setUp();

	if ( !wfDl( 'intl' ) ) {
		$this->markTestSkipped( 'These tests require intl extension' );
	}
}
/**
 * Check that when one string extends another (e.g. "X" and "XY"),
 * the binary sort key of the shorter string is a prefix of the
 * binary sort key of the longer one. Our collation code relies on
 * this property.
 *
 * @param $lang String Language code for collator
 * @param $base String Base string
 * @param $extended String String containing base as a prefix.
 *
 * @dataProvider prefixDataProvider
 */
function testIsPrefix( $lang, $base, $extended ) {
	$collator = Collator::create( $lang );
	$collator->setStrength( Collator::PRIMARY );

	// Strip the trailing NUL terminator from the base key so it can be
	// compared against the start of the longer key.
	$baseKey = rtrim( $collator->getSortKey( $base ), "\0" );
	$extendedKey = $collator->getSortKey( $extended );

	$this->assertStringStartsWith(
		$baseKey,
		$extendedKey,
		"$base is not a prefix of $extended"
	);
}
/**
 * Data sets for testIsPrefix.
 *
 * @return array List of array( $lang, $base, $extended ) argument sets
 */
function prefixDataProvider() {
	$cases = array();

	$cases[] = array( 'en', 'A', 'AA' );
	$cases[] = array( 'en', 'A', 'AAA' );
	$cases[] = array( 'en', 'Д', 'ДЂ' );
	$cases[] = array( 'en', 'Д', 'ДA' );

	// 'Ʒ' should expand to 'Z ' (note the trailing space).
	$cases[] = array( 'fi', 'Z', 'Ʒ' );

	// 'Þ' should expand to 'th'.
	$cases[] = array( 'sv', 't', 'Þ' );

	// Javanese is a limited-use alphabet, so it should take 3 bytes
	// per character; exercise a few combinations with it.
	$cases[] = array( 'en', 'ꦲ', 'ꦲꦤ' );
	$cases[] = array( 'en', 'ꦲ', 'ꦲД' );
	$cases[] = array( 'en', 'A', 'Aꦲ' );

	return $cases;
}
/**
 * Opposite of testIsPrefix: the binary sort key of the first string
 * must not be a prefix of the binary sort key of the second.
 *
 * @dataProvider notPrefixDataProvider
 */
function testNotIsPrefix( $lang, $base, $extended ) {
	$collator = Collator::create( $lang );
	$collator->setStrength( Collator::PRIMARY );

	// Strip the trailing NUL terminator from the base key before
	// comparing it against the start of the other key.
	$baseKey = rtrim( $collator->getSortKey( $base ), "\0" );
	$extendedKey = $collator->getSortKey( $extended );

	$this->assertStringStartsNotWith(
		$baseKey,
		$extendedKey,
		"$base is a prefix of $extended"
	);
}
/**
 * Data sets for testNotIsPrefix.
 *
 * @return array List of array( $lang, $base, $extended ) argument sets
 */
function notPrefixDataProvider() {
	$cases = array();

	$cases[] = array( 'en', 'A', 'B' );
	$cases[] = array( 'en', 'AC', 'ABC' );
	$cases[] = array( 'en', 'Z', 'Ʒ' );
	$cases[] = array( 'en', 'A', 'ꦲ' );

	return $cases;
}
/**
 * Test that the expected first letter is returned for a string
 * under a given collation.
 *
 * @param $collation String Collation name (aka uca-en)
 * @param $string String String to get first letter of
 * @param $firstLetter String Expected first letter.
 *
 * @dataProvider firstLetterProvider
 */
function testGetFirstLetter( $collation, $string, $firstLetter ) {
	$actualLetter = Collation::factory( $collation )->getFirstLetter( $string );

	$this->assertEquals( $firstLetter, $actualLetter );
}
89 function firstLetterProvider() {
91 array( 'uppercase', 'Abc', 'A' ),
92 array( 'uppercase', 'abc', 'A' ),
93 array( 'identity', 'abc', 'a' ),
94 array( 'uca-en', 'abc', 'A' ),
95 array( 'uca-en', ' ', ' ' ),
96 array( 'uca-en', 'Êveryone', 'E' ),
97 array( 'uca-vi', 'Êveryone', 'Ê' ),
98 // Make sure thorn is not a first letter.
99 array( 'uca-sv', 'The', 'T' ),
100 array( 'uca-sv', 'Å', 'Å' ),
101 array( 'uca-hu', 'dzsdo', 'Dzs' ),
102 array( 'uca-hu', 'dzdso', 'Dz' ),
103 array( 'uca-hu', 'CSD', 'Cs' ),
104 array( 'uca-root', 'CSD', 'C' ),
105 array( 'uca-fi', 'Ǥ', 'G' ),
106 array( 'uca-fi', 'Ŧ', 'T' ),
107 array( 'uca-fi', 'Ʒ', 'Z' ),
108 array( 'uca-fi', 'Ŋ', 'N' ),