5 class WfBCP47Test
extends MediaWikiTestCase
{
8 * Please note that although the BCP explicitly states that language codes
9 * are case insensitive, there are some exceptions to the rule :)
10 * This test is used to verify our formatting against all lower case and
11 * all upper case language codes.
13 * @see http://tools.ietf.org/html/bcp47
14 * @dataProvider provideLanguageCodes()
public function testBCP47( $code, $expected ) {
	// Pass 1: feed wfBCP47() the all lower case spelling of the tag and
	// check that it comes back in the expected mixed-case form.
	$code = strtolower( $code );
	$formattedFromLower = wfBCP47( $code );
	$this->assertEquals(
		$expected,
		$formattedFromLower,
		"Applying BCP47 standard to lower case '$code'"
	);

	// Pass 2: same expectation, this time starting from the all upper
	// case spelling of the tag.
	$code = strtoupper( $code );
	$formattedFromUpper = wfBCP47( $code );
	$this->assertEquals(
		$expected,
		$formattedFromUpper,
		"Applying BCP47 standard to upper case '$code'"
	);
}
29 * Array format is ($code, $expected)
31 public static function provideLanguageCodes() {
33 // Extracted from BCP47 (list not exhaustive)
35 array( 'en-ca-x-ca', 'en-CA-x-ca' ),
36 array( 'sgn-be-fr', 'sgn-BE-FR' ),
37 array( 'az-latn-x-latn', 'az-Latn-x-latn' ),
39 array( 'sr-Latn-RS', 'sr-Latn-RS' ),
40 array( 'az-arab-ir', 'az-Arab-IR' ),
43 array( 'sl-nedis', 'sl-nedis' ),
44 array( 'de-ch-1996', 'de-CH-1996' ),
48 'en-latn-gb-boont-r-extended-sequence-x-private',
49 'en-Latn-GB-boont-r-extended-sequence-x-private'
52 // Examples from BCP47 Appendix A
53 # Simple language subtag:
58 # Language subtag plus script subtag:
59 array( 'zh-hans', 'zh-Hans' ),
60 array( 'sr-cyrl', 'sr-Cyrl' ),
61 array( 'sr-latn', 'sr-Latn' ),
63 # Extended language subtags and their primary language subtag
65 array( 'zh-cmn-hans-cn', 'zh-cmn-Hans-CN' ),
66 array( 'cmn-hans-cn', 'cmn-Hans-CN' ),
67 array( 'zh-yue-hk', 'zh-yue-HK' ),
68 array( 'yue-hk', 'yue-HK' ),
70 # Language-Script-Region:
71 array( 'zh-hans-cn', 'zh-Hans-CN' ),
72 array( 'sr-latn-RS', 'sr-Latn-RS' ),
75 array( 'sl-rozaj', 'sl-rozaj' ),
76 array( 'sl-rozaj-biske', 'sl-rozaj-biske' ),
77 array( 'sl-nedis', 'sl-nedis' ),
79 # Language-Region-Variant:
80 array( 'de-ch-1901', 'de-CH-1901' ),
81 array( 'sl-it-nedis', 'sl-IT-nedis' ),
83 # Language-Script-Region-Variant:
84 array( 'hy-latn-it-arevela', 'hy-Latn-IT-arevela' ),
87 array( 'de-de', 'de-DE' ),
88 array( 'en-us', 'en-US' ),
89 array( 'es-419', 'es-419' ),
91 # Private use subtags:
92 array( 'de-ch-x-phonebk', 'de-CH-x-phonebk' ),
93 array( 'az-arab-x-aze-derbend', 'az-Arab-x-aze-derbend' ),
95 * Previous test does not reflect the BCP which states:
96 * az-Arab-x-AZE-derbend
97 * AZE being private, it should be lower case, hence the test above
99 #array( 'az-arab-x-aze-derbend', 'az-Arab-x-AZE-derbend' ),
102 # Private use registry values:
103 array( 'x-whatever', 'x-whatever' ),
104 array( 'qaa-qaaa-qm-x-southern', 'qaa-Qaaa-QM-x-southern' ),
105 array( 'de-qaaa', 'de-Qaaa' ),
106 array( 'sr-latn-qm', 'sr-Latn-QM' ),
107 array( 'sr-qaaa-rs', 'sr-Qaaa-RS' ),
109 # Tags that use extensions
110 array( 'en-us-u-islamcal', 'en-US-u-islamcal' ),
111 array( 'zh-cn-a-myext-x-private', 'zh-CN-a-myext-x-private' ),
112 array( 'en-a-myext-b-another', 'en-a-myext-b-another' ),
117 // ar-a-aaa-b-bbb-a-ccc