[GENERIC] Zend_Translate:
[zend.git] / documentation / manual / en / module_specs / Zend_Locale-Parsing.xml
blobf4c26d656974b821316f9a3ffda3097b1885f083
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!-- Reviewed: no -->
3 <sect1 id="zend.locale.parsing">
4     <title>Normalization and Localization</title>
6     <para>
7         <classname>Zend_Locale_Format</classname> is an internal component used by
8         <classname>Zend_Locale</classname>. All locale aware classes use
9         <classname>Zend_Locale_Format</classname> for normalization and localization of numbers and
10         dates. Normalization involves parsing input from a variety of data representations, like
11         dates, into a standardized, structured representation, such as a <acronym>PHP</acronym>
12         array with year, month, and day elements.
13     </para>
15     <para>
16         The exact same string containing a number or a date might mean different things to people
17         with different customs and conventions. Disambiguation of numbers and dates requires rules
18         about how to interpret these strings and normalize the values into a standardized data
19         structure. Thus, all methods in <classname>Zend_Locale_Format</classname> require a locale
20         in order to parse the input data.
22         <note>
23             <title>Default "root" Locale</title>
25             <para>
26                 If no locale is specified, then normalization and localization will use the standard
27                 "root" locale, which might yield unexpected behavior, if the input originated in a
28                 different locale, or output for a specific locale was expected.
29             </para>
30         </note>
31     </para>
33     <sect2 id="zend.locale.number.normalize">
34         <title>Number normalization: getNumber($input, Array $options)</title>
36         <para>
37             There are many <ulink url="http://en.wikipedia.org/wiki/Numeral">number systems</ulink>
38             different from the common <ulink
39                 url="http://en.wikipedia.org/wiki/Decimal">decimal system</ulink> (e.g. "3.14").
40             Numbers can be normalized with the <methodname>getNumber()</methodname> function to
41             obtain the standard decimal representation. For all number-related discussions in this
42             manual, <ulink
43                 url="http://en.wikipedia.org/wiki/Arabic_numerals">Arabic/European numerals
44                 (0,1,2,3,4,5,6,7,8,9)</ulink> are implied, unless explicitly stated otherwise. The
45             options array may contain a 'locale' to define grouping and decimal characters. The
46             array may also have a 'precision' to round the result to the given number of digits.
47         </para>
49         <example id="zend.locale.number.normalize.example-1">
50             <title>Number normalization</title>
52             <programlisting language="php"><![CDATA[
53 $locale = new Zend_Locale('de_AT');
54 $number = Zend_Locale_Format::getNumber('13.524,678',
55                                         array('locale' => $locale,
56                                               'precision' => 3)
57                                        );
59 print $number; // will return 13524.678
60 ]]></programlisting>
61         </example>
63         <sect3 id="zend.locale.number.normalize.precision">
64             <title>Precision and Calculations</title>
66             <para>
67                 Since <methodname>getNumber($value, array $options = array())</methodname> can
68                 normalize extremely large numbers, check the result carefully before using finite
69                 precision calculations, such as ordinary <acronym>PHP</acronym> math operations. For
70                 example, <code>if ((string)intval($number) != $number) { use <ulink
71                     url="http://www.php.net/bc">BCMath</ulink> or <ulink
72                     url="http://www.php.net/gmp">GMP</ulink></code>. Most <acronym>PHP</acronym>
73                 installations support the BCMath extension.
74             </para>
76             <para>
77                 Also, the precision of the resulting decimal representation can be rounded to a
78                 desired length with <methodname>getNumber()</methodname> with the option
79                 <code>'precision'</code>. If no precision is given, no rounding occurs. Use only
80                 <acronym>PHP</acronym> integers to specify the precision.
81             </para>
83             <para>
84                 If the resulting decimal representation should be truncated to a desired length
85                 instead of rounded the option <code>'number_format'</code> can be used instead.
86                 Define the length of the decimal representation with the desired length of zeros.
87                 The result will then not be rounded. So if the defined precision within
88                 <code>number_format</code> is zero the value "1.6" will return "1", not "2". See the
89                 example nearby:
90             </para>
92             <example id="zend.locale.number.normalize.precision.example-1">
93                 <title>Number normalization with precision</title>
95                 <programlisting language="php"><![CDATA[
96 $locale = new Zend_Locale('de_AT');
97 $number = Zend_Locale_Format::getNumber('13.524,678',
98                                         array('precision' => 1,
99                                               'locale' => $locale)
100                                        );
101 print $number; // will return 13524.7
103 $number = Zend_Locale_Format::getNumber('13.524,678',
104                                         array('number_format' => '#.00',
105                                               'locale' => $locale)
106                                        );
107 print $number; // will return 13524.67
108 ]]></programlisting>
109             </example>
110         </sect3>
111     </sect2>
113     <sect2 id="zend.locale.number.localize">
114         <title>Number localization</title>
116         <para>
117             <methodname>toNumber($value, array $options = array())</methodname> can localize numbers
118             to the following <link linkend="zend.locale.appendix">supported locales</link>. This
119             function will return a localized string of the given number in a conventional format for
120             a specific locale. The 'number_format' option explicitly specifies a non-default number
121             format for use with <methodname>toNumber()</methodname>.
122         </para>
124         <example id="zend.locale.number.localize.example-1">
125             <title>Number localization</title>
127             <programlisting language="php"><![CDATA[
128 $locale = new Zend_Locale('de_AT');
129 $number = Zend_Locale_Format::toNumber(13547.36,
130                                        array('locale' => $locale));
132 // will return 13.547,36
133 print $number;
134 ]]></programlisting>
135         </example>
137         <para>
138             <note>
139                 <title>Unlimited length</title>
141                 <para>
142                     <methodname>toNumber()</methodname> can localize numbers with unlimited length.
143                     It is not related to integer or float limitations.
144                 </para>
145             </note>
146         </para>
148         <para>
149             The same way as within <methodname>getNumber()</methodname>,
150             <methodname>toNumber()</methodname> handles precision. If no precision is given, the
151             complete localized number will be returned.
152         </para>
154         <example id="zend.locale.number.localize.example-2">
155             <title>Number localization with precision</title>
157             <programlisting language="php"><![CDATA[
158 $locale = new Zend_Locale('de_AT');
159 $number = Zend_Locale_Format::toNumber(13547.3678,
160                                        array('precision' => 2,
161                                              'locale' => $locale));
163 // will return 13.547,37
164 print $number;
165 ]]></programlisting>
166         </example>
168         <para>
169             Using the option 'number_format' a self defined format for generating a number can be
170             defined. The format itself has to be given in <acronym>CLDR</acronym> format as
171             described below. The locale is used to get separation, precision and other number
172             formatting signs from it. German for example defines ',' as precision separation and in
173             English the '.' sign is used.
174         </para>
176         <table id="zend.locale.number.localize.table-1">
177             <title>Format tokens for self generated number formats</title>
179             <tgroup cols="4">
180                 <thead>
181                     <row>
182                         <entry>Token</entry>
183                         <entry>Description</entry>
184                         <entry>Example format</entry>
185                         <entry>Generated output</entry>
186                     </row>
187                 </thead>
189                 <tbody>
190                     <row>
191                         <entry>#0</entry>
192                         <entry>Generates a number without precision and separation</entry>
193                         <entry>#0</entry>
194                         <entry>1234567</entry>
195                     </row>
197                     <row>
198                         <entry>,</entry>
200                         <entry>
201                             Generates a separation with the length from separation to next
202                             separation or to 0
203                         </entry>
205                         <entry>#,##0</entry>
206                         <entry>1,234,567</entry>
207                     </row>
209                     <row>
210                         <entry>#,##,##0</entry>
212                         <entry>
213                             Generates a standard separation of 3 and all following separations with
214                             2
215                         </entry>
217                         <entry>#,##,##0</entry>
218                         <entry>12,34,567</entry>
219                     </row>
221                     <row>
222                         <entry>.</entry>
223                         <entry>Generates a precision</entry>
224                         <entry>#0.#</entry>
225                         <entry>1234567.1234</entry>
226                     </row>
228                     <row>
229                         <entry>0</entry>
230                         <entry>Generates a precision with a defined length</entry>
231                         <entry>#0.00</entry>
232                         <entry>1234567.12</entry>
233                     </row>
234                 </tbody>
235             </tgroup>
236         </table>
238         <example id="zend.locale.number.localize.example-3">
239             <title>Using a self defined number format</title>
241             <programlisting language="php"><![CDATA[
242 $locale = new Zend_Locale('de_AT');
243 $number = Zend_Locale_Format::toNumber(13547.3678,
244                                        array('number_format' => '#,#0.00',
245                                              'locale' => 'de')
246                                       );
248 // will return 1.35.47,36
249 print $number;
251 $number = Zend_Locale_Format::toNumber(13547.3,
252                                        array('number_format' => '#,##0.00',
253                                              'locale' => 'de')
254                                        );
256 // will return 13.547,30
257 print $number;
258 ]]></programlisting>
259         </example>
260     </sect2>
262     <sect2 id="zend.locale.number.test">
263         <title>Number testing</title>
265         <para>
266             <methodname>isNumber($value, array $options = array())</methodname> checks if a given
267             string is a number and returns <constant>TRUE</constant> or <constant>FALSE</constant>.
268         </para>
270         <example id="zend.locale.number.test.example-1">
271             <title>Number testing</title>
273             <programlisting language="php"><![CDATA[
274 $locale = new Zend_Locale();
275 if (Zend_Locale_Format::isNumber('13.445,36', array('locale' => 'de_AT'))) {
276     print "Number";
277 } else {
278     print "not a Number";
280 ]]></programlisting>
281         </example>
282     </sect2>
284     <sect2 id="zend.locale.float.normalize">
285         <title>Float value normalization</title>
287         <para>
288             Floating point values can be parsed with the
289             <methodname>getFloat($value, array $options = array())</methodname> function. A floating
290             point value will be returned.
291         </para>
293         <example id="zend.locale.float.normalize.example-1">
294             <title>Floating point value normalization</title>
296             <programlisting language="php"><![CDATA[
297 $locale = new Zend_Locale('de_AT');
298 $number = Zend_Locale_Format::getFloat('13.524,678',
299                                        array('precision' => 2,
300                                              'locale' => $locale)
301                                       );
303 // will return 13524.68
304 print $number;
305 ]]></programlisting>
306         </example>
307     </sect2>
309     <sect2 id="zend.locale.float.localize">
310         <title>Floating point value localization</title>
312         <para>
313             <methodname>toFloat()</methodname> can localize floating point values. This function
314             will return a localized string of the given number.
315         </para>
317         <example id="zend.locale.float.localize.example-1">
318             <title>Floating point value localization</title>
320             <programlisting language="php"><![CDATA[
321 $locale = new Zend_Locale('de_AT');
322 $number = Zend_Locale_Format::toFloat(13547.3655,
323                                       array('precision' => 1,
324                                             'locale' => $locale)
325                                      );
327 // will return 13.547,4
328 print $number;
329 ]]></programlisting>
330         </example>
331     </sect2>
333     <sect2 id="zend.locale.float.test">
334         <title>Floating point value testing</title>
336         <para>
337             <methodname>isFloat($value, array $options = array())</methodname> checks if a given
338             string is a floating point value and returns <constant>TRUE</constant> or
339             <constant>FALSE</constant>.
340         </para>
342         <example id="zend.locale.float.test.example-1">
343             <title>Floating point value testing</title>
345             <programlisting language="php"><![CDATA[
346 $locale = new Zend_Locale('de_AT');
347 if (Zend_Locale_Format::isFloat('13.445,36', array('locale' => $locale))) {
348     print "float";
349 } else {
350     print "not a float";
352 ]]></programlisting>
353         </example>
354     </sect2>
356     <sect2 id="zend.locale.integer.normalize">
357         <title>Integer value normalization</title>
359         <para>
360             Integer values can be parsed with the <methodname>getInteger()</methodname> function. An
361             integer value will be returned.
362         </para>
364         <example id="zend.locale.integer.normalize.example-1">
365             <title>Integer value normalization</title>
367             <programlisting language="php"><![CDATA[
368 $locale = new Zend_Locale('de_AT');
369 $number = Zend_Locale_Format::getInteger('13.524,678',
370                                          array('locale' => $locale));
372 // will return 13524
373 print $number;
374 ]]></programlisting>
375         </example>
376     </sect2>
378     <sect2 id="zend.locale.integer.localize">
379         <title>Integer value localization</title>
381         <para>
382             <methodname>toInteger($value, array $options = array())</methodname> can localize
383             integer values. This function will return a localized string of the given number.
384         </para>
386         <example id="zend.locale.integer.localize.example-1">
387             <title>Integer value localization</title>
389             <programlisting language="php"><![CDATA[
390 $locale = new Zend_Locale('de_AT');
391 $number = Zend_Locale_Format::toInteger(13547.3655,
392                                         array('locale' => $locale));
394 // will return 13.547
395 print $number;
396 ]]></programlisting>
397         </example>
398     </sect2>
400     <sect2 id="zend.locale.integer.test">
401         <title>Integer value testing</title>
403         <para>
404             <methodname>isInteger($value, array $options = array())</methodname> checks if a given
405             string is an integer value and returns <constant>TRUE</constant> or
406             <constant>FALSE</constant>.
407         </para>
409         <example id="zend.locale.integer.test.example-1">
410             <title>Integer value testing</title>
412             <programlisting language="php"><![CDATA[
413 $locale = new Zend_Locale('de_AT');
414 if (Zend_Locale_Format::isInteger('13.445', array('locale' => $locale))) {
415     print "integer";
416 } else {
417     print "not a integer";
419 ]]></programlisting>
420         </example>
421     </sect2>
423     <sect2 id="zend.locale.numbersystems">
424         <title>Numeral System Conversion</title>
426         <para>
427             <methodname>Zend_Locale_Format::convertNumerals()</methodname> converts digits between
428             different <ulink url="http://en.wikipedia.org/wiki/Arabic_numerals">numeral
429                 systems</ulink>, including the standard Arabic/European/Latin numeral system
430             (0,1,2,3,4,5,6,7,8,9), not to be confused with <ulink
431                 url="http://en.wikipedia.org/wiki/Eastern_Arabic_numerals">Eastern Arabic
432                 numerals</ulink> sometimes used with the Arabic language to express numerals.
433             Attempts to use an unsupported numeral system will result in an exception, to avoid
434             accidentally performing an incorrect conversion due to a spelling error. All characters
435             in the input, which are not numerals for the selected numeral system, are copied to the
436             output with no conversion provided for unit separator characters.
437             <classname>Zend_Locale</classname>* components rely on the data provided by
438             <acronym>CLDR</acronym> (see their <ulink
439                 url="http://unicode.org/cldr/data/diff/supplemental/languages_and_scripts.html?sortby=date">list
440                 of scripts grouped by language</ulink>).
441         </para>
443         <para>
444             In <acronym>CLDR</acronym> and hereafter, the European/Latin numerals will
445             be referred to as "Latin" or by the assigned 4-letter code "Latn".
446             Also, the <acronym>CLDR</acronym> refers to these numeral systems as "scripts".
447         </para>
449         <para>
450             Suppose a web form collected a numeric input expressed using Eastern Arabic digits
451             "١‎٠٠". Most software and <acronym>PHP</acronym> functions expect input using Arabic
452             numerals. Fortunately, converting this input to its equivalent Latin numerals "100"
453             requires little effort using <methodname>convertNumerals($inputNumeralString,
454                 $sourceNumeralSystem, $destNumeralSystem)</methodname>, which returns the
455             <varname>$inputNumeralString</varname> with numerals in the script
456             <varname>$sourceNumeralSystem</varname> converted to the script
457             <varname>$destNumeralSystem</varname>.
458         </para>
460         <example id="zend.locale.numbersystems.example-1">
461             <title>Converting numerals from Eastern Arabic scripts to European/Latin scripts</title>
463             <programlisting language="php"><![CDATA[
464 $arabicScript = "١‎٠٠";   // Arabic for "100" (one hundred)
465 $latinScript = Zend_Locale_Format::convertNumerals($arabicScript,
466                                                    'Arab',
467                                                    'Latn');
469 print "\nOriginal:   " . $arabicScript;
470 print "\nNormalized: " . $latinScript;
471 ]]></programlisting>
472         </example>
474         <para>
475             Similarly, any of the supported numeral systems may be converted to any other supported
476             numeral system.
477         </para>
479         <example id="zend.locale.numbersystems.example-2">
480             <title>Converting numerals from Latin script to Eastern Arabic script</title>
482             <programlisting language="php"><![CDATA[
483 $latinScript = '123';
484 $arabicScript = Zend_Locale_Format::convertNumerals($latinScript,
485                                                     'Latn',
486                                                     'Arab');
488 print "\nOriginal:  " . $latinScript;
489 print "\nLocalized: " . $arabicScript;
490 ]]></programlisting>
491         </example>
493         <example id="zend.locale.numbersystems.example-3">
494             <title>
495                 Getting 4 letter CLDR script code using a native-language name of the script
496             </title>
498             <programlisting language="php"><![CDATA[
499 function getScriptCode($scriptName, $locale)
501     $scripts2names = Zend_Locale_Data::getList($locale, 'script');
502     $names2scripts = array_flip($scripts2names);
503     return $names2scripts[$scriptName];
505 echo getScriptCode('Latin', 'en'); // outputs "Latn"
506 echo getScriptCode('Tamil', 'en'); // outputs "Taml"
507 echo getScriptCode('tamoul', 'fr'); // outputs "Taml"
508 ]]></programlisting>
509         </example>
511         <para>
512             For a list of supported numeral systems call
513             <methodname>Zend_Locale::getTranslationList('numberingsystem', 'en')</methodname>.
514         </para>
515     </sect2>
516 </sect1>
517 <!--
518 vim:se ts=4 sw=4 et: