3 * html2text converts HTML Markup to Markdown [1]. It also supports
4 * Markdown Extra [2] by Michel Fortin [3].
6 * It started as a port of Aaron Swartz' [4] html2text.py [5] but
7 * has come a long way since. This is more than a mere port now!
9 * [1]: http://daringfireball.net/projects/markdown
10 * [2]: http://www.michelf.com/projects/php-markdown/extra/
11 * [3]: http://www.michelf.com/
12 * [4]: http://www.aaronsw.com/
13 * [5]: http://www.aaronsw.com/2002/html2text/
16 * @author Milian Wolff (mail@milianw.de,http://milianw.de)
17 * @license LGPL, see LICENSE.txt and below
18 * @copyright (C) 2007 Milian Wolff
20 * This library is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU Lesser General Public
22 * License as published by the Free Software Foundation; either
23 * version 2.1 of the License, or (at your option) any later version.
25 * This library is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 * Lesser General Public License for more details.
30 * You should have received a copy of the GNU Lesser General Public
31 * License along with this library; if not, write to the Free Software
32 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
39 # some control structures
43 var $force_html = false;
44 var $force_html_start = array('tag'=>'','parents'=>0);
47 var $astack = array();
52 var $lastWasNL = false;
53 # indenting and appending
56 # these elements will be dropped with all subelements
63 # these elements will be quietly ignored, their children will be parsed
65 'wrapper', # important!
66 'html', # closing html tag
67 'body', # closing body tag
72 # these elements can have some attributes
73 var $has_attrs = array(
74 # tag => list of allowed attrs
81 'a' => array('href','title'),
82 'img' => array('src','alt','title'),
84 'th' => array('align'),
85 'td' => array('align'),
88 'footnote' => array('nr'),
90 'abbr' => array('title'),
91 'acronym' => array('title'),
94 var $max_len = array();
100 var $header = array();
102 var $parents = array();
104 var $abbrs = array();
106 var $buffer = array();
109 var $LINKS_EACH_PARAGRAPH;
115 * setup the xml_parser
116 * $links_each_paragraph: if set to true, the list of links will be
117 * displayed after each paragraph, else it will be displayed at the end of
119 * $body_width: if set to an integer greater than 0 the output text will be
120 * wrapped to that width (in characters)
121 * $keep_html: if set to true, all unrecognized html tags will be kept, else
124 * @param bool $links_each_paragraph default true
125 * @param integer $body_width default 0
126 * @param bool $keep_html default true
129 function html2text($links_each_paragraph = true,$body_width = 0,$keep_html = true) {
# Stores the three conversion options on the object, then creates an XML
# parser and registers this object's methods as its event handlers.
# NOTE(review): this looks like a PHP 4 style constructor (method named after
# the class) - the class declaration is not visible here. PHP 8 no longer
# runs such methods on `new`; confirm and add __construct() if so.
130 $this->LINKS_EACH_PARAGRAPH
= $links_each_paragraph;
131 $this->BODY_WIDTH
= $body_width;
132 $this->KEEP_HTML
= $keep_html;
133 $this->xml_parser
= xml_parser_create();
# handler names given as strings below are resolved as methods of $this
134 xml_set_object($this->xml_parser
, $this);
# case folding off: tag names reach the handlers exactly as written in the input
135 xml_parser_set_option($this->xml_parser
, XML_OPTION_CASE_FOLDING
, 0);
# starttag()/endtag() receive element events
136 xml_set_element_handler($this->xml_parser
, 'starttag', 'endtag');
# handle_data() receives the text between tags
137 xml_set_character_data_handler($this->xml_parser
, 'handle_data');
# handle_default() receives whatever no other handler claims (e.g. html comments)
138 xml_set_default_handler($this->xml_parser
,'handle_default');
141 * parse a html string to text
143 * @param string $html
146 function load_string($html) {
151 # use unix style newlines
152 $html = str_replace("\r","\n",str_replace("\r\n","\n",$html));
153 # remove doctype and xml tags
154 $html = preg_replace('#^.*<body[^>]*>#Us','<html><body>',$html);
158 $html = preg_replace('/<img(?!.*\/>)([^>]*)>/Us','<img$1 />',$html);
159 $html = preg_replace('#&(?!amp;)#','&',$html);
160 $html = str_replace('<','<',$html);
161 $html = preg_replace('#<([a-z]+[^>]*) ?/>#Us','<$1 />',$html);
162 # unmatched tags (poor performance)
163 preg_match_all('#<(([a-z]|h[1-6])+)(?= |>)#',$html,$matches);
164 foreach($matches[1] as $tag){
165 $html = preg_replace('#<'.$tag.'( |>)(.*)</'.$tag.'>#Us','<'.$tag.'$1$2</'.$tag.'>',$html,1);
167 # encode < to &lt; and & to &amp; inside <pre>|<code>
168 $html = preg_replace_callback('#(<pre[^>]*>\s*<code[^>]*>|<code[^>]*>|<pre[^>]*>)(.*)(</pre>\s*</code>|</code>|</pre>)#Us',
171 'return $matches[1].str_replace(\'<\',\'<\',$matches[2]).$matches[3];'
173 # handle empty attributes (e.g. <input checked>)
174 $html = preg_replace_callback('#<([a-z]+)(?>[^>]* [^=]+(?> [^>]*)?) ?/?>#s',array(&$this,'parse_empty_attribs'),$html);
176 $html = '<wrapper>'.$html.'</wrapper>';
178 $html = preg_replace_callback('#<div class="footnotes">\s*<hr />\s*<ol>\s*(<li id="fn:\d+">.+</li>)\s*</ol>\s*</div>#Us',array(&$this,'footnotes'),$html);
179 # last newline inside <pre> should not be parsed
180 $html = preg_replace('#\n</code></pre>#s','</code></pre>',$html);
181 # some html elements should not be parsed if their children won't be parsed:
182 if($this->KEEP_HTML
){
183 # <ul|ol><li class="asdf">, complex because we need to handle nested lists
184 if(preg_match('#<li [^>]+>#',$html)){
185 preg_match_all('#(?:<li [^>]+>|</?(?:ul|ol)[^>]*>)#',$html,$matches,PREG_OFFSET_CAPTURE
);
188 $ins = ' forcehtml="1"';
190 foreach($matches[0] as $k => $a){
191 if(substr($a[0],0,3) == '<li'){
192 $list = &$lists[count($lists)-1];
193 if(!$list['forced']){
194 $list['forced'] = true;
195 $html = substr_replace($html,$ins,$list['offset']+
$offset,0);
199 if(substr($a[0],0,2) == '</'){ # close tag
202 array_push($lists,array(
204 'forced' => strstr($a[0],'forcehtml='),
210 # <pre><code class="asdf">
211 $html = preg_replace('#(?><pre>)\s*(<code .+>)#Us','<pre forcehtml="1">$1',$html);
214 # ok, now lets start parsing!
217 return $this->close();
222 * @param array $matches
225 function footnotes($matches){
# preg_replace_callback() handler for the <div class="footnotes"> block matched
# in load_string(). $matches[1] is the run of <li id="fn:N">...</li> items;
# from the first statement on, $matches is reused as a plain string.
226 # remove the back-reference link (the one pointing at #fnref:N) from every footnote
227 $matches = preg_replace('@<a href="#fnref:\d+" rev="footnote"[^>]*>&#8617;</a>@U','',$matches[1]);
228 # remove paragraphs that are empty once the link is gone
229 $matches = str_replace('<p></p>','',$matches);
230 # wrap the whole list in a single <footnotes> element
231 $matches = '<footnotes>'.$matches.'</footnotes>';
232 # <li id="fn:1">...</li> -> <footnote nr="1">...</footnote> (opening tags first)
233 $matches = str_replace('<li id="fn:','<footnote nr="',$matches);
# closing tags: a </li> followed by the next <footnote or by </footnotes
# becomes </footnote>; any other </li> is left alone
234 return preg_replace('#</li>\s*(<footnote|</footnotes)#s','</footnote>$1',$matches);
237 * @param array $matches
240 function parse_empty_attribs($matches){
# preg_replace_callback() handler used by load_string() for tags that appear
# to carry attributes without a value (e.g. <input checked>).
# $matches[0] is the complete tag, $matches[1] the tag name.
241 if(preg_match('#^<[a-z]+(?: [a-z]+=(?:"[^"]*"|\'[^\']*\'))+ ?/?>$#s',$matches[0])){
242 # false positive: every attribute already has a quoted value, so this tag is correct
# NOTE(review): looks like leftover debug output - dump() is not defined in
# the visible source; confirm and remove
245 echo dump($matches[0]);
# with KEEP_HTML a bare attribute name becomes name="name", otherwise it is dropped
247 $rep = $this->KEEP_HTML ?
'$1="$1"' : '';
# NOTE(review): the pattern in load_string() that triggers this callback seems
# to contain only one capturing group (the others are atomic groups), so
# $matches[2] may never be set - verify against the caller
248 return '<'.$matches[1].preg_replace('#(?<= )([^ =>]{2,})(?= |$)#Us',$rep,$matches[2]).'>';
251 * parse a html file to text
253 * @param string $file
256 function load_file($file) {
257 $contents = file_get_contents($file);
259 trigger_error('could not open XML input',E_USER_WARNING
);
262 return $this->load_string($contents);
265 * start parsing html to text
271 $html = explode("\n", $this->html
);
272 foreach ($html as $line) {
273 if (!xml_parse($this->xml_parser
, $line . "\n")) {
274 $errcode = xml_get_error_code($this->xml_parser
);
275 trigger_error(sprintf("XML error #%d: %s at line %d:<br /><pre><code>%s</code></pre>", $errcode,xml_error_string($errcode), xml_get_current_line_number($this->xml_parser
),htmlspecialchars($line)),E_USER_WARNING
);
281 * close parser and return text
287 xml_parser_free($this->xml_parser
);
289 $this->o('', false, 'end');
293 $this->outtext
= preg_replace_callback('#^(\s*)((> )+)#m',array(&$this,'cleanup_bq'),$this->outtext
);
295 $this->outtext
= str_replace('&','&',str_replace('<','<',str_replace('>','>',$this->outtext
)));
296 # empty lines (not preformatted)
297 $this->outtext
= preg_replace('#^\s{1,4}$#m','',$this->outtext
);
299 $this->outtext
= preg_replace('#^(>+)\s{1,5}$#m','$1',$this->outtext
);
300 return rtrim($this->optwrap($this->outtext
));
303 * replace "> > > " with ">>> "
305 * @param array $m matches
308 function cleanup_bq($m){
309 return $m[1].str_repeat('>',strlen($m[2])/2).' ';
312 * handles html comments
314 * @param resource $parser
315 * @param string $data
318 function handle_default($parser,$data){
319 if(substr($data,0,4) == '<!--' && substr($data,-3) == '-->'){
320 $this->outtext
.= "\n\n".$data."\n";
324 * adds pure data to the output (e.g. <p>DATA</p>)
326 * @param resource $parser
327 * @param string $data
330 function handle_data($parser, $data) {
331 $this->o($data, true);
334 * start tags (e.g. <p>)
336 * @param resource $parser
338 * @param array $attrs
341 function starttag($parser, $tag, $attrs) {
342 $this->handle_tag($tag, $attrs, true);
345 * end tags (e.g. </p>)
347 * @param resource $parser
351 function endtag($parser, $tag) {
352 $this->handle_tag($tag, null, false);
355 * force html output of all children
360 function force_html($tag){
361 $this->force_html
= true;
362 $this->force_html_start
= array(
364 'parents' => isset($this->parents
[$tag]) ?
strlen($this->parents
[$tag]) : 0
368 * parsing logic based on tag name
371 * @param array $attrs
375 function handle_tag($tag, $attrs, $start) {
376 if(in_array($tag,$this->drop
)){ # drop tags with content
384 if(in_array($tag,$this->ignore
)){ # drop tags but keep content
387 # keeping the original html
388 if($this->KEEP_HTML
){
390 # is the force html attr set?
391 if(!$this->force_html
&& isset($attrs['forcehtml'])){
392 $this->force_html($tag);
394 # we'll have to keep this tag
395 if($this->force_html
) {
396 $this->keep_tag($tag,$attrs,$start,true);
399 # tag has attrs which can't be converted
400 if(!empty($attrs) && $this->keep_tag($tag,$attrs,$start)){
405 if($this->force_html
){
406 $this->keep_tag($tag,$attrs,$start,true);
407 if($tag == $this->force_html_start
['tag'] && strlen($this->parents
[$tag]) == $this->force_html_start
['parents']){
408 $this->force_html
= false;
411 } elseif($this->parent($tag,'kept') && $this->keep_tag($tag,$attrs,$start)) {
425 $this->o(str_repeat('#', intval($tag[1])) . ' ');
426 if(!empty($attrs['id'])){
427 $this->append
= ' {#'.$attrs['id'].'}';
430 $this->out($this->append
);
453 $this->indent('> ',$start);
456 $this->out("\n\n".$this->indent
);
471 if(count($attrs) != 1 ||
!isset($attrs['id']) ||
!preg_match('#^fnref:(\d+)$#',$attrs['id'],$matches)){
473 $this->keep_tag($tag,$attrs,$start,true);
477 $this->out('[^'.$matches[1].']');
478 # omit output of link (<a href="#fn:1" rel="footnote">1</a>)
481 # last sup was not parsed -> keep tag
482 if(!$this->parent('sup')){
483 $this->keep_tag($tag,$attrs,$start);
486 # sup was parsed -> reset buffer
495 $this->o('[^'.$attrs['nr']."]:\n".$this->indent
.' ');
498 $this->indent(' ',$start);
502 # buffer to check for inline links like <foo@bar.com> and the like
503 if (isset ($attrs['href'])) {
505 array_push($this->astack
, $attrs);
507 array_push($this->astack
, null);
511 $a = array_pop($this->astack
);
514 $a['href'] = $this->decode($a['href']);
515 $buffer = $this->unbuffer();
516 $buffer_check = $this->decode(trim($buffer));
517 if((substr($a['href'],0,7) == 'mailto:' && 'mailto:'.$buffer_check == $a['href']) ||
$a['href'] == $buffer_check){
519 $this->out('<'.$buffer_check.'>',true);
522 $this->previousIndex($a);
523 $this->out('['.$buffer.']['.$a['count'].']',true);
534 array_push($this->abbrs
,isset($attrs['title'])?
$attrs['title']:'');
536 $abbr = $this->unbuffer();
537 $def = array_pop($this->abbrs
);
538 # only add abbr if its not already defined
539 if(!isset($this->abbrs
[$abbr])){
540 $this->abbrs
[$abbr] = $def;
547 if (isset ($attrs['src'])) {
548 $attrs['href'] = $attrs['src'];
550 if (isset ($attrs['alt'])) {
551 $alt = $attrs['alt'];
552 } elseif(isset($attrs['title'])){
553 $alt = $attrs['title'];
555 $this->previousIndex($attrs);
556 $this->o('!['.$alt.'][' . $attrs['count'] . ']');
561 # do we have to keep this tag?
562 # or is a parent <pre> element existing?
563 if($this->keep_tag($tag,$attrs,$start) ||
$this->parent('pre')){
566 # convert to `code` and handle backticks inside code block
567 # <code>foo`bar</code> has to get ``foo`bar`` and so forth
571 $str = $this->unbuffer();
572 preg_match_all('#`+#',$str,$matches);
573 if(!empty($matches[0])){
575 $len = strlen($matches[0][0])+
1;
579 $ticks = str_repeat('`',$len);
580 $this->out($ticks.$str.$ticks);
584 # note: if <dl> gets parsed, its direct children (<dd> and <dt>) will be parsed as well
590 # is the parent dl parsed?
591 if(!$this->parent('dl')){
592 $this->keep_tag($tag,$attrs,$start,true);
599 $this->outtext
.= "\n";
602 $this->indent(' ',$start);
605 # is the parent dl parsed?
606 if(!$this->parent('dl')){
607 $this->keep_tag($tag,$attrs,$start,true);
616 # note: if this element gets parsed, its direct children <li>s will be parsed as well
618 array_push($this->list, array (
623 array_pop($this->list);
629 $li = &$this->list[count($this->list) - 1];
631 # not inside a list or the list tag was not parsed
632 if(!isset($li) ||
!$this->parent($li['name'])){
633 $this->keep_tag($tag,$attrs,$start,true);
638 if($li['name'] == 'ul'){
643 * @todo line up <ol><li>s > 9 correctly.
645 $this->o($li['num'].'. ');
648 $this->indent(' ',$start);
650 $this->indent(' ',$start);
654 # NOTE: if the <table> tag gets parsed, all its children will be as well!
656 # finally: parse the whole table
658 $this->outtext
.= "\n\n";
659 $separator = array();
660 # separator with correct alignment identifiers
661 foreach($this->cols
as $col => $arr){
662 $this->max_len
[$col] = max($arr);
664 switch($this->align
[$col]){
674 array_push($separator,$left.str_repeat('-',$this->max_len
[$col]).$right);
676 $separator = '| '.implode(' | ',$separator).' |';
678 array_walk($this->rows
,array(&$this,'fill_td'));
680 foreach($rows as $row => $cols){
682 $this->o('| '.implode(' | ',$cols).' |');
683 if(in_array($row,$this->header
)){
685 $this->o($separator);
688 $this->cols
= array();
689 $this->rows
= array();
690 $this->align
= array();
695 # not inside a table or the parent table was not parsed
696 if(!$this->parent('table')){
697 $this->keep_tag($tag,$attrs,$start,true);
707 # not inside a table or the parent table was not parsed
708 if(!$this->parent('table')){
709 $this->keep_tag($tag,$attrs,$start,true);
713 if(!in_array($this->row
,$this->header
)){
714 array_push($this->header
,$this->row
);
717 $this->align
[$this->col
] = !empty($attrs['align']) ?
$attrs['align'] : null;
721 # not inside a table or the parent table was not parsed
722 if(!$this->parent('table')){
723 $this->keep_tag($tag,$attrs,$start,true);
728 if(!empty($attrs['align']) && is_null($this->align
[$this->col
])){
729 $this->align
[$this->col
] = $attrs['align'];
730 if($attrs['align'] == 'center'){
731 $this->max_len
[$this->col
] +
=2;
737 $this->indent(' ',$start,true);
743 $this->keep_tag($tag,$attrs,$start,true);
746 # if we want to keep all non-convertible html this function has to know if some parent elements
747 # were parsed or not (some elements need to know this as well)
749 if(!isset($this->parents
[$tag])){
750 $this->parents
[$tag] = '1';
752 $this->parents
[$tag] .= '1';
755 if($this->LINKS_EACH_PARAGRAPH
&& in_array($tag,array('p','ul','blockquote','ol','dl','table','h1','h2','h3','h4','h5','h6'))){
758 $this->parents
[$tag] = substr($this->parents
[$tag],0,-1);
763 * adds a string to the output ($this->outtext)
764 * also copes with tables
771 if($this->buffer_lvl
){
772 $this->buffer
[$this->buffer_lvl
] .= $str;
775 # this is for tables (see php markdown extra by michel fortin)
776 if(($this->parent('th') ||
$this->parent('td'))){
778 if(!isset($this->rows
[$this->row
][$this->col
])){
779 $this->rows
[$this->row
][$this->col
] = $str;
781 $this->rows
[$this->row
][$this->col
] .= $str;
783 if(!isset($this->cols
[$this->col
][$this->row
])){
784 $this->cols
[$this->col
][$this->row
] = strlen($str);
786 $this->cols
[$this->col
][$this->row
] +
= strlen($str);
790 $this->outtext
.= $str;
793 * further parse the output and add newlines, remove whitespaces and such
795 * @param string $data
796 * @param bool $puredata
797 * @param string $force
800 function o($data, $puredata = false, $force = false) {
801 if($this->parent('table') && trim($data) == ''){ # drop whitespaces inside tables
803 } elseif ($puredata && !$this->parent('code','both') && !$this->parent('pre','both')) { # keep whitespace for code
804 $data = preg_replace('#\s+#', ' ', $data);
806 if (!$data && !$force) {
809 if (!empty($this->indent
)) {
810 $data = str_replace("\n", "\n".$this->indent
, $data);
819 if ($force == 'end') {
828 $data = ltrim($data);
829 $this->out(str_repeat("\n".$this->indent
, $this->p_p
));
834 $this->lastWasNL
= substr($data, -1) == "\n";
838 * display block links after paragraph etc.
847 return; # no links stored
851 foreach($this->a
as $links){
855 foreach($links as $link){
856 $a = $pre.' [' . $link['count'] . ']: ' . $link['href'];
857 if (isset ($link['title'])) {
858 $a .= ' (' . $link['title'] . ')';
865 $this->lastWasNL
= true;
874 if(empty($this->abbrs
)){
875 return; # no abbrs stored
878 foreach($this->abbrs
as $abbr => $def){
879 $this->out('*['.$abbr.']: '.$def."\n");
881 $this->abbrs
= array();
883 $this->lastWasNL
= true;
886 * if the link is already set use its count, else increase acount
888 * @param array &$attrs link attributes
891 function previousIndex(&$attrs) {
892 # check for existing link
893 if(isset($this->a
[$attrs['href']])){
894 foreach($this->a
[$attrs['href']] as $a){
895 if (!empty($attrs['title']) ||
!empty($a['title'])){
896 if($a['title'] == $attrs['title']) {
906 # if we come here, no matching link was found
908 $attrs['count'] = $this->acount
;
909 if(isset($this->a
[$attrs['href']])){
910 array_push($this->a
[$attrs['href']],$attrs);
912 $this->a
[$attrs['href']] = array($attrs);
916 * handles bad html to avoid xml parse errors
918 * @param string $html
921 function handle_bad_html($html){
922 return preg_replace_callback('#<([a-z1-6]+)( [^>]*)?>(.*(?R).*)</\\1>#Us',array(&$this,'replace_bad_html'),$html);
925 * callback function which is used in handle_bad_html()
927 * @param array $matches
930 function replace_bad_html($matches){
932 $matches[3] = $this->handle_bad_html($matches[3]);
933 return '<'.$matches[1].$matches[2].'>'.$matches[3].'</'.$matches[1].'>';
936 * if the option BODY_WIDTH is set, this option will wrap text to the
939 * @param string $text
942 * @todo wrapping of code (also kept code blocks)
944 function optwrap($text) {
945 if ($this->BODY_WIDTH
< 30) {
949 $split = explode("\n", $text);
950 foreach ($split as $para) {
951 if (strlen($para) > 0) {
952 if (preg_match('#^(\s*): #',$para,$indent)) { # definition lists
953 $indent = isset($indent[1]) ?
$indent[1] : '';
954 $result .= wordwrap($para, $this->BODY_WIDTH
- strlen($indent) - 4, "\n".$indent.' ')."\n";
955 } elseif(preg_match('#^(\s*>+)#',$para,$indent)){ # blockquote
956 $result .= wordwrap($para,$this->BODY_WIDTH
- (strlen($indent[0])+
1),"\n".$indent[0].' ')."\n";
957 } elseif(preg_match('#^\s*\|#',$para)){ # table
958 $result .= $para."\n"; # dont wrap
959 } elseif(preg_match('#^(\s*)\*#',$para,$indent)) { # list item @todo: ol
960 $indent = isset($indent[1]) ?
$indent[1] : '';
962 $result .= wordwrap($para,$this->BODY_WIDTH
- strlen($indent),"\n".$indent). "\n";
963 } elseif(preg_match('#^ \[[^\]]+\]:#',$para)){ # block links
964 # don't wrap at the moment
965 $result .= $para."\n";
967 } else { # something else
968 preg_match('#^\s+#',$para,$indent);
969 $indent = isset($indent[0]) ?
$indent[0] : '';
970 $result .= wordwrap($para,$this->BODY_WIDTH
- strlen($indent),"\n".$indent). "\n";
979 * handles html tags which are not represented by the parser logic
980 * if $this->KEEP_HTML is set to true, the tag will be appended to the
981 * output and `markdown="1"` added to its attributes
984 * @param array $attrs
986 * @param bool $force keep the tag even if KEEP_HTML is off or markdown could express all of its attrs
989 function keep_tag($tag,$attrs,$start,$force = false){
990 if(!$force && !$this->KEEP_HTML
){
996 # if there is a attr which cannot be handled by markdown
997 # this tag will be kept.
998 if(isset($this->has_attrs
[$tag])){
999 $known_attrs = $this->has_attrs
[$tag];
1001 $known_attrs = array();
1003 if(!$force && count($known_attrs) >= count($attrs)){
1004 if(empty($attrs) ||
count(array_diff(array_keys($attrs),$known_attrs)) == 0){
1005 # tag can be handled by markdown!
1011 foreach($attrs as $key => $value){
1012 if($key == 'forcehtml'){
1015 $attr.=' '.$key.'="'.$value.'"';
1018 if(!$force && in_array($tag,array('div','center','li','dt','dd'))){
1019 $attr.= ' markdown="1"';
1020 } elseif(!$this->force_html
) {
1021 $this->force_html($tag);
1023 $this->o('<'.$tag.$attr.'>',true);
1024 # add to list of parents:
1025 if(isset($this->parents
[$tag])){
1026 $this->parents
[$tag] .= '2';
1028 $this->parents
[$tag] = '2';
1032 if(!$force && !$this->parent($tag,'kept')){
1033 # the start tag of this element was not parsed
1036 $this->o('</'.$tag.'>');
1037 $this->parents
[$tag] = substr($this->parents
[$tag],0,-1);
1038 # newlines after </tag>
1039 if(in_array($tag,array('th','td','dt','dd','li','p'))){
1043 # newlines after <tag> and </tag>
1044 if(in_array($tag,array('div','center','table','tr','ul','ol','dl','pre'))){
1050 * outputs a cell widened to the proper width
1052 * @param array &$row
1055 function fill_td(&$row){
1057 foreach($row as $col => $cont){
1058 $width = $this->max_len
[$col];
1059 switch($this->align
[$col]){
1062 $row[$col] = str_pad($row[$col],$width,' ',STR_PAD_BOTH
);
1067 $row[$col] = str_pad($row[$col],$width,' ');
1071 $row[$col] = str_pad($row[$col],$width,' ',STR_PAD_LEFT
);
1077 * some sort of <br />
1083 if ($this->p_p
== 0) {
1088 * text <p> (e.g. newlines after output)
1094 if($this->parent('table')){
1100 * add $indent before each line
1102 * @param string $indent
1103 * @param bool $start whether it's an opening tag or a closing one
1104 * @param bool $output shall $indent be outputted? (only if $start is true)
1107 function indent($indent,$start,$output=false){
1112 $this->indent
.= $indent;
1114 $len = strlen($indent);
1115 if($len >= strlen($this->indent
)){
1118 $this->indent
= substr($this->indent
,0,-$len);
1123 * checks if a parent element exists
1124 * use $type to check for a parsed parent element or a kept element
1125 * @param string $parent name of the parent tag
1126 * @param string $type either 'parsed' or 'kept' or 'both'
1129 function parent($parent,$type = 'parsed'){
# $this->parents[$tag] is a string holding one flag per currently open <$tag>:
# '1' is appended when the element was converted (parsed), '2' when it was
# kept as html; the last flag is cut off again when the element closes.
# nothing recorded for this tag name:
1130 if(!isset($this->parents
[$parent])){
# 'parsed' looks for flag '1'; any other value except 'both' is treated as 'kept' ('2')
1133 if($type != 'both'){
1134 $type = $type == 'parsed' ?
'1' : '2';
# only the innermost open element of that name is examined
1135 return substr($this->parents
[$parent],-1) === $type;
# 'both': true as soon as any element of that name is open, parsed or kept
1137 return !empty($this->parents
[$parent]);
1148 $this->out(str_repeat("\n".$this->indent
, $this->p_p
));
1151 $this->buffer_lvl++
;
1152 $this->buffer
[$this->buffer_lvl
] = '';
1155 * end buffer and return buffered output
1160 function unbuffer(){
1161 $out = $this->buffer
[$this->buffer_lvl
];
1162 unset($this->buffer
[$this->buffer_lvl
]);
1163 $this->buffer_lvl
--;
1169 * @author derernst@gmx.ch <http://www.php.net/manual/en/function.html-entity-decode.php#68536>
1171 function decode($text,$quote_style = ENT_NOQUOTES
){
# Decodes html entities in $text. Named entities go through
# html_entity_decode() where available, otherwise through the flipped
# translation table; numeric references (&#NN; and &#xHH;) are then turned
# into single bytes with chr().
1172 if (function_exists('html_entity_decode')) {
1173 $text = html_entity_decode($text, $quote_style, 'ISO-8859-1'); // NOTE: UTF-8 does not work!
1176 $trans_tbl = get_html_translation_table(HTML_ENTITIES
, $quote_style);
1177 $trans_tbl = array_flip($trans_tbl);
1178 $text = strtr($text, $trans_tbl);
# The /e (eval) pattern modifier used here before was removed in PHP 7.0;
# preg_replace_callback() performs the same substitution without evaluating
# a code string. Closures need PHP 5.3 or later.
1180 $text = preg_replace_callback('~&#x([0-9a-f]+);~i', function ($m) { return chr(hexdec($m[1])); }, $text);
1181 $text = preg_replace_callback('~&#([0-9]+);~', function ($m) { return chr((int) $m[1]); }, $text);