third_party/libxml/src/HTMLtree.c

   1 /*
   2  * HTMLtree.c : implementation of access function for an HTML tree.
   3  *
   4  * See Copyright for the status of this software.
   5  *
   6  * daniel@veillard.com
   7  */
   8
   9
  10 #define IN_LIBXML
  11 #include "libxml.h"
  12 #ifdef LIBXML_HTML_ENABLED
  13
  14 #include <string.h> /* for memset() only ! */
  15
  16 #ifdef HAVE_CTYPE_H
  17 #include <ctype.h>
  18 #endif
  19 #ifdef HAVE_STDLIB_H
  20 #include <stdlib.h>
  21 #endif
  22
  23 #include <libxml/xmlmemory.h>
  24 #include <libxml/HTMLparser.h>
  25 #include <libxml/HTMLtree.h>
  26 #include <libxml/entities.h>
  27 #include <libxml/valid.h>
  28 #include <libxml/xmlerror.h>
  29 #include <libxml/parserInternals.h>
  30 #include <libxml/globals.h>
  31 #include <libxml/uri.h>
  32
  33 /************************************************************************
  34  *                                                                      *
  35  *              Getting/Setting encoding meta tags                      *
  36  *                                                                      *
  37  ************************************************************************/
  38
  39 /**
  40  * htmlGetMetaEncoding:
  41  * @doc:  the document
  42  *
  43  * Encoding definition lookup in the Meta tags
  44  *
  45  * Returns the current encoding as flagged in the HTML source
  46  */
  47 const xmlChar *
  48 htmlGetMetaEncoding(htmlDocPtr doc) {
  49     htmlNodePtr cur;
  50     const xmlChar *content;
  51     const xmlChar *encoding;
  52
  53     if (doc == NULL)
  54         return(NULL);
  55     cur = doc->children;
  56
  57     /*
  58      * Search the html
  59      */
  60     while (cur != NULL) {
  61         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  62             if (xmlStrEqual(cur->name, BAD_CAST"html"))
  63                 break;
  64             if (xmlStrEqual(cur->name, BAD_CAST"head"))
  65                 goto found_head;
  66             if (xmlStrEqual(cur->name, BAD_CAST"meta"))
  67                 goto found_meta;
  68         }
  69         cur = cur->next;
  70     }
  71     if (cur == NULL)
  72         return(NULL);
  73     cur = cur->children;
  74
  75     /*
  76      * Search the head
  77      */
  78     while (cur != NULL) {
  79         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  80             if (xmlStrEqual(cur->name, BAD_CAST"head"))
  81                 break;
  82             if (xmlStrEqual(cur->name, BAD_CAST"meta"))
  83                 goto found_meta;
  84         }
  85         cur = cur->next;
  86     }
  87     if (cur == NULL)
  88         return(NULL);
  89 found_head:
  90     cur = cur->children;
  91
  92     /*
  93      * Search the meta elements
  94      */
  95 found_meta:
  96     while (cur != NULL) {
  97         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
  98             if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
  99                 xmlAttrPtr attr = cur->properties;
 100                 int http;
 101                 const xmlChar *value;
 102
 103                 content = NULL;
 104                 http = 0;
 105                 while (attr != NULL) {
 106                     if ((attr->children != NULL) &&
 107                         (attr->children->type == XML_TEXT_NODE) &&
 108                         (attr->children->next == NULL)) {
 109                         value = attr->children->content;
 110                         if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 111                          && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 112                             http = 1;
 113                         else if ((value != NULL)
 114                          && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 115                             content = value;
 116                         if ((http != 0) && (content != NULL))
 117                             goto found_content;
 118                     }
 119                     attr = attr->next;
 120                 }
 121             }
 122         }
 123         cur = cur->next;
 124     }
 125     return(NULL);
 126
 127 found_content:
 128     encoding = xmlStrstr(content, BAD_CAST"charset=");
 129     if (encoding == NULL)
 130         encoding = xmlStrstr(content, BAD_CAST"Charset=");
 131     if (encoding == NULL)
 132         encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
 133     if (encoding != NULL) {
 134         encoding += 8;
 135     } else {
 136         encoding = xmlStrstr(content, BAD_CAST"charset =");
 137         if (encoding == NULL)
 138             encoding = xmlStrstr(content, BAD_CAST"Charset =");
 139         if (encoding == NULL)
 140             encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
 141         if (encoding != NULL)
 142             encoding += 9;
 143     }
 144     if (encoding != NULL) {
 145         while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
 146     }
 147     return(encoding);
 148 }
 149
 150 /**
 151  * htmlSetMetaEncoding:
 152  * @doc:  the document
 153  * @encoding:  the encoding string
 154  *
 155  * Sets the current encoding in the Meta tags
 156  * NOTE: this will not change the document content encoding, just
 157  * the META flag associated.
 158  *
 159  * Returns 0 in case of success and -1 in case of error
 160  */
 161 int
 162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
 163     htmlNodePtr cur, meta = NULL, head = NULL;
 164     const xmlChar *content = NULL;
 165     char newcontent[100];
 166
 167
 168     if (doc == NULL)
 169         return(-1);
 170
 171     /* html isn't a real encoding it's just libxml2 way to get entities */
 172     if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
 173         return(-1);
 174
 175     if (encoding != NULL) {
 176         snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
 177                 (char *)encoding);
 178         newcontent[sizeof(newcontent) - 1] = 0;
 179     }
 180
 181     cur = doc->children;
 182
 183     /*
 184      * Search the html
 185      */
 186     while (cur != NULL) {
 187         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 188             if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
 189                 break;
 190             if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 191                 goto found_head;
 192             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
 193                 goto found_meta;
 194         }
 195         cur = cur->next;
 196     }
 197     if (cur == NULL)
 198         return(-1);
 199     cur = cur->children;
 200
 201     /*
 202      * Search the head
 203      */
 204     while (cur != NULL) {
 205         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 206             if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 207                 break;
 208             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
 209                 head = cur->parent;
 210                 goto found_meta;
 211             }
 212         }
 213         cur = cur->next;
 214     }
 215     if (cur == NULL)
 216         return(-1);
 217 found_head:
 218     head = cur;
 219     if (cur->children == NULL)
 220         goto create;
 221     cur = cur->children;
 222
 223 found_meta:
 224     /*
 225      * Search and update all the remaining the meta elements carrying
 226      * encoding informations
 227      */
 228     while (cur != NULL) {
 229         if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 230             if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
 231                 xmlAttrPtr attr = cur->properties;
 232                 int http;
 233                 const xmlChar *value;
 234
 235                 content = NULL;
 236                 http = 0;
 237                 while (attr != NULL) {
 238                     if ((attr->children != NULL) &&
 239                         (attr->children->type == XML_TEXT_NODE) &&
 240                         (attr->children->next == NULL)) {
 241                         value = attr->children->content;
 242                         if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 243                          && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 244                             http = 1;
 245                         else
 246                         {
 247                            if ((value != NULL) &&
 248                                (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 249                                content = value;
 250                         }
 251                         if ((http != 0) && (content != NULL))
 252                             break;
 253                     }
 254                     attr = attr->next;
 255                 }
 256                 if ((http != 0) && (content != NULL)) {
 257                     meta = cur;
 258                     break;
 259                 }
 260
 261             }
 262         }
 263         cur = cur->next;
 264     }
 265 create:
 266     if (meta == NULL) {
 267         if ((encoding != NULL) && (head != NULL)) {
 268             /*
 269              * Create a new Meta element with the right attributes
 270              */
 271
 272             meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
 273             if (head->children == NULL)
 274                 xmlAddChild(head, meta);
 275             else
 276                 xmlAddPrevSibling(head->children, meta);
 277             xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
 278             xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 279         }
 280     } else {
 281         /* change the document only if there is a real encoding change */
 282         if (xmlStrcasestr(content, encoding) == NULL) {
 283             xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 284         }
 285     }
 286
 287
 288     return(0);
 289 }
 290
 291 /**
 292  * booleanHTMLAttrs:
 293  *
 294  * These are the HTML attributes which will be output
 295  * in minimized form, i.e. <option selected="selected"> will be
 296  * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 297  *
 298  */
 299 static const char* htmlBooleanAttrs[] = {
 300   "checked", "compact", "declare", "defer", "disabled", "ismap",
 301   "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
 302   "selected", NULL
 303 };
 304
 305
 306 /**
 307  * htmlIsBooleanAttr:
 308  * @name:  the name of the attribute to check
 309  *
 310  * Determine if a given attribute is a boolean attribute.
 311  *
 312  * returns: false if the attribute is not boolean, true otherwise.
 313  */
 314 int
 315 htmlIsBooleanAttr(const xmlChar *name)
 316 {
 317     int i = 0;
 318
 319     while (htmlBooleanAttrs[i] != NULL) {
 320         if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
 321             return 1;
 322         i++;
 323     }
 324     return 0;
 325 }
 326
 327 #ifdef LIBXML_OUTPUT_ENABLED
 328 /*
 329  * private routine exported from xmlIO.c
 330  */
 331 xmlOutputBufferPtr
 332 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
 333 /************************************************************************
 334  *                                                                      *
 335  *                      Output error handlers                           *
 336  *                                                                      *
 337  ************************************************************************/
 338 /**
 339  * htmlSaveErrMemory:
 340  * @extra:  extra informations
 341  *
 342  * Handle an out of memory condition
 343  */
 344 static void
 345 htmlSaveErrMemory(const char *extra)
 346 {
 347     __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
 348 }
 349
 350 /**
 351  * htmlSaveErr:
 352  * @code:  the error number
 353  * @node:  the location of the error.
 354  * @extra:  extra informations
 355  *
 356  * Handle an out of memory condition
 357  */
 358 static void
 359 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
 360 {
 361     const char *msg = NULL;
 362
 363     switch(code) {
 364         case XML_SAVE_NOT_UTF8:
 365             msg = "string is not in UTF-8\n";
 366             break;
 367         case XML_SAVE_CHAR_INVALID:
 368             msg = "invalid character value\n";
 369             break;
 370         case XML_SAVE_UNKNOWN_ENCODING:
 371             msg = "unknown encoding %s\n";
 372             break;
 373         case XML_SAVE_NO_DOCTYPE:
 374             msg = "HTML has no DOCTYPE\n";
 375             break;
 376         default:
 377             msg = "unexpected error number\n";
 378     }
 379     __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
 380 }
 381
 382 /************************************************************************
 383  *                                                                      *
 384  *              Dumping HTML tree content to a simple buffer            *
 385  *                                                                      *
 386  ************************************************************************/
 387
 388 static int
 389 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 390                    int format);
 391
 392 /**
 393  * htmlNodeDumpFormat:
 394  * @buf:  the HTML buffer output
 395  * @doc:  the document
 396  * @cur:  the current node
 397  * @format:  should formatting spaces been added
 398  *
 399  * Dump an HTML node, recursive behaviour,children are printed too.
 400  *
 401  * Returns the number of byte written or -1 in case of error
 402  */
 403 static int
 404 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 405                    int format) {
 406     unsigned int use;
 407     int ret;
 408     xmlOutputBufferPtr outbuf;
 409
 410     if (cur == NULL) {
 411         return (-1);
 412     }
 413     if (buf == NULL) {
 414         return (-1);
 415     }
 416     outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
 417     if (outbuf == NULL) {
 418         htmlSaveErrMemory("allocating HTML output buffer");
 419         return (-1);
 420     }
 421     memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
 422     outbuf->buffer = buf;
 423     outbuf->encoder = NULL;
 424     outbuf->writecallback = NULL;
 425     outbuf->closecallback = NULL;
 426     outbuf->context = NULL;
 427     outbuf->written = 0;
 428
 429     use = buf->use;
 430     htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
 431     xmlFree(outbuf);
 432     ret = buf->use - use;
 433     return (ret);
 434 }
 435
 436 /**
 437  * htmlNodeDump:
 438  * @buf:  the HTML buffer output
 439  * @doc:  the document
 440  * @cur:  the current node
 441  *
 442  * Dump an HTML node, recursive behaviour,children are printed too,
 443  * and formatting returns are added.
 444  *
 445  * Returns the number of byte written or -1 in case of error
 446  */
 447 int
 448 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
 449     xmlInitParser();
 450
 451     return(htmlNodeDumpFormat(buf, doc, cur, 1));
 452 }
 453
 454 /**
 455  * htmlNodeDumpFileFormat:
 456  * @out:  the FILE pointer
 457  * @doc:  the document
 458  * @cur:  the current node
 459  * @encoding: the document encoding
 460  * @format:  should formatting spaces been added
 461  *
 462  * Dump an HTML node, recursive behaviour,children are printed too.
 463  *
 464  * TODO: if encoding == NULL try to save in the doc encoding
 465  *
 466  * returns: the number of byte written or -1 in case of failure.
 467  */
 468 int
 469 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
 470                        xmlNodePtr cur, const char *encoding, int format) {
 471     xmlOutputBufferPtr buf;
 472     xmlCharEncodingHandlerPtr handler = NULL;
 473     int ret;
 474
 475     xmlInitParser();
 476
 477     if (encoding != NULL) {
 478         xmlCharEncoding enc;
 479
 480         enc = xmlParseCharEncoding(encoding);
 481         if (enc != XML_CHAR_ENCODING_UTF8) {
 482             handler = xmlFindCharEncodingHandler(encoding);
 483             if (handler == NULL)
 484                 return(-1);
 485         }
 486     }
 487
 488     /*
 489      * Fallback to HTML or ASCII when the encoding is unspecified
 490      */
 491     if (handler == NULL)
 492         handler = xmlFindCharEncodingHandler("HTML");
 493     if (handler == NULL)
 494         handler = xmlFindCharEncodingHandler("ascii");
 495
 496     /*
 497      * save the content to a temp buffer.
 498      */
 499     buf = xmlOutputBufferCreateFile(out, handler);
 500     if (buf == NULL) return(0);
 501
 502     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 503
 504     ret = xmlOutputBufferClose(buf);
 505     return(ret);
 506 }
 507
 508 /**
 509  * htmlNodeDumpFile:
 510  * @out:  the FILE pointer
 511  * @doc:  the document
 512  * @cur:  the current node
 513  *
 514  * Dump an HTML node, recursive behaviour,children are printed too,
 515  * and formatting returns are added.
 516  */
 517 void
 518 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
 519     htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
 520 }
 521
 522 /**
 523  * htmlDocDumpMemoryFormat:
 524  * @cur:  the document
 525  * @mem:  OUT: the memory pointer
 526  * @size:  OUT: the memory length
 527  * @format:  should formatting spaces been added
 528  *
 529  * Dump an HTML document in memory and return the xmlChar * and it's size.
 530  * It's up to the caller to free the memory.
 531  */
 532 void
 533 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
 534     xmlOutputBufferPtr buf;
 535     xmlCharEncodingHandlerPtr handler = NULL;
 536     const char *encoding;
 537
 538     xmlInitParser();
 539
 540     if ((mem == NULL) || (size == NULL))
 541         return;
 542     if (cur == NULL) {
 543         *mem = NULL;
 544         *size = 0;
 545         return;
 546     }
 547
 548     encoding = (const char *) htmlGetMetaEncoding(cur);
 549
 550     if (encoding != NULL) {
 551         xmlCharEncoding enc;
 552
 553         enc = xmlParseCharEncoding(encoding);
 554         if (enc != cur->charset) {
 555             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
 556                 /*
 557                  * Not supported yet
 558                  */
 559                 *mem = NULL;
 560                 *size = 0;
 561                 return;
 562             }
 563
 564             handler = xmlFindCharEncodingHandler(encoding);
 565             if (handler == NULL) {
 566                 *mem = NULL;
 567                 *size = 0;
 568                 return;
 569             }
 570         } else {
 571             handler = xmlFindCharEncodingHandler(encoding);
 572         }
 573     }
 574
 575     /*
 576      * Fallback to HTML or ASCII when the encoding is unspecified
 577      */
 578     if (handler == NULL)
 579         handler = xmlFindCharEncodingHandler("HTML");
 580     if (handler == NULL)
 581         handler = xmlFindCharEncodingHandler("ascii");
 582
 583     buf = xmlAllocOutputBufferInternal(handler);
 584     if (buf == NULL) {
 585         *mem = NULL;
 586         *size = 0;
 587         return;
 588     }
 589
 590         htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
 591
 592     xmlOutputBufferFlush(buf);
 593     if (buf->conv != NULL) {
 594         *size = buf->conv->use;
 595         *mem = xmlStrndup(buf->conv->content, *size);
 596     } else {
 597         *size = buf->buffer->use;
 598         *mem = xmlStrndup(buf->buffer->content, *size);
 599     }
 600     (void)xmlOutputBufferClose(buf);
 601 }
 602
 603 /**
 604  * htmlDocDumpMemory:
 605  * @cur:  the document
 606  * @mem:  OUT: the memory pointer
 607  * @size:  OUT: the memory length
 608  *
 609  * Dump an HTML document in memory and return the xmlChar * and it's size.
 610  * It's up to the caller to free the memory.
 611  */
 612 void
 613 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
 614         htmlDocDumpMemoryFormat(cur, mem, size, 1);
 615 }
 616
 617
 618 /************************************************************************
 619  *                                                                      *
 620  *              Dumping HTML tree content to an I/O output buffer       *
 621  *                                                                      *
 622  ************************************************************************/
 623
 624 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
 625
 626 /**
 627  * htmlDtdDumpOutput:
 628  * @buf:  the HTML buffer output
 629  * @doc:  the document
 630  * @encoding:  the encoding string
 631  *
 632  * TODO: check whether encoding is needed
 633  *
 634  * Dump the HTML document DTD, if any.
 635  */
 636 static void
 637 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 638                   const char *encoding ATTRIBUTE_UNUSED) {
 639     xmlDtdPtr cur = doc->intSubset;
 640
 641     if (cur == NULL) {
 642         htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
 643         return;
 644     }
 645     xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
 646     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 647     if (cur->ExternalID != NULL) {
 648         xmlOutputBufferWriteString(buf, " PUBLIC ");
 649         xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
 650         if (cur->SystemID != NULL) {
 651             xmlOutputBufferWriteString(buf, " ");
 652             xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
 653         }
 654     }  else if (cur->SystemID != NULL) {
 655         xmlOutputBufferWriteString(buf, " SYSTEM ");
 656         xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
 657     }
 658     xmlOutputBufferWriteString(buf, ">\n");
 659 }
 660
 661 /**
 662  * htmlAttrDumpOutput:
 663  * @buf:  the HTML buffer output
 664  * @doc:  the document
 665  * @cur:  the attribute pointer
 666  * @encoding:  the encoding string
 667  *
 668  * Dump an HTML attribute
 669  */
 670 static void
 671 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
 672                    const char *encoding ATTRIBUTE_UNUSED) {
 673     xmlChar *value;
 674
 675     /*
 676      * TODO: The html output method should not escape a & character
 677      *       occurring in an attribute value immediately followed by
 678      *       a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
 679      */
 680
 681     if (cur == NULL) {
 682         return;
 683     }
 684     xmlOutputBufferWriteString(buf, " ");
 685     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 686         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 687         xmlOutputBufferWriteString(buf, ":");
 688     }
 689     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 690     if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
 691         value = xmlNodeListGetString(doc, cur->children, 0);
 692         if (value) {
 693             xmlOutputBufferWriteString(buf, "=");
 694             if ((cur->ns == NULL) && (cur->parent != NULL) &&
 695                 (cur->parent->ns == NULL) &&
 696                 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
 697                  (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
 698                  (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
 699                  ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
 700                   (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
 701                 xmlChar *escaped;
 702                 xmlChar *tmp = value;
 703
 704                 while (IS_BLANK_CH(*tmp)) tmp++;
 705
 706                 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
 707                 if (escaped != NULL) {
 708                     xmlBufferWriteQuotedString(buf->buffer, escaped);
 709                     xmlFree(escaped);
 710                 } else {
 711                     xmlBufferWriteQuotedString(buf->buffer, value);
 712                 }
 713             } else {
 714                 xmlBufferWriteQuotedString(buf->buffer, value);
 715             }
 716             xmlFree(value);
 717         } else  {
 718             xmlOutputBufferWriteString(buf, "=\"\"");
 719         }
 720     }
 721 }
 722
 723 /**
 724  * htmlAttrListDumpOutput:
 725  * @buf:  the HTML buffer output
 726  * @doc:  the document
 727  * @cur:  the first attribute pointer
 728  * @encoding:  the encoding string
 729  *
 730  * Dump a list of HTML attributes
 731  */
 732 static void
 733 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
 734     if (cur == NULL) {
 735         return;
 736     }
 737     while (cur != NULL) {
 738         htmlAttrDumpOutput(buf, doc, cur, encoding);
 739         cur = cur->next;
 740     }
 741 }
 742
 743
 744
 745 /**
 746  * htmlNodeListDumpOutput:
 747  * @buf:  the HTML buffer output
 748  * @doc:  the document
 749  * @cur:  the first node
 750  * @encoding:  the encoding string
 751  * @format:  should formatting spaces been added
 752  *
 753  * Dump an HTML node list, recursive behaviour,children are printed too.
 754  */
 755 static void
 756 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 757                        xmlNodePtr cur, const char *encoding, int format) {
 758     if (cur == NULL) {
 759         return;
 760     }
 761     while (cur != NULL) {
 762         htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 763         cur = cur->next;
 764     }
 765 }
 766
 767 /**
 768  * htmlNodeDumpFormatOutput:
 769  * @buf:  the HTML buffer output
 770  * @doc:  the document
 771  * @cur:  the current node
 772  * @encoding:  the encoding string
 773  * @format:  should formatting spaces been added
 774  *
 775  * Dump an HTML node, recursive behaviour,children are printed too.
 776  */
 777 void
 778 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 779                          xmlNodePtr cur, const char *encoding, int format) {
 780     const htmlElemDesc * info;
 781
 782     xmlInitParser();
 783
 784     if ((cur == NULL) || (buf == NULL)) {
 785         return;
 786     }
 787     /*
 788      * Special cases.
 789      */
 790     if (cur->type == XML_DTD_NODE)
 791         return;
 792     if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
 793         (cur->type == XML_DOCUMENT_NODE)){
 794         htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
 795         return;
 796     }
 797     if (cur->type == XML_ATTRIBUTE_NODE) {
 798         htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
 799         return;
 800     }
 801     if (cur->type == HTML_TEXT_NODE) {
 802         if (cur->content != NULL) {
 803             if (((cur->name == (const xmlChar *)xmlStringText) ||
 804                  (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
 805                 ((cur->parent == NULL) ||
 806                  ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
 807                   (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
 808                 xmlChar *buffer;
 809
 810                 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
 811                 if (buffer != NULL) {
 812                     xmlOutputBufferWriteString(buf, (const char *)buffer);
 813                     xmlFree(buffer);
 814                 }
 815             } else {
 816                 xmlOutputBufferWriteString(buf, (const char *)cur->content);
 817             }
 818         }
 819         return;
 820     }
 821     if (cur->type == HTML_COMMENT_NODE) {
 822         if (cur->content != NULL) {
 823             xmlOutputBufferWriteString(buf, "<!--");
 824             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 825             xmlOutputBufferWriteString(buf, "-->");
 826         }
 827         return;
 828     }
 829     if (cur->type == HTML_PI_NODE) {
 830         if (cur->name == NULL)
 831             return;
 832         xmlOutputBufferWriteString(buf, "<?");
 833         xmlOutputBufferWriteString(buf, (const char *)cur->name);
 834         if (cur->content != NULL) {
 835             xmlOutputBufferWriteString(buf, " ");
 836             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 837         }
 838         xmlOutputBufferWriteString(buf, ">");
 839         return;
 840     }
 841     if (cur->type == HTML_ENTITY_REF_NODE) {
 842         xmlOutputBufferWriteString(buf, "&");
 843         xmlOutputBufferWriteString(buf, (const char *)cur->name);
 844         xmlOutputBufferWriteString(buf, ";");
 845         return;
 846     }
 847     if (cur->type == HTML_PRESERVE_NODE) {
 848         if (cur->content != NULL) {
 849             xmlOutputBufferWriteString(buf, (const char *)cur->content);
 850         }
 851         return;
 852     }
 853
 854     /*
 855      * Get specific HTML info for that node.
 856      */
 857     if (cur->ns == NULL)
 858         info = htmlTagLookup(cur->name);
 859     else
 860         info = NULL;
 861
 862     xmlOutputBufferWriteString(buf, "<");
 863     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 864         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 865         xmlOutputBufferWriteString(buf, ":");
 866     }
 867     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 868     if (cur->nsDef)
 869         xmlNsListDumpOutput(buf, cur->nsDef);
 870     if (cur->properties != NULL)
 871         htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
 872
 873     if ((info != NULL) && (info->empty)) {
 874         xmlOutputBufferWriteString(buf, ">");
 875         if ((format) && (!info->isinline) && (cur->next != NULL)) {
 876             if ((cur->next->type != HTML_TEXT_NODE) &&
 877                 (cur->next->type != HTML_ENTITY_REF_NODE) &&
 878                 (cur->parent != NULL) &&
 879                 (cur->parent->name != NULL) &&
 880                 (cur->parent->name[0] != 'p')) /* p, pre, param */
 881                 xmlOutputBufferWriteString(buf, "\n");
 882         }
 883         return;
 884     }
 885     if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
 886         (cur->children == NULL)) {
 887         if ((info != NULL) && (info->saveEndTag != 0) &&
 888             (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
 889             (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
 890             xmlOutputBufferWriteString(buf, ">");
 891         } else {
 892             xmlOutputBufferWriteString(buf, "></");
 893             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 894                 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 895                 xmlOutputBufferWriteString(buf, ":");
 896             }
 897             xmlOutputBufferWriteString(buf, (const char *)cur->name);
 898             xmlOutputBufferWriteString(buf, ">");
 899         }
 900         if ((format) && (cur->next != NULL) &&
 901             (info != NULL) && (!info->isinline)) {
 902             if ((cur->next->type != HTML_TEXT_NODE) &&
 903                 (cur->next->type != HTML_ENTITY_REF_NODE) &&
 904                 (cur->parent != NULL) &&
 905                 (cur->parent->name != NULL) &&
 906                 (cur->parent->name[0] != 'p')) /* p, pre, param */
 907                 xmlOutputBufferWriteString(buf, "\n");
 908         }
 909         return;
 910     }
 911     xmlOutputBufferWriteString(buf, ">");
 912     if ((cur->type != XML_ELEMENT_NODE) &&
 913         (cur->content != NULL)) {
 914             /*
 915              * Uses the OutputBuffer property to automatically convert
 916              * invalids to charrefs
 917              */
 918
 919             xmlOutputBufferWriteString(buf, (const char *) cur->content);
 920     }
 921     if (cur->children != NULL) {
 922         if ((format) && (info != NULL) && (!info->isinline) &&
 923             (cur->children->type != HTML_TEXT_NODE) &&
 924             (cur->children->type != HTML_ENTITY_REF_NODE) &&
 925             (cur->children != cur->last) &&
 926             (cur->name != NULL) &&
 927             (cur->name[0] != 'p')) /* p, pre, param */
 928             xmlOutputBufferWriteString(buf, "\n");
 929         htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
 930         if ((format) && (info != NULL) && (!info->isinline) &&
 931             (cur->last->type != HTML_TEXT_NODE) &&
 932             (cur->last->type != HTML_ENTITY_REF_NODE) &&
 933             (cur->children != cur->last) &&
 934             (cur->name != NULL) &&
 935             (cur->name[0] != 'p')) /* p, pre, param */
 936             xmlOutputBufferWriteString(buf, "\n");
 937     }
 938     xmlOutputBufferWriteString(buf, "</");
 939     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 940         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 941         xmlOutputBufferWriteString(buf, ":");
 942     }
 943     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 944     xmlOutputBufferWriteString(buf, ">");
 945     if ((format) && (info != NULL) && (!info->isinline) &&
 946         (cur->next != NULL)) {
 947         if ((cur->next->type != HTML_TEXT_NODE) &&
 948             (cur->next->type != HTML_ENTITY_REF_NODE) &&
 949             (cur->parent != NULL) &&
 950             (cur->parent->name != NULL) &&
 951             (cur->parent->name[0] != 'p')) /* p, pre, param */
 952             xmlOutputBufferWriteString(buf, "\n");
 953     }
 954 }
 955
 956 /**
 957  * htmlNodeDumpOutput:
 958  * @buf:  the HTML buffer output
 959  * @doc:  the document
 960  * @cur:  the current node
 961  * @encoding:  the encoding string
 962  *
 963  * Dump an HTML node, recursive behaviour,children are printed too,
 964  * and formatting returns/spaces are added.
 965  */
 966 void
 967 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 968                    xmlNodePtr cur, const char *encoding) {
 969     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
 970 }
 971
 972 /**
 973  * htmlDocContentDumpFormatOutput:
 974  * @buf:  the HTML buffer output
 975  * @cur:  the document
 976  * @encoding:  the encoding string
 977  * @format:  should formatting spaces been added
 978  *
 979  * Dump an HTML document.
 980  */
 981 void
 982 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
 983                                const char *encoding, int format) {
 984     int type;
 985
 986     xmlInitParser();
 987
 988     if ((buf == NULL) || (cur == NULL))
 989         return;
 990
 991     /*
 992      * force to output the stuff as HTML, especially for entities
 993      */
 994     type = cur->type;
 995     cur->type = XML_HTML_DOCUMENT_NODE;
 996     if (cur->intSubset != NULL) {
 997         htmlDtdDumpOutput(buf, cur, NULL);
 998     }
 999     if (cur->children != NULL) {
1000         htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
1001     }
1002     xmlOutputBufferWriteString(buf, "\n");
1003     cur->type = (xmlElementType) type;
1004 }
1005
1006 /**
1007  * htmlDocContentDumpOutput:
1008  * @buf:  the HTML buffer output
1009  * @cur:  the document
1010  * @encoding:  the encoding string
1011  *
1012  * Dump an HTML document. Formating return/spaces are added.
1013  */
1014 void
1015 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1016                          const char *encoding) {
1017     htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1018 }
1019
1020 /************************************************************************
1021  *                                                                      *
1022  *              Saving functions front-ends                             *
1023  *                                                                      *
1024  ************************************************************************/
1025
1026 /**
1027  * htmlDocDump:
1028  * @f:  the FILE*
1029  * @cur:  the document
1030  *
1031  * Dump an HTML document to an open FILE.
1032  *
1033  * returns: the number of byte written or -1 in case of failure.
1034  */
1035 int
1036 htmlDocDump(FILE *f, xmlDocPtr cur) {
1037     xmlOutputBufferPtr buf;
1038     xmlCharEncodingHandlerPtr handler = NULL;
1039     const char *encoding;
1040     int ret;
1041
1042     xmlInitParser();
1043
1044     if ((cur == NULL) || (f == NULL)) {
1045         return(-1);
1046     }
1047
1048     encoding = (const char *) htmlGetMetaEncoding(cur);
1049
1050     if (encoding != NULL) {
1051         xmlCharEncoding enc;
1052
1053         enc = xmlParseCharEncoding(encoding);
1054         if (enc != cur->charset) {
1055             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1056                 /*
1057                  * Not supported yet
1058                  */
1059                 return(-1);
1060             }
1061
1062             handler = xmlFindCharEncodingHandler(encoding);
1063             if (handler == NULL)
1064                 return(-1);
1065         } else {
1066             handler = xmlFindCharEncodingHandler(encoding);
1067         }
1068     }
1069
1070     /*
1071      * Fallback to HTML or ASCII when the encoding is unspecified
1072      */
1073     if (handler == NULL)
1074         handler = xmlFindCharEncodingHandler("HTML");
1075     if (handler == NULL)
1076         handler = xmlFindCharEncodingHandler("ascii");
1077
1078     buf = xmlOutputBufferCreateFile(f, handler);
1079     if (buf == NULL) return(-1);
1080     htmlDocContentDumpOutput(buf, cur, NULL);
1081
1082     ret = xmlOutputBufferClose(buf);
1083     return(ret);
1084 }
1085
1086 /**
1087  * htmlSaveFile:
1088  * @filename:  the filename (or URL)
1089  * @cur:  the document
1090  *
1091  * Dump an HTML document to a file. If @filename is "-" the stdout file is
1092  * used.
1093  * returns: the number of byte written or -1 in case of failure.
1094  */
1095 int
1096 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1097     xmlOutputBufferPtr buf;
1098     xmlCharEncodingHandlerPtr handler = NULL;
1099     const char *encoding;
1100     int ret;
1101
1102     if ((cur == NULL) || (filename == NULL))
1103         return(-1);
1104
1105     xmlInitParser();
1106
1107     encoding = (const char *) htmlGetMetaEncoding(cur);
1108
1109     if (encoding != NULL) {
1110         xmlCharEncoding enc;
1111
1112         enc = xmlParseCharEncoding(encoding);
1113         if (enc != cur->charset) {
1114             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1115                 /*
1116                  * Not supported yet
1117                  */
1118                 return(-1);
1119             }
1120
1121             handler = xmlFindCharEncodingHandler(encoding);
1122             if (handler == NULL)
1123                 return(-1);
1124         }
1125     }
1126
1127     /*
1128      * Fallback to HTML or ASCII when the encoding is unspecified
1129      */
1130     if (handler == NULL)
1131         handler = xmlFindCharEncodingHandler("HTML");
1132     if (handler == NULL)
1133         handler = xmlFindCharEncodingHandler("ascii");
1134
1135     /*
1136      * save the content to a temp buffer.
1137      */
1138     buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1139     if (buf == NULL) return(0);
1140
1141     htmlDocContentDumpOutput(buf, cur, NULL);
1142
1143     ret = xmlOutputBufferClose(buf);
1144     return(ret);
1145 }
1146
1147 /**
1148  * htmlSaveFileFormat:
1149  * @filename:  the filename
1150  * @cur:  the document
1151  * @format:  should formatting spaces been added
1152  * @encoding: the document encoding
1153  *
1154  * Dump an HTML document to a file using a given encoding.
1155  *
1156  * returns: the number of byte written or -1 in case of failure.
1157  */
1158 int
1159 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1160                    const char *encoding, int format) {
1161     xmlOutputBufferPtr buf;
1162     xmlCharEncodingHandlerPtr handler = NULL;
1163     int ret;
1164
1165     if ((cur == NULL) || (filename == NULL))
1166         return(-1);
1167
1168     xmlInitParser();
1169
1170     if (encoding != NULL) {
1171         xmlCharEncoding enc;
1172
1173         enc = xmlParseCharEncoding(encoding);
1174         if (enc != cur->charset) {
1175             if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1176                 /*
1177                  * Not supported yet
1178                  */
1179                 return(-1);
1180             }
1181
1182             handler = xmlFindCharEncodingHandler(encoding);
1183             if (handler == NULL)
1184                 return(-1);
1185         }
1186         htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1187     } else {
1188         htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1189     }
1190
1191     /*
1192      * Fallback to HTML or ASCII when the encoding is unspecified
1193      */
1194     if (handler == NULL)
1195         handler = xmlFindCharEncodingHandler("HTML");
1196     if (handler == NULL)
1197         handler = xmlFindCharEncodingHandler("ascii");
1198
1199     /*
1200      * save the content to a temp buffer.
1201      */
1202     buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1203     if (buf == NULL) return(0);
1204
1205     htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1206
1207     ret = xmlOutputBufferClose(buf);
1208     return(ret);
1209 }
1210
1211 /**
1212  * htmlSaveFileEnc:
1213  * @filename:  the filename
1214  * @cur:  the document
1215  * @encoding: the document encoding
1216  *
1217  * Dump an HTML document to a file using a given encoding
1218  * and formatting returns/spaces are added.
1219  *
1220  * returns: the number of byte written or -1 in case of failure.
1221  */
1222 int
1223 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1224     return(htmlSaveFileFormat(filename, cur, encoding, 1));
1225 }
1226
1227 #endif /* LIBXML_OUTPUT_ENABLED */
1228
1229 #define bottom_HTMLtree
1230 #include "elfgcchack.h"
1231 #endif /* LIBXML_HTML_ENABLED */