Update mojo sdk to rev 1dc8a9a5db73d3718d99917fadf31f5fb2ebad4f
[chromium-blink-merge.git] / third_party / libxml / src / HTMLtree.c
blobb5085836b900af69b90eab1d68bd71ed3ba55559
1 /*
2 * HTMLtree.c : implementation of access function for an HTML tree.
4 * See Copyright for the status of this software.
6 * daniel@veillard.com
7 */
10 #define IN_LIBXML
11 #include "libxml.h"
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
16 #ifdef HAVE_CTYPE_H
17 #include <ctype.h>
18 #endif
19 #ifdef HAVE_STDLIB_H
20 #include <stdlib.h>
21 #endif
23 #include <libxml/xmlmemory.h>
24 #include <libxml/HTMLparser.h>
25 #include <libxml/HTMLtree.h>
26 #include <libxml/entities.h>
27 #include <libxml/valid.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/parserInternals.h>
30 #include <libxml/globals.h>
31 #include <libxml/uri.h>
33 /************************************************************************
34 * *
35 * Getting/Setting encoding meta tags *
36 * *
37 ************************************************************************/
39 /**
40 * htmlGetMetaEncoding:
41 * @doc: the document
43 * Encoding definition lookup in the Meta tags
45 * Returns the current encoding as flagged in the HTML source
47 const xmlChar *
48 htmlGetMetaEncoding(htmlDocPtr doc) {
49 htmlNodePtr cur;
50 const xmlChar *content;
51 const xmlChar *encoding;
53 if (doc == NULL)
54 return(NULL);
55 cur = doc->children;
58 * Search the html
60 while (cur != NULL) {
61 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
62 if (xmlStrEqual(cur->name, BAD_CAST"html"))
63 break;
64 if (xmlStrEqual(cur->name, BAD_CAST"head"))
65 goto found_head;
66 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
67 goto found_meta;
69 cur = cur->next;
71 if (cur == NULL)
72 return(NULL);
73 cur = cur->children;
76 * Search the head
78 while (cur != NULL) {
79 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
80 if (xmlStrEqual(cur->name, BAD_CAST"head"))
81 break;
82 if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83 goto found_meta;
85 cur = cur->next;
87 if (cur == NULL)
88 return(NULL);
89 found_head:
90 cur = cur->children;
93 * Search the meta elements
95 found_meta:
96 while (cur != NULL) {
97 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
98 if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99 xmlAttrPtr attr = cur->properties;
100 int http;
101 const xmlChar *value;
103 content = NULL;
104 http = 0;
105 while (attr != NULL) {
106 if ((attr->children != NULL) &&
107 (attr->children->type == XML_TEXT_NODE) &&
108 (attr->children->next == NULL)) {
109 value = attr->children->content;
110 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112 http = 1;
113 else if ((value != NULL)
114 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115 content = value;
116 if ((http != 0) && (content != NULL))
117 goto found_content;
119 attr = attr->next;
123 cur = cur->next;
125 return(NULL);
127 found_content:
128 encoding = xmlStrstr(content, BAD_CAST"charset=");
129 if (encoding == NULL)
130 encoding = xmlStrstr(content, BAD_CAST"Charset=");
131 if (encoding == NULL)
132 encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133 if (encoding != NULL) {
134 encoding += 8;
135 } else {
136 encoding = xmlStrstr(content, BAD_CAST"charset =");
137 if (encoding == NULL)
138 encoding = xmlStrstr(content, BAD_CAST"Charset =");
139 if (encoding == NULL)
140 encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141 if (encoding != NULL)
142 encoding += 9;
144 if (encoding != NULL) {
145 while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
147 return(encoding);
151 * htmlSetMetaEncoding:
152 * @doc: the document
153 * @encoding: the encoding string
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
159 * Returns 0 in case of success and -1 in case of error
162 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163 htmlNodePtr cur, meta = NULL, head = NULL;
164 const xmlChar *content = NULL;
165 char newcontent[100];
168 if (doc == NULL)
169 return(-1);
171 /* html isn't a real encoding it's just libxml2 way to get entities */
172 if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
173 return(-1);
175 if (encoding != NULL) {
176 snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
177 (char *)encoding);
178 newcontent[sizeof(newcontent) - 1] = 0;
181 cur = doc->children;
184 * Search the html
186 while (cur != NULL) {
187 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
188 if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
189 break;
190 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
191 goto found_head;
192 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
193 goto found_meta;
195 cur = cur->next;
197 if (cur == NULL)
198 return(-1);
199 cur = cur->children;
202 * Search the head
204 while (cur != NULL) {
205 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
206 if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
207 break;
208 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
209 head = cur->parent;
210 goto found_meta;
213 cur = cur->next;
215 if (cur == NULL)
216 return(-1);
217 found_head:
218 head = cur;
219 if (cur->children == NULL)
220 goto create;
221 cur = cur->children;
223 found_meta:
225 * Search and update all the remaining the meta elements carrying
226 * encoding informations
228 while (cur != NULL) {
229 if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
230 if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
231 xmlAttrPtr attr = cur->properties;
232 int http;
233 const xmlChar *value;
235 content = NULL;
236 http = 0;
237 while (attr != NULL) {
238 if ((attr->children != NULL) &&
239 (attr->children->type == XML_TEXT_NODE) &&
240 (attr->children->next == NULL)) {
241 value = attr->children->content;
242 if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
243 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
244 http = 1;
245 else
247 if ((value != NULL) &&
248 (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
249 content = value;
251 if ((http != 0) && (content != NULL))
252 break;
254 attr = attr->next;
256 if ((http != 0) && (content != NULL)) {
257 meta = cur;
258 break;
263 cur = cur->next;
265 create:
266 if (meta == NULL) {
267 if ((encoding != NULL) && (head != NULL)) {
269 * Create a new Meta element with the right attributes
272 meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
273 if (head->children == NULL)
274 xmlAddChild(head, meta);
275 else
276 xmlAddPrevSibling(head->children, meta);
277 xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
278 xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
280 } else {
281 /* change the document only if there is a real encoding change */
282 if (xmlStrcasestr(content, encoding) == NULL) {
283 xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
288 return(0);
/**
 * booleanHTMLAttrs:
 *
 * These are the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 * The list is NULL-terminated and never modified, so both the strings
 * and the pointer array are const.
 */
static const char *const htmlBooleanAttrs[] = {
    "checked", "compact", "declare", "defer", "disabled", "ismap",
    "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
    "selected", NULL
};
307 * htmlIsBooleanAttr:
308 * @name: the name of the attribute to check
310 * Determine if a given attribute is a boolean attribute.
312 * returns: false if the attribute is not boolean, true otherwise.
315 htmlIsBooleanAttr(const xmlChar *name)
317 int i = 0;
319 while (htmlBooleanAttrs[i] != NULL) {
320 if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
321 return 1;
322 i++;
324 return 0;
327 #ifdef LIBXML_OUTPUT_ENABLED
329 * private routine exported from xmlIO.c
331 xmlOutputBufferPtr
332 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
333 /************************************************************************
335 * Output error handlers *
337 ************************************************************************/
339 * htmlSaveErrMemory:
340 * @extra: extra informations
342 * Handle an out of memory condition
344 static void
345 htmlSaveErrMemory(const char *extra)
347 __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
351 * htmlSaveErr:
352 * @code: the error number
353 * @node: the location of the error.
354 * @extra: extra informations
356 * Handle an out of memory condition
358 static void
359 htmlSaveErr(int code, xmlNodePtr node, const char *extra)
361 const char *msg = NULL;
363 switch(code) {
364 case XML_SAVE_NOT_UTF8:
365 msg = "string is not in UTF-8\n";
366 break;
367 case XML_SAVE_CHAR_INVALID:
368 msg = "invalid character value\n";
369 break;
370 case XML_SAVE_UNKNOWN_ENCODING:
371 msg = "unknown encoding %s\n";
372 break;
373 case XML_SAVE_NO_DOCTYPE:
374 msg = "HTML has no DOCTYPE\n";
375 break;
376 default:
377 msg = "unexpected error number\n";
379 __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
382 /************************************************************************
384 * Dumping HTML tree content to a simple buffer *
386 ************************************************************************/
388 static int
389 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
390 int format);
393 * htmlNodeDumpFormat:
394 * @buf: the HTML buffer output
395 * @doc: the document
396 * @cur: the current node
397 * @format: should formatting spaces been added
399 * Dump an HTML node, recursive behaviour,children are printed too.
401 * Returns the number of byte written or -1 in case of error
403 static int
404 htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
405 int format) {
406 unsigned int use;
407 int ret;
408 xmlOutputBufferPtr outbuf;
410 if (cur == NULL) {
411 return (-1);
413 if (buf == NULL) {
414 return (-1);
416 outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
417 if (outbuf == NULL) {
418 htmlSaveErrMemory("allocating HTML output buffer");
419 return (-1);
421 memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
422 outbuf->buffer = buf;
423 outbuf->encoder = NULL;
424 outbuf->writecallback = NULL;
425 outbuf->closecallback = NULL;
426 outbuf->context = NULL;
427 outbuf->written = 0;
429 use = buf->use;
430 htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
431 xmlFree(outbuf);
432 ret = buf->use - use;
433 return (ret);
437 * htmlNodeDump:
438 * @buf: the HTML buffer output
439 * @doc: the document
440 * @cur: the current node
442 * Dump an HTML node, recursive behaviour,children are printed too,
443 * and formatting returns are added.
445 * Returns the number of byte written or -1 in case of error
448 htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
449 xmlInitParser();
451 return(htmlNodeDumpFormat(buf, doc, cur, 1));
455 * htmlNodeDumpFileFormat:
456 * @out: the FILE pointer
457 * @doc: the document
458 * @cur: the current node
459 * @encoding: the document encoding
460 * @format: should formatting spaces been added
462 * Dump an HTML node, recursive behaviour,children are printed too.
464 * TODO: if encoding == NULL try to save in the doc encoding
466 * returns: the number of byte written or -1 in case of failure.
469 htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
470 xmlNodePtr cur, const char *encoding, int format) {
471 xmlOutputBufferPtr buf;
472 xmlCharEncodingHandlerPtr handler = NULL;
473 int ret;
475 xmlInitParser();
477 if (encoding != NULL) {
478 xmlCharEncoding enc;
480 enc = xmlParseCharEncoding(encoding);
481 if (enc != XML_CHAR_ENCODING_UTF8) {
482 handler = xmlFindCharEncodingHandler(encoding);
483 if (handler == NULL)
484 return(-1);
489 * Fallback to HTML or ASCII when the encoding is unspecified
491 if (handler == NULL)
492 handler = xmlFindCharEncodingHandler("HTML");
493 if (handler == NULL)
494 handler = xmlFindCharEncodingHandler("ascii");
497 * save the content to a temp buffer.
499 buf = xmlOutputBufferCreateFile(out, handler);
500 if (buf == NULL) return(0);
502 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
504 ret = xmlOutputBufferClose(buf);
505 return(ret);
509 * htmlNodeDumpFile:
510 * @out: the FILE pointer
511 * @doc: the document
512 * @cur: the current node
514 * Dump an HTML node, recursive behaviour,children are printed too,
515 * and formatting returns are added.
517 void
518 htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
519 htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
523 * htmlDocDumpMemoryFormat:
524 * @cur: the document
525 * @mem: OUT: the memory pointer
526 * @size: OUT: the memory length
527 * @format: should formatting spaces been added
529 * Dump an HTML document in memory and return the xmlChar * and it's size.
530 * It's up to the caller to free the memory.
532 void
533 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
534 xmlOutputBufferPtr buf;
535 xmlCharEncodingHandlerPtr handler = NULL;
536 const char *encoding;
538 xmlInitParser();
540 if ((mem == NULL) || (size == NULL))
541 return;
542 if (cur == NULL) {
543 *mem = NULL;
544 *size = 0;
545 return;
548 encoding = (const char *) htmlGetMetaEncoding(cur);
550 if (encoding != NULL) {
551 xmlCharEncoding enc;
553 enc = xmlParseCharEncoding(encoding);
554 if (enc != cur->charset) {
555 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
557 * Not supported yet
559 *mem = NULL;
560 *size = 0;
561 return;
564 handler = xmlFindCharEncodingHandler(encoding);
565 if (handler == NULL) {
566 *mem = NULL;
567 *size = 0;
568 return;
570 } else {
571 handler = xmlFindCharEncodingHandler(encoding);
576 * Fallback to HTML or ASCII when the encoding is unspecified
578 if (handler == NULL)
579 handler = xmlFindCharEncodingHandler("HTML");
580 if (handler == NULL)
581 handler = xmlFindCharEncodingHandler("ascii");
583 buf = xmlAllocOutputBufferInternal(handler);
584 if (buf == NULL) {
585 *mem = NULL;
586 *size = 0;
587 return;
590 htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
592 xmlOutputBufferFlush(buf);
593 if (buf->conv != NULL) {
594 *size = buf->conv->use;
595 *mem = xmlStrndup(buf->conv->content, *size);
596 } else {
597 *size = buf->buffer->use;
598 *mem = xmlStrndup(buf->buffer->content, *size);
600 (void)xmlOutputBufferClose(buf);
604 * htmlDocDumpMemory:
605 * @cur: the document
606 * @mem: OUT: the memory pointer
607 * @size: OUT: the memory length
609 * Dump an HTML document in memory and return the xmlChar * and it's size.
610 * It's up to the caller to free the memory.
612 void
613 htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
614 htmlDocDumpMemoryFormat(cur, mem, size, 1);
618 /************************************************************************
620 * Dumping HTML tree content to an I/O output buffer *
622 ************************************************************************/
624 void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
627 * htmlDtdDumpOutput:
628 * @buf: the HTML buffer output
629 * @doc: the document
630 * @encoding: the encoding string
632 * TODO: check whether encoding is needed
634 * Dump the HTML document DTD, if any.
636 static void
637 htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
638 const char *encoding ATTRIBUTE_UNUSED) {
639 xmlDtdPtr cur = doc->intSubset;
641 if (cur == NULL) {
642 htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
643 return;
645 xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
646 xmlOutputBufferWriteString(buf, (const char *)cur->name);
647 if (cur->ExternalID != NULL) {
648 xmlOutputBufferWriteString(buf, " PUBLIC ");
649 xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
650 if (cur->SystemID != NULL) {
651 xmlOutputBufferWriteString(buf, " ");
652 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
654 } else if (cur->SystemID != NULL) {
655 xmlOutputBufferWriteString(buf, " SYSTEM ");
656 xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
658 xmlOutputBufferWriteString(buf, ">\n");
662 * htmlAttrDumpOutput:
663 * @buf: the HTML buffer output
664 * @doc: the document
665 * @cur: the attribute pointer
666 * @encoding: the encoding string
668 * Dump an HTML attribute
670 static void
671 htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
672 const char *encoding ATTRIBUTE_UNUSED) {
673 xmlChar *value;
676 * TODO: The html output method should not escape a & character
677 * occurring in an attribute value immediately followed by
678 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
681 if (cur == NULL) {
682 return;
684 xmlOutputBufferWriteString(buf, " ");
685 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
686 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
687 xmlOutputBufferWriteString(buf, ":");
689 xmlOutputBufferWriteString(buf, (const char *)cur->name);
690 if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
691 value = xmlNodeListGetString(doc, cur->children, 0);
692 if (value) {
693 xmlOutputBufferWriteString(buf, "=");
694 if ((cur->ns == NULL) && (cur->parent != NULL) &&
695 (cur->parent->ns == NULL) &&
696 ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
697 (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
698 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
699 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
700 (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
701 xmlChar *escaped;
702 xmlChar *tmp = value;
704 while (IS_BLANK_CH(*tmp)) tmp++;
706 escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
707 if (escaped != NULL) {
708 xmlBufferWriteQuotedString(buf->buffer, escaped);
709 xmlFree(escaped);
710 } else {
711 xmlBufferWriteQuotedString(buf->buffer, value);
713 } else {
714 xmlBufferWriteQuotedString(buf->buffer, value);
716 xmlFree(value);
717 } else {
718 xmlOutputBufferWriteString(buf, "=\"\"");
724 * htmlAttrListDumpOutput:
725 * @buf: the HTML buffer output
726 * @doc: the document
727 * @cur: the first attribute pointer
728 * @encoding: the encoding string
730 * Dump a list of HTML attributes
732 static void
733 htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
734 if (cur == NULL) {
735 return;
737 while (cur != NULL) {
738 htmlAttrDumpOutput(buf, doc, cur, encoding);
739 cur = cur->next;
746 * htmlNodeListDumpOutput:
747 * @buf: the HTML buffer output
748 * @doc: the document
749 * @cur: the first node
750 * @encoding: the encoding string
751 * @format: should formatting spaces been added
753 * Dump an HTML node list, recursive behaviour,children are printed too.
755 static void
756 htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
757 xmlNodePtr cur, const char *encoding, int format) {
758 if (cur == NULL) {
759 return;
761 while (cur != NULL) {
762 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
763 cur = cur->next;
768 * htmlNodeDumpFormatOutput:
769 * @buf: the HTML buffer output
770 * @doc: the document
771 * @cur: the current node
772 * @encoding: the encoding string
773 * @format: should formatting spaces been added
775 * Dump an HTML node, recursive behaviour,children are printed too.
777 void
778 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
779 xmlNodePtr cur, const char *encoding, int format) {
780 const htmlElemDesc * info;
782 xmlInitParser();
784 if ((cur == NULL) || (buf == NULL)) {
785 return;
788 * Special cases.
790 if (cur->type == XML_DTD_NODE)
791 return;
792 if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
793 (cur->type == XML_DOCUMENT_NODE)){
794 htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
795 return;
797 if (cur->type == XML_ATTRIBUTE_NODE) {
798 htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
799 return;
801 if (cur->type == HTML_TEXT_NODE) {
802 if (cur->content != NULL) {
803 if (((cur->name == (const xmlChar *)xmlStringText) ||
804 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
805 ((cur->parent == NULL) ||
806 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
807 (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
808 xmlChar *buffer;
810 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
811 if (buffer != NULL) {
812 xmlOutputBufferWriteString(buf, (const char *)buffer);
813 xmlFree(buffer);
815 } else {
816 xmlOutputBufferWriteString(buf, (const char *)cur->content);
819 return;
821 if (cur->type == HTML_COMMENT_NODE) {
822 if (cur->content != NULL) {
823 xmlOutputBufferWriteString(buf, "<!--");
824 xmlOutputBufferWriteString(buf, (const char *)cur->content);
825 xmlOutputBufferWriteString(buf, "-->");
827 return;
829 if (cur->type == HTML_PI_NODE) {
830 if (cur->name == NULL)
831 return;
832 xmlOutputBufferWriteString(buf, "<?");
833 xmlOutputBufferWriteString(buf, (const char *)cur->name);
834 if (cur->content != NULL) {
835 xmlOutputBufferWriteString(buf, " ");
836 xmlOutputBufferWriteString(buf, (const char *)cur->content);
838 xmlOutputBufferWriteString(buf, ">");
839 return;
841 if (cur->type == HTML_ENTITY_REF_NODE) {
842 xmlOutputBufferWriteString(buf, "&");
843 xmlOutputBufferWriteString(buf, (const char *)cur->name);
844 xmlOutputBufferWriteString(buf, ";");
845 return;
847 if (cur->type == HTML_PRESERVE_NODE) {
848 if (cur->content != NULL) {
849 xmlOutputBufferWriteString(buf, (const char *)cur->content);
851 return;
855 * Get specific HTML info for that node.
857 if (cur->ns == NULL)
858 info = htmlTagLookup(cur->name);
859 else
860 info = NULL;
862 xmlOutputBufferWriteString(buf, "<");
863 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
864 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
865 xmlOutputBufferWriteString(buf, ":");
867 xmlOutputBufferWriteString(buf, (const char *)cur->name);
868 if (cur->nsDef)
869 xmlNsListDumpOutput(buf, cur->nsDef);
870 if (cur->properties != NULL)
871 htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
873 if ((info != NULL) && (info->empty)) {
874 xmlOutputBufferWriteString(buf, ">");
875 if ((format) && (!info->isinline) && (cur->next != NULL)) {
876 if ((cur->next->type != HTML_TEXT_NODE) &&
877 (cur->next->type != HTML_ENTITY_REF_NODE) &&
878 (cur->parent != NULL) &&
879 (cur->parent->name != NULL) &&
880 (cur->parent->name[0] != 'p')) /* p, pre, param */
881 xmlOutputBufferWriteString(buf, "\n");
883 return;
885 if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
886 (cur->children == NULL)) {
887 if ((info != NULL) && (info->saveEndTag != 0) &&
888 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
889 (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
890 xmlOutputBufferWriteString(buf, ">");
891 } else {
892 xmlOutputBufferWriteString(buf, "></");
893 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
894 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
895 xmlOutputBufferWriteString(buf, ":");
897 xmlOutputBufferWriteString(buf, (const char *)cur->name);
898 xmlOutputBufferWriteString(buf, ">");
900 if ((format) && (cur->next != NULL) &&
901 (info != NULL) && (!info->isinline)) {
902 if ((cur->next->type != HTML_TEXT_NODE) &&
903 (cur->next->type != HTML_ENTITY_REF_NODE) &&
904 (cur->parent != NULL) &&
905 (cur->parent->name != NULL) &&
906 (cur->parent->name[0] != 'p')) /* p, pre, param */
907 xmlOutputBufferWriteString(buf, "\n");
909 return;
911 xmlOutputBufferWriteString(buf, ">");
912 if ((cur->type != XML_ELEMENT_NODE) &&
913 (cur->content != NULL)) {
915 * Uses the OutputBuffer property to automatically convert
916 * invalids to charrefs
919 xmlOutputBufferWriteString(buf, (const char *) cur->content);
921 if (cur->children != NULL) {
922 if ((format) && (info != NULL) && (!info->isinline) &&
923 (cur->children->type != HTML_TEXT_NODE) &&
924 (cur->children->type != HTML_ENTITY_REF_NODE) &&
925 (cur->children != cur->last) &&
926 (cur->name != NULL) &&
927 (cur->name[0] != 'p')) /* p, pre, param */
928 xmlOutputBufferWriteString(buf, "\n");
929 htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
930 if ((format) && (info != NULL) && (!info->isinline) &&
931 (cur->last->type != HTML_TEXT_NODE) &&
932 (cur->last->type != HTML_ENTITY_REF_NODE) &&
933 (cur->children != cur->last) &&
934 (cur->name != NULL) &&
935 (cur->name[0] != 'p')) /* p, pre, param */
936 xmlOutputBufferWriteString(buf, "\n");
938 xmlOutputBufferWriteString(buf, "</");
939 if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
940 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
941 xmlOutputBufferWriteString(buf, ":");
943 xmlOutputBufferWriteString(buf, (const char *)cur->name);
944 xmlOutputBufferWriteString(buf, ">");
945 if ((format) && (info != NULL) && (!info->isinline) &&
946 (cur->next != NULL)) {
947 if ((cur->next->type != HTML_TEXT_NODE) &&
948 (cur->next->type != HTML_ENTITY_REF_NODE) &&
949 (cur->parent != NULL) &&
950 (cur->parent->name != NULL) &&
951 (cur->parent->name[0] != 'p')) /* p, pre, param */
952 xmlOutputBufferWriteString(buf, "\n");
957 * htmlNodeDumpOutput:
958 * @buf: the HTML buffer output
959 * @doc: the document
960 * @cur: the current node
961 * @encoding: the encoding string
963 * Dump an HTML node, recursive behaviour,children are printed too,
964 * and formatting returns/spaces are added.
966 void
967 htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
968 xmlNodePtr cur, const char *encoding) {
969 htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
973 * htmlDocContentDumpFormatOutput:
974 * @buf: the HTML buffer output
975 * @cur: the document
976 * @encoding: the encoding string
977 * @format: should formatting spaces been added
979 * Dump an HTML document.
981 void
982 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
983 const char *encoding, int format) {
984 int type;
986 xmlInitParser();
988 if ((buf == NULL) || (cur == NULL))
989 return;
992 * force to output the stuff as HTML, especially for entities
994 type = cur->type;
995 cur->type = XML_HTML_DOCUMENT_NODE;
996 if (cur->intSubset != NULL) {
997 htmlDtdDumpOutput(buf, cur, NULL);
999 if (cur->children != NULL) {
1000 htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
1002 xmlOutputBufferWriteString(buf, "\n");
1003 cur->type = (xmlElementType) type;
1007 * htmlDocContentDumpOutput:
1008 * @buf: the HTML buffer output
1009 * @cur: the document
1010 * @encoding: the encoding string
1012 * Dump an HTML document. Formating return/spaces are added.
1014 void
1015 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1016 const char *encoding) {
1017 htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1020 /************************************************************************
1022 * Saving functions front-ends *
1024 ************************************************************************/
1027 * htmlDocDump:
1028 * @f: the FILE*
1029 * @cur: the document
1031 * Dump an HTML document to an open FILE.
1033 * returns: the number of byte written or -1 in case of failure.
1036 htmlDocDump(FILE *f, xmlDocPtr cur) {
1037 xmlOutputBufferPtr buf;
1038 xmlCharEncodingHandlerPtr handler = NULL;
1039 const char *encoding;
1040 int ret;
1042 xmlInitParser();
1044 if ((cur == NULL) || (f == NULL)) {
1045 return(-1);
1048 encoding = (const char *) htmlGetMetaEncoding(cur);
1050 if (encoding != NULL) {
1051 xmlCharEncoding enc;
1053 enc = xmlParseCharEncoding(encoding);
1054 if (enc != cur->charset) {
1055 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1057 * Not supported yet
1059 return(-1);
1062 handler = xmlFindCharEncodingHandler(encoding);
1063 if (handler == NULL)
1064 return(-1);
1065 } else {
1066 handler = xmlFindCharEncodingHandler(encoding);
1071 * Fallback to HTML or ASCII when the encoding is unspecified
1073 if (handler == NULL)
1074 handler = xmlFindCharEncodingHandler("HTML");
1075 if (handler == NULL)
1076 handler = xmlFindCharEncodingHandler("ascii");
1078 buf = xmlOutputBufferCreateFile(f, handler);
1079 if (buf == NULL) return(-1);
1080 htmlDocContentDumpOutput(buf, cur, NULL);
1082 ret = xmlOutputBufferClose(buf);
1083 return(ret);
1087 * htmlSaveFile:
1088 * @filename: the filename (or URL)
1089 * @cur: the document
1091 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1092 * used.
1093 * returns: the number of byte written or -1 in case of failure.
1096 htmlSaveFile(const char *filename, xmlDocPtr cur) {
1097 xmlOutputBufferPtr buf;
1098 xmlCharEncodingHandlerPtr handler = NULL;
1099 const char *encoding;
1100 int ret;
1102 if ((cur == NULL) || (filename == NULL))
1103 return(-1);
1105 xmlInitParser();
1107 encoding = (const char *) htmlGetMetaEncoding(cur);
1109 if (encoding != NULL) {
1110 xmlCharEncoding enc;
1112 enc = xmlParseCharEncoding(encoding);
1113 if (enc != cur->charset) {
1114 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1116 * Not supported yet
1118 return(-1);
1121 handler = xmlFindCharEncodingHandler(encoding);
1122 if (handler == NULL)
1123 return(-1);
1128 * Fallback to HTML or ASCII when the encoding is unspecified
1130 if (handler == NULL)
1131 handler = xmlFindCharEncodingHandler("HTML");
1132 if (handler == NULL)
1133 handler = xmlFindCharEncodingHandler("ascii");
1136 * save the content to a temp buffer.
1138 buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1139 if (buf == NULL) return(0);
1141 htmlDocContentDumpOutput(buf, cur, NULL);
1143 ret = xmlOutputBufferClose(buf);
1144 return(ret);
1148 * htmlSaveFileFormat:
1149 * @filename: the filename
1150 * @cur: the document
1151 * @format: should formatting spaces been added
1152 * @encoding: the document encoding
1154 * Dump an HTML document to a file using a given encoding.
1156 * returns: the number of byte written or -1 in case of failure.
1159 htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1160 const char *encoding, int format) {
1161 xmlOutputBufferPtr buf;
1162 xmlCharEncodingHandlerPtr handler = NULL;
1163 int ret;
1165 if ((cur == NULL) || (filename == NULL))
1166 return(-1);
1168 xmlInitParser();
1170 if (encoding != NULL) {
1171 xmlCharEncoding enc;
1173 enc = xmlParseCharEncoding(encoding);
1174 if (enc != cur->charset) {
1175 if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1177 * Not supported yet
1179 return(-1);
1182 handler = xmlFindCharEncodingHandler(encoding);
1183 if (handler == NULL)
1184 return(-1);
1186 htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1187 } else {
1188 htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1192 * Fallback to HTML or ASCII when the encoding is unspecified
1194 if (handler == NULL)
1195 handler = xmlFindCharEncodingHandler("HTML");
1196 if (handler == NULL)
1197 handler = xmlFindCharEncodingHandler("ascii");
1200 * save the content to a temp buffer.
1202 buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1203 if (buf == NULL) return(0);
1205 htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1207 ret = xmlOutputBufferClose(buf);
1208 return(ret);
1212 * htmlSaveFileEnc:
1213 * @filename: the filename
1214 * @cur: the document
1215 * @encoding: the document encoding
1217 * Dump an HTML document to a file using a given encoding
1218 * and formatting returns/spaces are added.
1220 * returns: the number of byte written or -1 in case of failure.
1223 htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1224 return(htmlSaveFileFormat(filename, cur, encoding, 1));
1227 #endif /* LIBXML_OUTPUT_ENABLED */
1229 #define bottom_HTMLtree
1230 #include "elfgcchack.h"
1231 #endif /* LIBXML_HTML_ENABLED */