2 * HTMLtree.c : implementation of access function for an HTML tree.
4 * See Copyright for the status of this software.
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
18 #include <libxml/xmlmemory.h>
19 #include <libxml/HTMLparser.h>
20 #include <libxml/HTMLtree.h>
21 #include <libxml/entities.h>
22 #include <libxml/valid.h>
23 #include <libxml/xmlerror.h>
24 #include <libxml/parserInternals.h>
25 #include <libxml/globals.h>
26 #include <libxml/uri.h>
28 #include "private/buf.h"
29 #include "private/error.h"
30 #include "private/io.h"
31 #include "private/save.h"
33 /************************************************************************
35 * Getting/Setting encoding meta tags *
37 ************************************************************************/
40 * htmlGetMetaEncoding:
43 * Encoding definition lookup in the Meta tags
45 * Returns the current encoding as flagged in the HTML source
48 htmlGetMetaEncoding(htmlDocPtr doc
) {
50 const xmlChar
*content
;
51 const xmlChar
*encoding
;
61 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
62 if (xmlStrEqual(cur
->name
, BAD_CAST
"html"))
64 if (xmlStrEqual(cur
->name
, BAD_CAST
"head"))
66 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta"))
79 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
80 if (xmlStrEqual(cur
->name
, BAD_CAST
"head"))
82 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta"))
93 * Search the meta elements
97 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
98 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta")) {
99 xmlAttrPtr attr
= cur
->properties
;
101 const xmlChar
*value
;
105 while (attr
!= NULL
) {
106 if ((attr
->children
!= NULL
) &&
107 (attr
->children
->type
== XML_TEXT_NODE
) &&
108 (attr
->children
->next
== NULL
)) {
109 value
= attr
->children
->content
;
110 if ((!xmlStrcasecmp(attr
->name
, BAD_CAST
"http-equiv"))
111 && (!xmlStrcasecmp(value
, BAD_CAST
"Content-Type")))
113 else if ((value
!= NULL
)
114 && (!xmlStrcasecmp(attr
->name
, BAD_CAST
"content")))
116 if ((http
!= 0) && (content
!= NULL
))
128 encoding
= xmlStrstr(content
, BAD_CAST
"charset=");
129 if (encoding
== NULL
)
130 encoding
= xmlStrstr(content
, BAD_CAST
"Charset=");
131 if (encoding
== NULL
)
132 encoding
= xmlStrstr(content
, BAD_CAST
"CHARSET=");
133 if (encoding
!= NULL
) {
136 encoding
= xmlStrstr(content
, BAD_CAST
"charset =");
137 if (encoding
== NULL
)
138 encoding
= xmlStrstr(content
, BAD_CAST
"Charset =");
139 if (encoding
== NULL
)
140 encoding
= xmlStrstr(content
, BAD_CAST
"CHARSET =");
141 if (encoding
!= NULL
)
144 if (encoding
!= NULL
) {
145 while ((*encoding
== ' ') || (*encoding
== '\t')) encoding
++;
151 * htmlSetMetaEncoding:
153 * @encoding: the encoding string
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
159 * Returns 0 in case of success and -1 in case of error
162 htmlSetMetaEncoding(htmlDocPtr doc
, const xmlChar
*encoding
) {
163 htmlNodePtr cur
, meta
= NULL
, head
= NULL
;
164 const xmlChar
*content
= NULL
;
165 char newcontent
[100];
172 /* "html" isn't a real encoding; it's just libxml2's way to get entities */
173 if (!xmlStrcasecmp(encoding
, BAD_CAST
"html"))
176 if (encoding
!= NULL
) {
177 snprintf(newcontent
, sizeof(newcontent
), "text/html; charset=%s",
179 newcontent
[sizeof(newcontent
) - 1] = 0;
187 while (cur
!= NULL
) {
188 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
189 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"html") == 0)
191 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"head") == 0)
193 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0)
205 while (cur
!= NULL
) {
206 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
207 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"head") == 0)
209 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0) {
220 if (cur
->children
== NULL
)
226 * Search and update all the remaining meta elements carrying
227 * encoding information
229 while (cur
!= NULL
) {
230 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
231 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0) {
232 xmlAttrPtr attr
= cur
->properties
;
234 const xmlChar
*value
;
238 while (attr
!= NULL
) {
239 if ((attr
->children
!= NULL
) &&
240 (attr
->children
->type
== XML_TEXT_NODE
) &&
241 (attr
->children
->next
== NULL
)) {
242 value
= attr
->children
->content
;
243 if ((!xmlStrcasecmp(attr
->name
, BAD_CAST
"http-equiv"))
244 && (!xmlStrcasecmp(value
, BAD_CAST
"Content-Type")))
248 if ((value
!= NULL
) &&
249 (!xmlStrcasecmp(attr
->name
, BAD_CAST
"content")))
252 if ((http
!= 0) && (content
!= NULL
))
257 if ((http
!= 0) && (content
!= NULL
)) {
268 if ((encoding
!= NULL
) && (head
!= NULL
)) {
270 * Create a new Meta element with the right attributes
273 meta
= xmlNewDocNode(doc
, NULL
, BAD_CAST
"meta", NULL
);
274 if (head
->children
== NULL
)
275 xmlAddChild(head
, meta
);
277 xmlAddPrevSibling(head
->children
, meta
);
278 xmlNewProp(meta
, BAD_CAST
"http-equiv", BAD_CAST
"Content-Type");
279 xmlNewProp(meta
, BAD_CAST
"content", BAD_CAST newcontent
);
282 /* remove the meta tag if NULL is passed */
283 if (encoding
== NULL
) {
287 /* change the document only if there is a real encoding change */
288 else if (xmlStrcasestr(content
, encoding
) == NULL
) {
289 xmlSetProp(meta
, BAD_CAST
"content", BAD_CAST newcontent
);
300 * These are the HTML attributes which will be output
301 * in minimized form, i.e. <option selected="selected"> will be
302 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
305 static const char* const htmlBooleanAttrs
[] = {
306 "checked", "compact", "declare", "defer", "disabled", "ismap",
307 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
314 * @name: the name of the attribute to check
316 * Determine if a given attribute is a boolean attribute.
318 * returns: false if the attribute is not boolean, true otherwise.
321 htmlIsBooleanAttr(const xmlChar
*name
)
325 while (htmlBooleanAttrs
[i
] != NULL
) {
326 if (xmlStrcasecmp((const xmlChar
*)htmlBooleanAttrs
[i
], name
) == 0)
333 #ifdef LIBXML_OUTPUT_ENABLED
334 /************************************************************************
336 * Output error handlers *
338 ************************************************************************/
341 * @extra: extra information
343 * Handle an out of memory condition
346 htmlSaveErrMemory(const char *extra
)
348 __xmlSimpleError(XML_FROM_OUTPUT
, XML_ERR_NO_MEMORY
, NULL
, NULL
, extra
);
353 * @code: the error number
354 * @node: the location of the error.
355 * @extra: extra information
357 * Handle an error raised while saving HTML output
360 htmlSaveErr(int code
, xmlNodePtr node
, const char *extra
)
362 const char *msg
= NULL
;
365 case XML_SAVE_NOT_UTF8
:
366 msg
= "string is not in UTF-8\n";
368 case XML_SAVE_CHAR_INVALID
:
369 msg
= "invalid character value\n";
371 case XML_SAVE_UNKNOWN_ENCODING
:
372 msg
= "unknown encoding %s\n";
374 case XML_SAVE_NO_DOCTYPE
:
375 msg
= "HTML has no DOCTYPE\n";
378 msg
= "unexpected error number\n";
380 __xmlSimpleError(XML_FROM_OUTPUT
, code
, node
, msg
, extra
);
383 /************************************************************************
385 * Dumping HTML tree content to a simple buffer *
387 ************************************************************************/
390 * htmlBufNodeDumpFormat:
391 * @buf: the xmlBufPtr output
393 * @cur: the current node
394 * @format: should formatting spaces be added
396 * Dump an HTML node, recursive behaviour, children are printed too.
398 * Returns the number of bytes written or -1 in case of error
401 htmlBufNodeDumpFormat(xmlBufPtr buf
, xmlDocPtr doc
, xmlNodePtr cur
,
405 xmlOutputBufferPtr outbuf
;
413 outbuf
= (xmlOutputBufferPtr
) xmlMalloc(sizeof(xmlOutputBuffer
));
414 if (outbuf
== NULL
) {
415 htmlSaveErrMemory("allocating HTML output buffer");
418 memset(outbuf
, 0, sizeof(xmlOutputBuffer
));
419 outbuf
->buffer
= buf
;
420 outbuf
->encoder
= NULL
;
421 outbuf
->writecallback
= NULL
;
422 outbuf
->closecallback
= NULL
;
423 outbuf
->context
= NULL
;
426 use
= xmlBufUse(buf
);
427 htmlNodeDumpFormatOutput(outbuf
, doc
, cur
, NULL
, format
);
429 ret
= xmlBufUse(buf
) - use
;
435 * @buf: the HTML buffer output
437 * @cur: the current node
439 * Dump an HTML node, recursive behaviour, children are printed too,
440 * and formatting returns are added.
442 * Returns the number of bytes written or -1 in case of error
445 htmlNodeDump(xmlBufferPtr buf
, xmlDocPtr doc
, xmlNodePtr cur
) {
449 if ((buf
== NULL
) || (cur
== NULL
))
453 buffer
= xmlBufFromBuffer(buf
);
457 ret
= htmlBufNodeDumpFormat(buffer
, doc
, cur
, 1);
459 xmlBufBackToBuffer(buffer
);
467 * htmlNodeDumpFileFormat:
468 * @out: the FILE pointer
470 * @cur: the current node
471 * @encoding: the document encoding
472 * @format: should formatting spaces be added
474 * Dump an HTML node, recursive behaviour, children are printed too.
476 * TODO: if encoding == NULL try to save in the doc encoding
478 * returns: the number of bytes written or -1 in case of failure.
481 htmlNodeDumpFileFormat(FILE *out
, xmlDocPtr doc
,
482 xmlNodePtr cur
, const char *encoding
, int format
) {
483 xmlOutputBufferPtr buf
;
484 xmlCharEncodingHandlerPtr handler
= NULL
;
489 if (encoding
!= NULL
) {
492 enc
= xmlParseCharEncoding(encoding
);
493 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
494 handler
= xmlFindCharEncodingHandler(encoding
);
496 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
500 * Fall back to HTML or ASCII when the encoding is unspecified
503 handler
= xmlFindCharEncodingHandler("HTML");
505 handler
= xmlFindCharEncodingHandler("ascii");
509 * save the content to a temp buffer.
511 buf
= xmlOutputBufferCreateFile(out
, handler
);
512 if (buf
== NULL
) return(0);
514 htmlNodeDumpFormatOutput(buf
, doc
, cur
, NULL
, format
);
516 ret
= xmlOutputBufferClose(buf
);
522 * @out: the FILE pointer
524 * @cur: the current node
526 * Dump an HTML node, recursive behaviour, children are printed too,
527 * and formatting returns are added.
530 htmlNodeDumpFile(FILE *out
, xmlDocPtr doc
, xmlNodePtr cur
) {
531 htmlNodeDumpFileFormat(out
, doc
, cur
, NULL
, 1);
535 * htmlDocDumpMemoryFormat:
537 * @mem: OUT: the memory pointer
538 * @size: OUT: the memory length
539 * @format: should formatting spaces be added
541 * Dump an HTML document in memory and return the xmlChar * and its size.
542 * It's up to the caller to free the memory.
545 htmlDocDumpMemoryFormat(xmlDocPtr cur
, xmlChar
**mem
, int *size
, int format
) {
546 xmlOutputBufferPtr buf
;
547 xmlCharEncodingHandlerPtr handler
= NULL
;
548 const char *encoding
;
552 if ((mem
== NULL
) || (size
== NULL
))
560 encoding
= (const char *) htmlGetMetaEncoding(cur
);
562 if (encoding
!= NULL
) {
565 enc
= xmlParseCharEncoding(encoding
);
566 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
567 handler
= xmlFindCharEncodingHandler(encoding
);
569 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
574 * Fall back to HTML or ASCII when the encoding is unspecified
577 handler
= xmlFindCharEncodingHandler("HTML");
579 handler
= xmlFindCharEncodingHandler("ascii");
582 buf
= xmlAllocOutputBufferInternal(handler
);
589 htmlDocContentDumpFormatOutput(buf
, cur
, NULL
, format
);
591 xmlOutputBufferFlush(buf
);
592 if (buf
->conv
!= NULL
) {
593 *size
= xmlBufUse(buf
->conv
);
594 *mem
= xmlStrndup(xmlBufContent(buf
->conv
), *size
);
596 *size
= xmlBufUse(buf
->buffer
);
597 *mem
= xmlStrndup(xmlBufContent(buf
->buffer
), *size
);
599 (void)xmlOutputBufferClose(buf
);
605 * @mem: OUT: the memory pointer
606 * @size: OUT: the memory length
608 * Dump an HTML document in memory and return the xmlChar * and its size.
609 * It's up to the caller to free the memory.
612 htmlDocDumpMemory(xmlDocPtr cur
, xmlChar
**mem
, int *size
) {
613 htmlDocDumpMemoryFormat(cur
, mem
, size
, 1);
617 /************************************************************************
619 * Dumping HTML tree content to an I/O output buffer *
621 ************************************************************************/
625 * @buf: the HTML buffer output
627 * @encoding: the encoding string
629 * TODO: check whether encoding is needed
631 * Dump the HTML document DTD, if any.
634 htmlDtdDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
635 const char *encoding ATTRIBUTE_UNUSED
) {
636 xmlDtdPtr cur
= doc
->intSubset
;
639 htmlSaveErr(XML_SAVE_NO_DOCTYPE
, (xmlNodePtr
) doc
, NULL
);
642 xmlOutputBufferWriteString(buf
, "<!DOCTYPE ");
643 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
644 if (cur
->ExternalID
!= NULL
) {
645 xmlOutputBufferWriteString(buf
, " PUBLIC ");
646 xmlBufWriteQuotedString(buf
->buffer
, cur
->ExternalID
);
647 if (cur
->SystemID
!= NULL
) {
648 xmlOutputBufferWriteString(buf
, " ");
649 xmlBufWriteQuotedString(buf
->buffer
, cur
->SystemID
);
651 } else if (cur
->SystemID
!= NULL
&&
652 xmlStrcmp(cur
->SystemID
, BAD_CAST
"about:legacy-compat")) {
653 xmlOutputBufferWriteString(buf
, " SYSTEM ");
654 xmlBufWriteQuotedString(buf
->buffer
, cur
->SystemID
);
656 xmlOutputBufferWriteString(buf
, ">\n");
660 * htmlAttrDumpOutput:
661 * @buf: the HTML buffer output
663 * @cur: the attribute pointer
665 * Dump an HTML attribute
668 htmlAttrDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
, xmlAttrPtr cur
) {
672 * The html output method should not escape a & character
673 * occurring in an attribute value immediately followed by
674 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
675 * This is implemented in xmlEncodeEntitiesReentrant
681 xmlOutputBufferWriteString(buf
, " ");
682 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
683 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
684 xmlOutputBufferWriteString(buf
, ":");
686 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
687 if ((cur
->children
!= NULL
) && (!htmlIsBooleanAttr(cur
->name
))) {
688 value
= xmlNodeListGetString(doc
, cur
->children
, 0);
690 xmlOutputBufferWriteString(buf
, "=");
691 if ((cur
->ns
== NULL
) && (cur
->parent
!= NULL
) &&
692 (cur
->parent
->ns
== NULL
) &&
693 ((!xmlStrcasecmp(cur
->name
, BAD_CAST
"href")) ||
694 (!xmlStrcasecmp(cur
->name
, BAD_CAST
"action")) ||
695 (!xmlStrcasecmp(cur
->name
, BAD_CAST
"src")) ||
696 ((!xmlStrcasecmp(cur
->name
, BAD_CAST
"name")) &&
697 (!xmlStrcasecmp(cur
->parent
->name
, BAD_CAST
"a"))))) {
699 xmlChar
*tmp
= value
;
701 while (IS_BLANK_CH(*tmp
)) tmp
++;
704 * Angle brackets are technically illegal in URIs, but they're
705 * used in server side includes, for example. Curly brackets
706 * are illegal as well and often used in templates.
707 * Don't escape non-whitespace, printable ASCII chars for
708 * improved interoperability. Only escape space, control
709 * and non-ASCII chars.
711 escaped
= xmlURIEscapeStr(tmp
,
712 BAD_CAST
"\"#$%&+,/:;<=>?@[\\]^`{|}");
713 if (escaped
!= NULL
) {
714 xmlBufWriteQuotedString(buf
->buffer
, escaped
);
717 xmlBufWriteQuotedString(buf
->buffer
, value
);
720 xmlBufWriteQuotedString(buf
->buffer
, value
);
724 xmlOutputBufferWriteString(buf
, "=\"\"");
730 * htmlNodeDumpFormatOutput:
731 * @buf: the HTML buffer output
733 * @cur: the current node
734 * @encoding: the encoding string (unused)
735 * @format: should formatting spaces be added
737 * Dump an HTML node, recursive behaviour, children are printed too.
740 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
741 xmlNodePtr cur
, const char *encoding ATTRIBUTE_UNUSED
,
743 xmlNodePtr root
, parent
;
745 const htmlElemDesc
* info
;
749 if ((cur
== NULL
) || (buf
== NULL
)) {
754 parent
= cur
->parent
;
757 case XML_HTML_DOCUMENT_NODE
:
758 case XML_DOCUMENT_NODE
:
759 if (((xmlDocPtr
) cur
)->intSubset
!= NULL
) {
760 htmlDtdDumpOutput(buf
, (xmlDocPtr
) cur
, NULL
);
762 if (cur
->children
!= NULL
) {
763 /* Always validate cur->parent when descending. */
764 if (cur
->parent
== parent
) {
770 xmlOutputBufferWriteString(buf
, "\n");
774 case XML_ELEMENT_NODE
:
776 * Some users like lxml are known to pass nodes with a corrupted
777 * tree structure. Fall back to a recursive call to handle this
780 if ((cur
->parent
!= parent
) && (cur
->children
!= NULL
)) {
781 htmlNodeDumpFormatOutput(buf
, doc
, cur
, encoding
, format
);
786 * Get specific HTML info for that node.
789 info
= htmlTagLookup(cur
->name
);
793 xmlOutputBufferWriteString(buf
, "<");
794 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
795 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
796 xmlOutputBufferWriteString(buf
, ":");
798 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
800 xmlNsListDumpOutput(buf
, cur
->nsDef
);
801 attr
= cur
->properties
;
802 while (attr
!= NULL
) {
803 htmlAttrDumpOutput(buf
, doc
, attr
);
807 if ((info
!= NULL
) && (info
->empty
)) {
808 xmlOutputBufferWriteString(buf
, ">");
809 } else if (cur
->children
== NULL
) {
810 if ((info
!= NULL
) && (info
->saveEndTag
!= 0) &&
811 (xmlStrcmp(BAD_CAST info
->name
, BAD_CAST
"html")) &&
812 (xmlStrcmp(BAD_CAST info
->name
, BAD_CAST
"body"))) {
813 xmlOutputBufferWriteString(buf
, ">");
815 xmlOutputBufferWriteString(buf
, "></");
816 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
817 xmlOutputBufferWriteString(buf
,
818 (const char *)cur
->ns
->prefix
);
819 xmlOutputBufferWriteString(buf
, ":");
821 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
822 xmlOutputBufferWriteString(buf
, ">");
825 xmlOutputBufferWriteString(buf
, ">");
826 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
827 (cur
->children
->type
!= HTML_TEXT_NODE
) &&
828 (cur
->children
->type
!= HTML_ENTITY_REF_NODE
) &&
829 (cur
->children
!= cur
->last
) &&
830 (cur
->name
!= NULL
) &&
831 (cur
->name
[0] != 'p')) /* p, pre, param */
832 xmlOutputBufferWriteString(buf
, "\n");
838 if ((format
) && (cur
->next
!= NULL
) &&
839 (info
!= NULL
) && (!info
->isinline
)) {
840 if ((cur
->next
->type
!= HTML_TEXT_NODE
) &&
841 (cur
->next
->type
!= HTML_ENTITY_REF_NODE
) &&
843 (parent
->name
!= NULL
) &&
844 (parent
->name
[0] != 'p')) /* p, pre, param */
845 xmlOutputBufferWriteString(buf
, "\n");
850 case XML_ATTRIBUTE_NODE
:
851 htmlAttrDumpOutput(buf
, doc
, (xmlAttrPtr
) cur
);
855 if (cur
->content
== NULL
)
857 if (((cur
->name
== (const xmlChar
*)xmlStringText
) ||
858 (cur
->name
!= (const xmlChar
*)xmlStringTextNoenc
)) &&
860 ((xmlStrcasecmp(parent
->name
, BAD_CAST
"script")) &&
861 (xmlStrcasecmp(parent
->name
, BAD_CAST
"style"))))) {
864 buffer
= xmlEncodeEntitiesReentrant(doc
, cur
->content
);
865 if (buffer
!= NULL
) {
866 xmlOutputBufferWriteString(buf
, (const char *)buffer
);
870 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
874 case HTML_COMMENT_NODE
:
875 if (cur
->content
!= NULL
) {
876 xmlOutputBufferWriteString(buf
, "<!--");
877 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
878 xmlOutputBufferWriteString(buf
, "-->");
883 if (cur
->name
!= NULL
) {
884 xmlOutputBufferWriteString(buf
, "<?");
885 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
886 if (cur
->content
!= NULL
) {
887 xmlOutputBufferWriteString(buf
, " ");
888 xmlOutputBufferWriteString(buf
,
889 (const char *)cur
->content
);
891 xmlOutputBufferWriteString(buf
, ">");
895 case HTML_ENTITY_REF_NODE
:
896 xmlOutputBufferWriteString(buf
, "&");
897 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
898 xmlOutputBufferWriteString(buf
, ";");
901 case HTML_PRESERVE_NODE
:
902 if (cur
->content
!= NULL
) {
903 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
914 if (cur
->next
!= NULL
) {
920 /* cur->parent was validated when descending. */
921 parent
= cur
->parent
;
923 if ((cur
->type
== XML_HTML_DOCUMENT_NODE
) ||
924 (cur
->type
== XML_DOCUMENT_NODE
)) {
925 xmlOutputBufferWriteString(buf
, "\n");
927 if ((format
) && (cur
->ns
== NULL
))
928 info
= htmlTagLookup(cur
->name
);
932 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
933 (cur
->last
->type
!= HTML_TEXT_NODE
) &&
934 (cur
->last
->type
!= HTML_ENTITY_REF_NODE
) &&
935 (cur
->children
!= cur
->last
) &&
936 (cur
->name
!= NULL
) &&
937 (cur
->name
[0] != 'p')) /* p, pre, param */
938 xmlOutputBufferWriteString(buf
, "\n");
940 xmlOutputBufferWriteString(buf
, "</");
941 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
942 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
943 xmlOutputBufferWriteString(buf
, ":");
945 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
946 xmlOutputBufferWriteString(buf
, ">");
948 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
949 (cur
->next
!= NULL
)) {
950 if ((cur
->next
->type
!= HTML_TEXT_NODE
) &&
951 (cur
->next
->type
!= HTML_ENTITY_REF_NODE
) &&
953 (parent
->name
!= NULL
) &&
954 (parent
->name
[0] != 'p')) /* p, pre, param */
955 xmlOutputBufferWriteString(buf
, "\n");
963 * htmlNodeDumpOutput:
964 * @buf: the HTML buffer output
966 * @cur: the current node
967 * @encoding: the encoding string (unused)
969 * Dump an HTML node, recursive behaviour, children are printed too,
970 * and formatting returns/spaces are added.
973 htmlNodeDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
974 xmlNodePtr cur
, const char *encoding ATTRIBUTE_UNUSED
) {
975 htmlNodeDumpFormatOutput(buf
, doc
, cur
, NULL
, 1);
979 * htmlDocContentDumpFormatOutput:
980 * @buf: the HTML buffer output
982 * @encoding: the encoding string (unused)
983 * @format: should formatting spaces be added
985 * Dump an HTML document.
988 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf
, xmlDocPtr cur
,
989 const char *encoding ATTRIBUTE_UNUSED
,
994 cur
->type
= XML_HTML_DOCUMENT_NODE
;
996 htmlNodeDumpFormatOutput(buf
, cur
, (xmlNodePtr
) cur
, NULL
, format
);
998 cur
->type
= (xmlElementType
) type
;
1002 * htmlDocContentDumpOutput:
1003 * @buf: the HTML buffer output
1004 * @cur: the document
1005 * @encoding: the encoding string (unused)
1007 * Dump an HTML document. Formatting return/spaces are added.
1010 htmlDocContentDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr cur
,
1011 const char *encoding ATTRIBUTE_UNUSED
) {
1012 htmlNodeDumpFormatOutput(buf
, cur
, (xmlNodePtr
) cur
, NULL
, 1);
1015 /************************************************************************
1017 * Saving functions front-ends *
1019 ************************************************************************/
1024 * @cur: the document
1026 * Dump an HTML document to an open FILE.
1028 * returns: the number of bytes written or -1 in case of failure.
1031 htmlDocDump(FILE *f
, xmlDocPtr cur
) {
1032 xmlOutputBufferPtr buf
;
1033 xmlCharEncodingHandlerPtr handler
= NULL
;
1034 const char *encoding
;
1039 if ((cur
== NULL
) || (f
== NULL
)) {
1043 encoding
= (const char *) htmlGetMetaEncoding(cur
);
1045 if (encoding
!= NULL
) {
1046 xmlCharEncoding enc
;
1048 enc
= xmlParseCharEncoding(encoding
);
1049 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
1050 handler
= xmlFindCharEncodingHandler(encoding
);
1051 if (handler
== NULL
)
1052 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
1056 * Fall back to HTML or ASCII when the encoding is unspecified
1058 if (handler
== NULL
)
1059 handler
= xmlFindCharEncodingHandler("HTML");
1060 if (handler
== NULL
)
1061 handler
= xmlFindCharEncodingHandler("ascii");
1064 buf
= xmlOutputBufferCreateFile(f
, handler
);
1065 if (buf
== NULL
) return(-1);
1066 htmlDocContentDumpOutput(buf
, cur
, NULL
);
1068 ret
= xmlOutputBufferClose(buf
);
1074 * @filename: the filename (or URL)
1075 * @cur: the document
1077 * Dump an HTML document to a file. If @filename is "-" the stdout file is used.
1079 * returns: the number of bytes written or -1 in case of failure.
1082 htmlSaveFile(const char *filename
, xmlDocPtr cur
) {
1083 xmlOutputBufferPtr buf
;
1084 xmlCharEncodingHandlerPtr handler
= NULL
;
1085 const char *encoding
;
1088 if ((cur
== NULL
) || (filename
== NULL
))
1093 encoding
= (const char *) htmlGetMetaEncoding(cur
);
1095 if (encoding
!= NULL
) {
1096 xmlCharEncoding enc
;
1098 enc
= xmlParseCharEncoding(encoding
);
1099 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
1100 handler
= xmlFindCharEncodingHandler(encoding
);
1101 if (handler
== NULL
)
1102 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
1106 * Fall back to HTML or ASCII when the encoding is unspecified
1108 if (handler
== NULL
)
1109 handler
= xmlFindCharEncodingHandler("HTML");
1110 if (handler
== NULL
)
1111 handler
= xmlFindCharEncodingHandler("ascii");
1115 * save the content to a temp buffer.
1117 buf
= xmlOutputBufferCreateFilename(filename
, handler
, cur
->compression
);
1118 if (buf
== NULL
) return(0);
1120 htmlDocContentDumpOutput(buf
, cur
, NULL
);
1122 ret
= xmlOutputBufferClose(buf
);
1127 * htmlSaveFileFormat:
1128 * @filename: the filename
1129 * @cur: the document
1130 * @format: should formatting spaces be added
1131 * @encoding: the document encoding
1133 * Dump an HTML document to a file using a given encoding.
1135 * returns: the number of bytes written or -1 in case of failure.
1138 htmlSaveFileFormat(const char *filename
, xmlDocPtr cur
,
1139 const char *encoding
, int format
) {
1140 xmlOutputBufferPtr buf
;
1141 xmlCharEncodingHandlerPtr handler
= NULL
;
1144 if ((cur
== NULL
) || (filename
== NULL
))
1149 if (encoding
!= NULL
) {
1150 xmlCharEncoding enc
;
1152 enc
= xmlParseCharEncoding(encoding
);
1153 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
1154 handler
= xmlFindCharEncodingHandler(encoding
);
1155 if (handler
== NULL
)
1156 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING
, NULL
, encoding
);
1158 htmlSetMetaEncoding(cur
, (const xmlChar
*) encoding
);
1160 htmlSetMetaEncoding(cur
, (const xmlChar
*) "UTF-8");
1163 * Fall back to HTML or ASCII when the encoding is unspecified
1165 if (handler
== NULL
)
1166 handler
= xmlFindCharEncodingHandler("HTML");
1167 if (handler
== NULL
)
1168 handler
= xmlFindCharEncodingHandler("ascii");
1172 * save the content to a temp buffer.
1174 buf
= xmlOutputBufferCreateFilename(filename
, handler
, 0);
1175 if (buf
== NULL
) return(0);
1177 htmlDocContentDumpFormatOutput(buf
, cur
, encoding
, format
);
1179 ret
= xmlOutputBufferClose(buf
);
1185 * @filename: the filename
1186 * @cur: the document
1187 * @encoding: the document encoding
1189 * Dump an HTML document to a file using a given encoding
1190 * and formatting returns/spaces are added.
1192 * returns: the number of bytes written or -1 in case of failure.
1195 htmlSaveFileEnc(const char *filename
, xmlDocPtr cur
, const char *encoding
) {
1196 return(htmlSaveFileFormat(filename
, cur
, encoding
, 1));
1199 #endif /* LIBXML_OUTPUT_ENABLED */
1201 #endif /* LIBXML_HTML_ENABLED */