2 * HTMLtree.c : implementation of access functions for an HTML tree.
4 * See Copyright for the status of this software.
12 #ifdef LIBXML_HTML_ENABLED
14 #include <string.h> /* for memset() only ! */
23 #include <libxml/xmlmemory.h>
24 #include <libxml/HTMLparser.h>
25 #include <libxml/HTMLtree.h>
26 #include <libxml/entities.h>
27 #include <libxml/valid.h>
28 #include <libxml/xmlerror.h>
29 #include <libxml/parserInternals.h>
30 #include <libxml/globals.h>
31 #include <libxml/uri.h>
33 /************************************************************************
35 * Getting/Setting encoding meta tags *
37 ************************************************************************/
40 * htmlGetMetaEncoding:
43 * Encoding definition lookup in the Meta tags
45 * Returns the current encoding as flagged in the HTML source
48 htmlGetMetaEncoding(htmlDocPtr doc
) {
50 const xmlChar
*content
;
51 const xmlChar
*encoding
;
61 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
62 if (xmlStrEqual(cur
->name
, BAD_CAST
"html"))
64 if (xmlStrEqual(cur
->name
, BAD_CAST
"head"))
66 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta"))
79 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
80 if (xmlStrEqual(cur
->name
, BAD_CAST
"head"))
82 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta"))
93 * Search the meta elements
97 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
98 if (xmlStrEqual(cur
->name
, BAD_CAST
"meta")) {
99 xmlAttrPtr attr
= cur
->properties
;
101 const xmlChar
*value
;
105 while (attr
!= NULL
) {
106 if ((attr
->children
!= NULL
) &&
107 (attr
->children
->type
== XML_TEXT_NODE
) &&
108 (attr
->children
->next
== NULL
)) {
109 value
= attr
->children
->content
;
110 if ((!xmlStrcasecmp(attr
->name
, BAD_CAST
"http-equiv"))
111 && (!xmlStrcasecmp(value
, BAD_CAST
"Content-Type")))
113 else if ((value
!= NULL
)
114 && (!xmlStrcasecmp(attr
->name
, BAD_CAST
"content")))
116 if ((http
!= 0) && (content
!= NULL
))
128 encoding
= xmlStrstr(content
, BAD_CAST
"charset=");
129 if (encoding
== NULL
)
130 encoding
= xmlStrstr(content
, BAD_CAST
"Charset=");
131 if (encoding
== NULL
)
132 encoding
= xmlStrstr(content
, BAD_CAST
"CHARSET=");
133 if (encoding
!= NULL
) {
136 encoding
= xmlStrstr(content
, BAD_CAST
"charset =");
137 if (encoding
== NULL
)
138 encoding
= xmlStrstr(content
, BAD_CAST
"Charset =");
139 if (encoding
== NULL
)
140 encoding
= xmlStrstr(content
, BAD_CAST
"CHARSET =");
141 if (encoding
!= NULL
)
144 if (encoding
!= NULL
) {
145 while ((*encoding
== ' ') || (*encoding
== '\t')) encoding
++;
151 * htmlSetMetaEncoding:
153 * @encoding: the encoding string
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
159 * Returns 0 in case of success and -1 in case of error
162 htmlSetMetaEncoding(htmlDocPtr doc
, const xmlChar
*encoding
) {
163 htmlNodePtr cur
, meta
= NULL
, head
= NULL
;
164 const xmlChar
*content
= NULL
;
165 char newcontent
[100];
171 /* "html" isn't a real encoding; it's just libxml2's way to get entities */
172 if (!xmlStrcasecmp(encoding
, BAD_CAST
"html"))
175 if (encoding
!= NULL
) {
176 snprintf(newcontent
, sizeof(newcontent
), "text/html; charset=%s",
178 newcontent
[sizeof(newcontent
) - 1] = 0;
186 while (cur
!= NULL
) {
187 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
188 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"html") == 0)
190 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"head") == 0)
192 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0)
204 while (cur
!= NULL
) {
205 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
206 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"head") == 0)
208 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0) {
219 if (cur
->children
== NULL
)
225 * Search and update all the remaining meta elements carrying
226 * encoding information
228 while (cur
!= NULL
) {
229 if ((cur
->type
== XML_ELEMENT_NODE
) && (cur
->name
!= NULL
)) {
230 if (xmlStrcasecmp(cur
->name
, BAD_CAST
"meta") == 0) {
231 xmlAttrPtr attr
= cur
->properties
;
233 const xmlChar
*value
;
237 while (attr
!= NULL
) {
238 if ((attr
->children
!= NULL
) &&
239 (attr
->children
->type
== XML_TEXT_NODE
) &&
240 (attr
->children
->next
== NULL
)) {
241 value
= attr
->children
->content
;
242 if ((!xmlStrcasecmp(attr
->name
, BAD_CAST
"http-equiv"))
243 && (!xmlStrcasecmp(value
, BAD_CAST
"Content-Type")))
247 if ((value
!= NULL
) &&
248 (!xmlStrcasecmp(attr
->name
, BAD_CAST
"content")))
251 if ((http
!= 0) && (content
!= NULL
))
256 if ((http
!= 0) && (content
!= NULL
)) {
267 if ((encoding
!= NULL
) && (head
!= NULL
)) {
269 * Create a new Meta element with the right attributes
272 meta
= xmlNewDocNode(doc
, NULL
, BAD_CAST
"meta", NULL
);
273 if (head
->children
== NULL
)
274 xmlAddChild(head
, meta
);
276 xmlAddPrevSibling(head
->children
, meta
);
277 xmlNewProp(meta
, BAD_CAST
"http-equiv", BAD_CAST
"Content-Type");
278 xmlNewProp(meta
, BAD_CAST
"content", BAD_CAST newcontent
);
281 /* change the document only if there is a real encoding change */
282 if (xmlStrcasestr(content
, encoding
) == NULL
) {
283 xmlSetProp(meta
, BAD_CAST
"content", BAD_CAST newcontent
);
294 * These are the HTML attributes which will be output
295 * in minimized form, i.e. <option selected="selected"> will be
296 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
299 static const char* htmlBooleanAttrs
[] = {
300 "checked", "compact", "declare", "defer", "disabled", "ismap",
301 "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
308 * @name: the name of the attribute to check
310 * Determine if a given attribute is a boolean attribute.
312 * returns: false if the attribute is not boolean, true otherwise.
315 htmlIsBooleanAttr(const xmlChar
*name
)
319 while (htmlBooleanAttrs
[i
] != NULL
) {
320 if (xmlStrcasecmp((const xmlChar
*)htmlBooleanAttrs
[i
], name
) == 0)
327 #ifdef LIBXML_OUTPUT_ENABLED
329 * private routine exported from xmlIO.c
332 xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder
);
333 /************************************************************************
335 * Output error handlers *
337 ************************************************************************/
340 * @extra: extra information
342 * Handle an out of memory condition
345 htmlSaveErrMemory(const char *extra
)
347 __xmlSimpleError(XML_FROM_OUTPUT
, XML_ERR_NO_MEMORY
, NULL
, NULL
, extra
);
352 * @code: the error number
353 * @node: the location of the error.
354 * @extra: extra information
356 * Handle an error encountered while saving (outputting) an HTML tree
359 htmlSaveErr(int code
, xmlNodePtr node
, const char *extra
)
361 const char *msg
= NULL
;
364 case XML_SAVE_NOT_UTF8
:
365 msg
= "string is not in UTF-8\n";
367 case XML_SAVE_CHAR_INVALID
:
368 msg
= "invalid character value\n";
370 case XML_SAVE_UNKNOWN_ENCODING
:
371 msg
= "unknown encoding %s\n";
373 case XML_SAVE_NO_DOCTYPE
:
374 msg
= "HTML has no DOCTYPE\n";
377 msg
= "unexpected error number\n";
379 __xmlSimpleError(XML_FROM_OUTPUT
, code
, node
, msg
, extra
);
382 /************************************************************************
384 * Dumping HTML tree content to a simple buffer *
386 ************************************************************************/
389 htmlNodeDumpFormat(xmlBufferPtr buf
, xmlDocPtr doc
, xmlNodePtr cur
,
393 * htmlNodeDumpFormat:
394 * @buf: the HTML buffer output
396 * @cur: the current node
397 * @format: should formatting spaces be added
399 * Dump an HTML node, recursive behaviour, children are printed too.
401 * Returns the number of bytes written or -1 in case of error
404 htmlNodeDumpFormat(xmlBufferPtr buf
, xmlDocPtr doc
, xmlNodePtr cur
,
408 xmlOutputBufferPtr outbuf
;
416 outbuf
= (xmlOutputBufferPtr
) xmlMalloc(sizeof(xmlOutputBuffer
));
417 if (outbuf
== NULL
) {
418 htmlSaveErrMemory("allocating HTML output buffer");
421 memset(outbuf
, 0, (size_t) sizeof(xmlOutputBuffer
));
422 outbuf
->buffer
= buf
;
423 outbuf
->encoder
= NULL
;
424 outbuf
->writecallback
= NULL
;
425 outbuf
->closecallback
= NULL
;
426 outbuf
->context
= NULL
;
430 htmlNodeDumpFormatOutput(outbuf
, doc
, cur
, NULL
, format
);
432 ret
= buf
->use
- use
;
438 * @buf: the HTML buffer output
440 * @cur: the current node
442 * Dump an HTML node, recursive behaviour, children are printed too,
443 * and formatting returns are added.
445 * Returns the number of bytes written or -1 in case of error
448 htmlNodeDump(xmlBufferPtr buf
, xmlDocPtr doc
, xmlNodePtr cur
) {
451 return(htmlNodeDumpFormat(buf
, doc
, cur
, 1));
455 * htmlNodeDumpFileFormat:
456 * @out: the FILE pointer
458 * @cur: the current node
459 * @encoding: the document encoding
460 * @format: should formatting spaces be added
462 * Dump an HTML node, recursive behaviour, children are printed too.
464 * TODO: if encoding == NULL try to save in the doc encoding
466 * returns: the number of bytes written or -1 in case of failure.
469 htmlNodeDumpFileFormat(FILE *out
, xmlDocPtr doc
,
470 xmlNodePtr cur
, const char *encoding
, int format
) {
471 xmlOutputBufferPtr buf
;
472 xmlCharEncodingHandlerPtr handler
= NULL
;
477 if (encoding
!= NULL
) {
480 enc
= xmlParseCharEncoding(encoding
);
481 if (enc
!= XML_CHAR_ENCODING_UTF8
) {
482 handler
= xmlFindCharEncodingHandler(encoding
);
489 * Fallback to HTML or ASCII when the encoding is unspecified
492 handler
= xmlFindCharEncodingHandler("HTML");
494 handler
= xmlFindCharEncodingHandler("ascii");
497 * save the content to a temp buffer.
499 buf
= xmlOutputBufferCreateFile(out
, handler
);
500 if (buf
== NULL
) return(0);
502 htmlNodeDumpFormatOutput(buf
, doc
, cur
, encoding
, format
);
504 ret
= xmlOutputBufferClose(buf
);
510 * @out: the FILE pointer
512 * @cur: the current node
514 * Dump an HTML node, recursive behaviour, children are printed too,
515 * and formatting returns are added.
518 htmlNodeDumpFile(FILE *out
, xmlDocPtr doc
, xmlNodePtr cur
) {
519 htmlNodeDumpFileFormat(out
, doc
, cur
, NULL
, 1);
523 * htmlDocDumpMemoryFormat:
525 * @mem: OUT: the memory pointer
526 * @size: OUT: the memory length
527 * @format: should formatting spaces be added
529 * Dump an HTML document in memory and return the xmlChar * and its size.
530 * It's up to the caller to free the memory.
533 htmlDocDumpMemoryFormat(xmlDocPtr cur
, xmlChar
**mem
, int *size
, int format
) {
534 xmlOutputBufferPtr buf
;
535 xmlCharEncodingHandlerPtr handler
= NULL
;
536 const char *encoding
;
540 if ((mem
== NULL
) || (size
== NULL
))
548 encoding
= (const char *) htmlGetMetaEncoding(cur
);
550 if (encoding
!= NULL
) {
553 enc
= xmlParseCharEncoding(encoding
);
554 if (enc
!= cur
->charset
) {
555 if (cur
->charset
!= XML_CHAR_ENCODING_UTF8
) {
564 handler
= xmlFindCharEncodingHandler(encoding
);
565 if (handler
== NULL
) {
571 handler
= xmlFindCharEncodingHandler(encoding
);
576 * Fallback to HTML or ASCII when the encoding is unspecified
579 handler
= xmlFindCharEncodingHandler("HTML");
581 handler
= xmlFindCharEncodingHandler("ascii");
583 buf
= xmlAllocOutputBufferInternal(handler
);
590 htmlDocContentDumpFormatOutput(buf
, cur
, NULL
, format
);
592 xmlOutputBufferFlush(buf
);
593 if (buf
->conv
!= NULL
) {
594 *size
= buf
->conv
->use
;
595 *mem
= xmlStrndup(buf
->conv
->content
, *size
);
597 *size
= buf
->buffer
->use
;
598 *mem
= xmlStrndup(buf
->buffer
->content
, *size
);
600 (void)xmlOutputBufferClose(buf
);
606 * @mem: OUT: the memory pointer
607 * @size: OUT: the memory length
609 * Dump an HTML document in memory and return the xmlChar * and its size.
610 * It's up to the caller to free the memory.
613 htmlDocDumpMemory(xmlDocPtr cur
, xmlChar
**mem
, int *size
) {
614 htmlDocDumpMemoryFormat(cur
, mem
, size
, 1);
618 /************************************************************************
620 * Dumping HTML tree content to an I/O output buffer *
622 ************************************************************************/
624 void xmlNsListDumpOutput(xmlOutputBufferPtr buf
, xmlNsPtr cur
);
628 * @buf: the HTML buffer output
630 * @encoding: the encoding string
632 * TODO: check whether encoding is needed
634 * Dump the HTML document DTD, if any.
637 htmlDtdDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
638 const char *encoding ATTRIBUTE_UNUSED
) {
639 xmlDtdPtr cur
= doc
->intSubset
;
642 htmlSaveErr(XML_SAVE_NO_DOCTYPE
, (xmlNodePtr
) doc
, NULL
);
645 xmlOutputBufferWriteString(buf
, "<!DOCTYPE ");
646 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
647 if (cur
->ExternalID
!= NULL
) {
648 xmlOutputBufferWriteString(buf
, " PUBLIC ");
649 xmlBufferWriteQuotedString(buf
->buffer
, cur
->ExternalID
);
650 if (cur
->SystemID
!= NULL
) {
651 xmlOutputBufferWriteString(buf
, " ");
652 xmlBufferWriteQuotedString(buf
->buffer
, cur
->SystemID
);
654 } else if (cur
->SystemID
!= NULL
) {
655 xmlOutputBufferWriteString(buf
, " SYSTEM ");
656 xmlBufferWriteQuotedString(buf
->buffer
, cur
->SystemID
);
658 xmlOutputBufferWriteString(buf
, ">\n");
662 * htmlAttrDumpOutput:
663 * @buf: the HTML buffer output
665 * @cur: the attribute pointer
666 * @encoding: the encoding string
668 * Dump an HTML attribute
671 htmlAttrDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
, xmlAttrPtr cur
,
672 const char *encoding ATTRIBUTE_UNUSED
) {
676 * TODO: The html output method should not escape a & character
677 * occurring in an attribute value immediately followed by
678 * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
684 xmlOutputBufferWriteString(buf
, " ");
685 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
686 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
687 xmlOutputBufferWriteString(buf
, ":");
689 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
690 if ((cur
->children
!= NULL
) && (!htmlIsBooleanAttr(cur
->name
))) {
691 value
= xmlNodeListGetString(doc
, cur
->children
, 0);
693 xmlOutputBufferWriteString(buf
, "=");
694 if ((cur
->ns
== NULL
) && (cur
->parent
!= NULL
) &&
695 (cur
->parent
->ns
== NULL
) &&
696 ((!xmlStrcasecmp(cur
->name
, BAD_CAST
"href")) ||
697 (!xmlStrcasecmp(cur
->name
, BAD_CAST
"action")) ||
698 (!xmlStrcasecmp(cur
->name
, BAD_CAST
"src")) ||
699 ((!xmlStrcasecmp(cur
->name
, BAD_CAST
"name")) &&
700 (!xmlStrcasecmp(cur
->parent
->name
, BAD_CAST
"a"))))) {
702 xmlChar
*tmp
= value
;
704 while (IS_BLANK_CH(*tmp
)) tmp
++;
706 escaped
= xmlURIEscapeStr(tmp
, BAD_CAST
"@/:=?;#%&,+");
707 if (escaped
!= NULL
) {
708 xmlBufferWriteQuotedString(buf
->buffer
, escaped
);
711 xmlBufferWriteQuotedString(buf
->buffer
, value
);
714 xmlBufferWriteQuotedString(buf
->buffer
, value
);
718 xmlOutputBufferWriteString(buf
, "=\"\"");
724 * htmlAttrListDumpOutput:
725 * @buf: the HTML buffer output
727 * @cur: the first attribute pointer
728 * @encoding: the encoding string
730 * Dump a list of HTML attributes
733 htmlAttrListDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
, xmlAttrPtr cur
, const char *encoding
) {
737 while (cur
!= NULL
) {
738 htmlAttrDumpOutput(buf
, doc
, cur
, encoding
);
746 * htmlNodeListDumpOutput:
747 * @buf: the HTML buffer output
749 * @cur: the first node
750 * @encoding: the encoding string
751 * @format: should formatting spaces be added
753 * Dump an HTML node list, recursive behaviour, children are printed too.
756 htmlNodeListDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
757 xmlNodePtr cur
, const char *encoding
, int format
) {
761 while (cur
!= NULL
) {
762 htmlNodeDumpFormatOutput(buf
, doc
, cur
, encoding
, format
);
768 * htmlNodeDumpFormatOutput:
769 * @buf: the HTML buffer output
771 * @cur: the current node
772 * @encoding: the encoding string
773 * @format: should formatting spaces be added
775 * Dump an HTML node, recursive behaviour, children are printed too.
778 htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
779 xmlNodePtr cur
, const char *encoding
, int format
) {
780 const htmlElemDesc
* info
;
784 if ((cur
== NULL
) || (buf
== NULL
)) {
790 if (cur
->type
== XML_DTD_NODE
)
792 if ((cur
->type
== XML_HTML_DOCUMENT_NODE
) ||
793 (cur
->type
== XML_DOCUMENT_NODE
)){
794 htmlDocContentDumpOutput(buf
, (xmlDocPtr
) cur
, encoding
);
797 if (cur
->type
== XML_ATTRIBUTE_NODE
) {
798 htmlAttrDumpOutput(buf
, doc
, (xmlAttrPtr
) cur
, encoding
);
801 if (cur
->type
== HTML_TEXT_NODE
) {
802 if (cur
->content
!= NULL
) {
803 if (((cur
->name
== (const xmlChar
*)xmlStringText
) ||
804 (cur
->name
!= (const xmlChar
*)xmlStringTextNoenc
)) &&
805 ((cur
->parent
== NULL
) ||
806 ((xmlStrcasecmp(cur
->parent
->name
, BAD_CAST
"script")) &&
807 (xmlStrcasecmp(cur
->parent
->name
, BAD_CAST
"style"))))) {
810 buffer
= xmlEncodeEntitiesReentrant(doc
, cur
->content
);
811 if (buffer
!= NULL
) {
812 xmlOutputBufferWriteString(buf
, (const char *)buffer
);
816 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
821 if (cur
->type
== HTML_COMMENT_NODE
) {
822 if (cur
->content
!= NULL
) {
823 xmlOutputBufferWriteString(buf
, "<!--");
824 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
825 xmlOutputBufferWriteString(buf
, "-->");
829 if (cur
->type
== HTML_PI_NODE
) {
830 if (cur
->name
== NULL
)
832 xmlOutputBufferWriteString(buf
, "<?");
833 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
834 if (cur
->content
!= NULL
) {
835 xmlOutputBufferWriteString(buf
, " ");
836 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
838 xmlOutputBufferWriteString(buf
, ">");
841 if (cur
->type
== HTML_ENTITY_REF_NODE
) {
842 xmlOutputBufferWriteString(buf
, "&");
843 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
844 xmlOutputBufferWriteString(buf
, ";");
847 if (cur
->type
== HTML_PRESERVE_NODE
) {
848 if (cur
->content
!= NULL
) {
849 xmlOutputBufferWriteString(buf
, (const char *)cur
->content
);
855 * Get specific HTML info for that node.
858 info
= htmlTagLookup(cur
->name
);
862 xmlOutputBufferWriteString(buf
, "<");
863 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
864 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
865 xmlOutputBufferWriteString(buf
, ":");
867 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
869 xmlNsListDumpOutput(buf
, cur
->nsDef
);
870 if (cur
->properties
!= NULL
)
871 htmlAttrListDumpOutput(buf
, doc
, cur
->properties
, encoding
);
873 if ((info
!= NULL
) && (info
->empty
)) {
874 xmlOutputBufferWriteString(buf
, ">");
875 if ((format
) && (!info
->isinline
) && (cur
->next
!= NULL
)) {
876 if ((cur
->next
->type
!= HTML_TEXT_NODE
) &&
877 (cur
->next
->type
!= HTML_ENTITY_REF_NODE
) &&
878 (cur
->parent
!= NULL
) &&
879 (cur
->parent
->name
!= NULL
) &&
880 (cur
->parent
->name
[0] != 'p')) /* p, pre, param */
881 xmlOutputBufferWriteString(buf
, "\n");
885 if (((cur
->type
== XML_ELEMENT_NODE
) || (cur
->content
== NULL
)) &&
886 (cur
->children
== NULL
)) {
887 if ((info
!= NULL
) && (info
->saveEndTag
!= 0) &&
888 (xmlStrcmp(BAD_CAST info
->name
, BAD_CAST
"html")) &&
889 (xmlStrcmp(BAD_CAST info
->name
, BAD_CAST
"body"))) {
890 xmlOutputBufferWriteString(buf
, ">");
892 xmlOutputBufferWriteString(buf
, "></");
893 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
894 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
895 xmlOutputBufferWriteString(buf
, ":");
897 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
898 xmlOutputBufferWriteString(buf
, ">");
900 if ((format
) && (cur
->next
!= NULL
) &&
901 (info
!= NULL
) && (!info
->isinline
)) {
902 if ((cur
->next
->type
!= HTML_TEXT_NODE
) &&
903 (cur
->next
->type
!= HTML_ENTITY_REF_NODE
) &&
904 (cur
->parent
!= NULL
) &&
905 (cur
->parent
->name
!= NULL
) &&
906 (cur
->parent
->name
[0] != 'p')) /* p, pre, param */
907 xmlOutputBufferWriteString(buf
, "\n");
911 xmlOutputBufferWriteString(buf
, ">");
912 if ((cur
->type
!= XML_ELEMENT_NODE
) &&
913 (cur
->content
!= NULL
)) {
915 * Uses the OutputBuffer property to automatically convert
916 * invalid characters to charrefs
919 xmlOutputBufferWriteString(buf
, (const char *) cur
->content
);
921 if (cur
->children
!= NULL
) {
922 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
923 (cur
->children
->type
!= HTML_TEXT_NODE
) &&
924 (cur
->children
->type
!= HTML_ENTITY_REF_NODE
) &&
925 (cur
->children
!= cur
->last
) &&
926 (cur
->name
!= NULL
) &&
927 (cur
->name
[0] != 'p')) /* p, pre, param */
928 xmlOutputBufferWriteString(buf
, "\n");
929 htmlNodeListDumpOutput(buf
, doc
, cur
->children
, encoding
, format
);
930 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
931 (cur
->last
->type
!= HTML_TEXT_NODE
) &&
932 (cur
->last
->type
!= HTML_ENTITY_REF_NODE
) &&
933 (cur
->children
!= cur
->last
) &&
934 (cur
->name
!= NULL
) &&
935 (cur
->name
[0] != 'p')) /* p, pre, param */
936 xmlOutputBufferWriteString(buf
, "\n");
938 xmlOutputBufferWriteString(buf
, "</");
939 if ((cur
->ns
!= NULL
) && (cur
->ns
->prefix
!= NULL
)) {
940 xmlOutputBufferWriteString(buf
, (const char *)cur
->ns
->prefix
);
941 xmlOutputBufferWriteString(buf
, ":");
943 xmlOutputBufferWriteString(buf
, (const char *)cur
->name
);
944 xmlOutputBufferWriteString(buf
, ">");
945 if ((format
) && (info
!= NULL
) && (!info
->isinline
) &&
946 (cur
->next
!= NULL
)) {
947 if ((cur
->next
->type
!= HTML_TEXT_NODE
) &&
948 (cur
->next
->type
!= HTML_ENTITY_REF_NODE
) &&
949 (cur
->parent
!= NULL
) &&
950 (cur
->parent
->name
!= NULL
) &&
951 (cur
->parent
->name
[0] != 'p')) /* p, pre, param */
952 xmlOutputBufferWriteString(buf
, "\n");
957 * htmlNodeDumpOutput:
958 * @buf: the HTML buffer output
960 * @cur: the current node
961 * @encoding: the encoding string
963 * Dump an HTML node, recursive behaviour, children are printed too,
964 * and formatting returns/spaces are added.
967 htmlNodeDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr doc
,
968 xmlNodePtr cur
, const char *encoding
) {
969 htmlNodeDumpFormatOutput(buf
, doc
, cur
, encoding
, 1);
973 * htmlDocContentDumpFormatOutput:
974 * @buf: the HTML buffer output
976 * @encoding: the encoding string
977 * @format: should formatting spaces be added
979 * Dump an HTML document.
982 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf
, xmlDocPtr cur
,
983 const char *encoding
, int format
) {
988 if ((buf
== NULL
) || (cur
== NULL
))
992 * force to output the stuff as HTML, especially for entities
995 cur
->type
= XML_HTML_DOCUMENT_NODE
;
996 if (cur
->intSubset
!= NULL
) {
997 htmlDtdDumpOutput(buf
, cur
, NULL
);
999 if (cur
->children
!= NULL
) {
1000 htmlNodeListDumpOutput(buf
, cur
, cur
->children
, encoding
, format
);
1002 xmlOutputBufferWriteString(buf
, "\n");
1003 cur
->type
= (xmlElementType
) type
;
1007 * htmlDocContentDumpOutput:
1008 * @buf: the HTML buffer output
1009 * @cur: the document
1010 * @encoding: the encoding string
1012 * Dump an HTML document. Formatting returns/spaces are added.
1015 htmlDocContentDumpOutput(xmlOutputBufferPtr buf
, xmlDocPtr cur
,
1016 const char *encoding
) {
1017 htmlDocContentDumpFormatOutput(buf
, cur
, encoding
, 1);
1020 /************************************************************************
1022 * Saving functions front-ends *
1024 ************************************************************************/
1029 * @cur: the document
1031 * Dump an HTML document to an open FILE.
1033 * returns: the number of bytes written or -1 in case of failure.
1036 htmlDocDump(FILE *f
, xmlDocPtr cur
) {
1037 xmlOutputBufferPtr buf
;
1038 xmlCharEncodingHandlerPtr handler
= NULL
;
1039 const char *encoding
;
1044 if ((cur
== NULL
) || (f
== NULL
)) {
1048 encoding
= (const char *) htmlGetMetaEncoding(cur
);
1050 if (encoding
!= NULL
) {
1051 xmlCharEncoding enc
;
1053 enc
= xmlParseCharEncoding(encoding
);
1054 if (enc
!= cur
->charset
) {
1055 if (cur
->charset
!= XML_CHAR_ENCODING_UTF8
) {
1062 handler
= xmlFindCharEncodingHandler(encoding
);
1063 if (handler
== NULL
)
1066 handler
= xmlFindCharEncodingHandler(encoding
);
1071 * Fallback to HTML or ASCII when the encoding is unspecified
1073 if (handler
== NULL
)
1074 handler
= xmlFindCharEncodingHandler("HTML");
1075 if (handler
== NULL
)
1076 handler
= xmlFindCharEncodingHandler("ascii");
1078 buf
= xmlOutputBufferCreateFile(f
, handler
);
1079 if (buf
== NULL
) return(-1);
1080 htmlDocContentDumpOutput(buf
, cur
, NULL
);
1082 ret
= xmlOutputBufferClose(buf
);
1088 * @filename: the filename (or URL)
1089 * @cur: the document
1091 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1093 * returns: the number of bytes written or -1 in case of failure.
1096 htmlSaveFile(const char *filename
, xmlDocPtr cur
) {
1097 xmlOutputBufferPtr buf
;
1098 xmlCharEncodingHandlerPtr handler
= NULL
;
1099 const char *encoding
;
1102 if ((cur
== NULL
) || (filename
== NULL
))
1107 encoding
= (const char *) htmlGetMetaEncoding(cur
);
1109 if (encoding
!= NULL
) {
1110 xmlCharEncoding enc
;
1112 enc
= xmlParseCharEncoding(encoding
);
1113 if (enc
!= cur
->charset
) {
1114 if (cur
->charset
!= XML_CHAR_ENCODING_UTF8
) {
1121 handler
= xmlFindCharEncodingHandler(encoding
);
1122 if (handler
== NULL
)
1128 * Fallback to HTML or ASCII when the encoding is unspecified
1130 if (handler
== NULL
)
1131 handler
= xmlFindCharEncodingHandler("HTML");
1132 if (handler
== NULL
)
1133 handler
= xmlFindCharEncodingHandler("ascii");
1136 * save the content to a temp buffer.
1138 buf
= xmlOutputBufferCreateFilename(filename
, handler
, cur
->compression
);
1139 if (buf
== NULL
) return(0);
1141 htmlDocContentDumpOutput(buf
, cur
, NULL
);
1143 ret
= xmlOutputBufferClose(buf
);
1148 * htmlSaveFileFormat:
1149 * @filename: the filename
1150 * @cur: the document
1151 * @format: should formatting spaces be added
1152 * @encoding: the document encoding
1154 * Dump an HTML document to a file using a given encoding.
1156 * returns: the number of bytes written or -1 in case of failure.
1159 htmlSaveFileFormat(const char *filename
, xmlDocPtr cur
,
1160 const char *encoding
, int format
) {
1161 xmlOutputBufferPtr buf
;
1162 xmlCharEncodingHandlerPtr handler
= NULL
;
1165 if ((cur
== NULL
) || (filename
== NULL
))
1170 if (encoding
!= NULL
) {
1171 xmlCharEncoding enc
;
1173 enc
= xmlParseCharEncoding(encoding
);
1174 if (enc
!= cur
->charset
) {
1175 if (cur
->charset
!= XML_CHAR_ENCODING_UTF8
) {
1182 handler
= xmlFindCharEncodingHandler(encoding
);
1183 if (handler
== NULL
)
1186 htmlSetMetaEncoding(cur
, (const xmlChar
*) encoding
);
1188 htmlSetMetaEncoding(cur
, (const xmlChar
*) "UTF-8");
1192 * Fallback to HTML or ASCII when the encoding is unspecified
1194 if (handler
== NULL
)
1195 handler
= xmlFindCharEncodingHandler("HTML");
1196 if (handler
== NULL
)
1197 handler
= xmlFindCharEncodingHandler("ascii");
1200 * save the content to a temp buffer.
1202 buf
= xmlOutputBufferCreateFilename(filename
, handler
, 0);
1203 if (buf
== NULL
) return(0);
1205 htmlDocContentDumpFormatOutput(buf
, cur
, encoding
, format
);
1207 ret
= xmlOutputBufferClose(buf
);
1213 * @filename: the filename
1214 * @cur: the document
1215 * @encoding: the document encoding
1217 * Dump an HTML document to a file using a given encoding
1218 * and formatting returns/spaces are added.
1220 * returns: the number of bytes written or -1 in case of failure.
1223 htmlSaveFileEnc(const char *filename
, xmlDocPtr cur
, const char *encoding
) {
1224 return(htmlSaveFileFormat(filename
, cur
, encoding
, 1));
1227 #endif /* LIBXML_OUTPUT_ENABLED */
1229 #define bottom_HTMLtree
1230 #include "elfgcchack.h"
1231 #endif /* LIBXML_HTML_ENABLED */