dsrc isn't necessary for this repo
[client-tools.git] / src / external / 3rd / library / libxml / testHTML.c
blobdbfccc39997f874cbeff090456aa9bdedda74cba
1 /*
2 * testHTML.c : a small tester program for HTML input.
4 * See Copyright for the status of this software.
6 * daniel@veillard.com
7 */
9 #include "libxml.h"
11 #ifdef LIBXML_HTML_ENABLED
13 #include <string.h>
14 #include <stdarg.h>
17 #ifdef HAVE_SYS_TYPES_H
18 #include <sys/types.h>
19 #endif
20 #ifdef HAVE_SYS_STAT_H
21 #include <sys/stat.h>
22 #endif
23 #ifdef HAVE_FCNTL_H
24 #include <fcntl.h>
25 #endif
26 #ifdef HAVE_UNISTD_H
27 #include <unistd.h>
28 #endif
29 #ifdef HAVE_STDLIB_H
30 #include <stdlib.h>
31 #endif
33 #include <libxml/xmlmemory.h>
34 #include <libxml/HTMLparser.h>
35 #include <libxml/HTMLtree.h>
36 #include <libxml/debugXML.h>
37 #include <libxml/xmlerror.h>
38 #include <libxml/globals.h>
/* Command-line option state; each counter is bumped by main() when its switch is seen. */
40 #ifdef LIBXML_DEBUG_ENABLED
41 static int debug = 0; /* -debug/--debug: dump with xmlDebugDumpDocument() */
42 #endif
43 static int copy = 0; /* -copy/--copy: run the tree through xmlCopyDoc() first */
44 static int sax = 0; /* -sax/--sax: use parseSAXFile() instead of building a tree */
45 static int repeat = 0; /* -repeat/--repeat: each file is parsed 100 * repeat times */
46 static int noout = 0; /* -noout/--noout: suppress the dump / the debug SAX pass */
47 static int push = 0; /* -push/--push: feed the file through the push parser */
48 static char *encoding = NULL; /* argument following -encode/--encode, given to htmlSaveFileEnc() */
/*
 * SAX handler table whose callback slots are all NULL; parseSAXFile() uses it
 * for its first pass, which therefore prints no callback trace.
 */
50 xmlSAXHandler emptySAXHandlerStruct = {
51 NULL, /* internalSubset */
52 NULL, /* isStandalone */
53 NULL, /* hasInternalSubset */
54 NULL, /* hasExternalSubset */
55 NULL, /* resolveEntity */
56 NULL, /* getEntity */
57 NULL, /* entityDecl */
58 NULL, /* notationDecl */
59 NULL, /* attributeDecl */
60 NULL, /* elementDecl */
61 NULL, /* unparsedEntityDecl */
62 NULL, /* setDocumentLocator */
63 NULL, /* startDocument */
64 NULL, /* endDocument */
65 NULL, /* startElement */
66 NULL, /* endElement */
67 NULL, /* reference */
68 NULL, /* characters */
69 NULL, /* ignorableWhitespace */
70 NULL, /* processingInstruction */
71 NULL, /* comment */
72 NULL, /* warning */
73 NULL, /* error */
74 NULL, /* fatalError */
75 NULL, /* getParameterEntity */
76 NULL, /* cdataBlock */
77 NULL, /* externalSubset */
/* Pointer form of the empty table, as passed to the parser entry points. */
81 xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
/* Forward declaration; the definition follows debugSAXHandlerStruct below. */
82 extern xmlSAXHandlerPtr debugSAXHandler;
84 /************************************************************************
85 * *
86 * Debug Handlers *
87 * *
88 ************************************************************************/
90 /**
91 * isStandaloneDebug:
92 * @ctxt: An XML parser context
94 * Is this document tagged standalone ?
96 * Returns 1 if true
98 static int
99 isStandaloneDebug(void *ctx ATTRIBUTE_UNUSED)
101 fprintf(stdout, "SAX.isStandalone()\n");
102 return(0);
106 * hasInternalSubsetDebug:
107 * @ctxt: An XML parser context
109 * Does this document has an internal subset
111 * Returns 1 if true
113 static int
114 hasInternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
116 fprintf(stdout, "SAX.hasInternalSubset()\n");
117 return(0);
121 * hasExternalSubsetDebug:
122 * @ctxt: An XML parser context
124 * Does this document has an external subset
126 * Returns 1 if true
128 static int
129 hasExternalSubsetDebug(void *ctx ATTRIBUTE_UNUSED)
131 fprintf(stdout, "SAX.hasExternalSubset()\n");
132 return(0);
136 * hasInternalSubsetDebug:
137 * @ctxt: An XML parser context
139 * Does this document has an internal subset
141 static void
142 internalSubsetDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
143 const xmlChar *ExternalID, const xmlChar *SystemID)
145 fprintf(stdout, "SAX.internalSubset(%s,", name);
146 if (ExternalID == NULL)
147 fprintf(stdout, " ,");
148 else
149 fprintf(stdout, " %s,", ExternalID);
150 if (SystemID == NULL)
151 fprintf(stdout, " )\n");
152 else
153 fprintf(stdout, " %s)\n", SystemID);
157 * resolveEntityDebug:
158 * @ctxt: An XML parser context
159 * @publicId: The public ID of the entity
160 * @systemId: The system ID of the entity
162 * Special entity resolver, better left to the parser, it has
163 * more context than the application layer.
164 * The default behaviour is to NOT resolve the entities, in that case
165 * the ENTITY_REF nodes are built in the structure (and the parameter
166 * values).
168 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
170 static xmlParserInputPtr
171 resolveEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *publicId, const xmlChar *systemId)
173 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
176 fprintf(stdout, "SAX.resolveEntity(");
177 if (publicId != NULL)
178 fprintf(stdout, "%s", (char *)publicId);
179 else
180 fprintf(stdout, " ");
181 if (systemId != NULL)
182 fprintf(stdout, ", %s)\n", (char *)systemId);
183 else
184 fprintf(stdout, ", )\n");
185 /*********
186 if (systemId != NULL) {
187 return(xmlNewInputFromFile(ctxt, (char *) systemId));
189 *********/
190 return(NULL);
194 * getEntityDebug:
195 * @ctxt: An XML parser context
196 * @name: The entity name
198 * Get an entity by name
200 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
202 static xmlEntityPtr
203 getEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
205 fprintf(stdout, "SAX.getEntity(%s)\n", name);
206 return(NULL);
210 * getParameterEntityDebug:
211 * @ctxt: An XML parser context
212 * @name: The entity name
214 * Get a parameter entity by name
216 * Returns the xmlParserInputPtr
218 static xmlEntityPtr
219 getParameterEntityDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
221 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
222 return(NULL);
227 * entityDeclDebug:
228 * @ctxt: An XML parser context
229 * @name: the entity name
230 * @type: the entity type
231 * @publicId: The public ID of the entity
232 * @systemId: The system ID of the entity
233 * @content: the entity value (without processing).
235 * An entity definition has been parsed
237 static void
238 entityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
239 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
241 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
242 name, type, publicId, systemId, content);
246 * attributeDeclDebug:
247 * @ctxt: An XML parser context
248 * @name: the attribute name
249 * @type: the attribute type
251 * An attribute definition has been parsed
253 static void
254 attributeDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *elem, const xmlChar *name,
255 int type, int def, const xmlChar *defaultValue,
256 xmlEnumerationPtr tree ATTRIBUTE_UNUSED)
258 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
259 elem, name, type, def, defaultValue);
263 * elementDeclDebug:
264 * @ctxt: An XML parser context
265 * @name: the element name
266 * @type: the element type
267 * @content: the element value (without processing).
269 * An element definition has been parsed
271 static void
272 elementDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, int type,
273 xmlElementContentPtr content ATTRIBUTE_UNUSED)
275 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
276 name, type);
280 * notationDeclDebug:
281 * @ctxt: An XML parser context
282 * @name: The name of the notation
283 * @publicId: The public ID of the entity
284 * @systemId: The system ID of the entity
286 * What to do when a notation declaration has been parsed.
288 static void
289 notationDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
290 const xmlChar *publicId, const xmlChar *systemId)
292 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
293 (char *) name, (char *) publicId, (char *) systemId);
297 * unparsedEntityDeclDebug:
298 * @ctxt: An XML parser context
299 * @name: The name of the entity
300 * @publicId: The public ID of the entity
301 * @systemId: The system ID of the entity
302 * @notationName: the name of the notation
304 * What to do when an unparsed entity declaration is parsed
306 static void
307 unparsedEntityDeclDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name,
308 const xmlChar *publicId, const xmlChar *systemId,
309 const xmlChar *notationName)
311 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
312 (char *) name, (char *) publicId, (char *) systemId,
313 (char *) notationName);
317 * setDocumentLocatorDebug:
318 * @ctxt: An XML parser context
319 * @loc: A SAX Locator
321 * Receive the document locator at startup, actually xmlDefaultSAXLocator
322 * Everything is available on the context, so this is useless in our case.
324 static void
325 setDocumentLocatorDebug(void *ctx ATTRIBUTE_UNUSED, xmlSAXLocatorPtr loc ATTRIBUTE_UNUSED)
327 fprintf(stdout, "SAX.setDocumentLocator()\n");
331 * startDocumentDebug:
332 * @ctxt: An XML parser context
334 * called when the document start being processed.
336 static void
337 startDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
339 fprintf(stdout, "SAX.startDocument()\n");
343 * endDocumentDebug:
344 * @ctxt: An XML parser context
346 * called when the document end has been detected.
348 static void
349 endDocumentDebug(void *ctx ATTRIBUTE_UNUSED)
351 fprintf(stdout, "SAX.endDocument()\n");
355 * startElementDebug:
356 * @ctxt: An XML parser context
357 * @name: The element name
359 * called when an opening tag has been processed.
361 static void
362 startElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name, const xmlChar **atts)
364 int i;
366 fprintf(stdout, "SAX.startElement(%s", (char *) name);
367 if (atts != NULL) {
368 for (i = 0;(atts[i] != NULL);i++) {
369 fprintf(stdout, ", %s", atts[i++]);
370 if (atts[i] != NULL) {
371 unsigned char output[40];
372 const unsigned char *att = atts[i];
373 int outlen, attlen;
374 fprintf(stdout, "='");
375 while ((attlen = strlen((char*)att)) > 0) {
376 outlen = sizeof output - 1;
377 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
378 fprintf(stdout, "%.*s", outlen, output);
379 att += attlen;
381 fprintf(stdout, "'");
385 fprintf(stdout, ")\n");
389 * endElementDebug:
390 * @ctxt: An XML parser context
391 * @name: The element name
393 * called when the end of an element has been detected.
395 static void
396 endElementDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
398 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
402 * charactersDebug:
403 * @ctxt: An XML parser context
404 * @ch: a xmlChar string
405 * @len: the number of xmlChar
407 * receiving some chars from the parser.
408 * Question: how much at a time ???
410 static void
411 charactersDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
413 unsigned char output[40];
414 int inlen = len, outlen = 30;
416 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
417 output[outlen] = 0;
419 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
423 * cdataDebug:
424 * @ctxt: An XML parser context
425 * @ch: a xmlChar string
426 * @len: the number of xmlChar
428 * receiving some cdata chars from the parser.
429 * Question: how much at a time ???
431 static void
432 cdataDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
434 unsigned char output[40];
435 int inlen = len, outlen = 30;
437 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
438 output[outlen] = 0;
440 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
444 * referenceDebug:
445 * @ctxt: An XML parser context
446 * @name: The entity name
448 * called when an entity reference is detected.
450 static void
451 referenceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *name)
453 fprintf(stdout, "SAX.reference(%s)\n", name);
457 * ignorableWhitespaceDebug:
458 * @ctxt: An XML parser context
459 * @ch: a xmlChar string
460 * @start: the first char in the string
461 * @len: the number of xmlChar
463 * receiving some ignorable whitespaces from the parser.
464 * Question: how much at a time ???
466 static void
467 ignorableWhitespaceDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *ch, int len)
469 char output[40];
470 int i;
472 for (i = 0;(i<len) && (i < 30);i++)
473 output[i] = ch[i];
474 output[i] = 0;
476 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
480 * processingInstructionDebug:
481 * @ctxt: An XML parser context
482 * @target: the target name
483 * @data: the PI data's
484 * @len: the number of xmlChar
486 * A processing instruction has been parsed.
488 static void
489 processingInstructionDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *target,
490 const xmlChar *data)
492 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
493 (char *) target, (char *) data);
497 * commentDebug:
498 * @ctxt: An XML parser context
499 * @value: the comment content
501 * A comment has been parsed.
503 static void
504 commentDebug(void *ctx ATTRIBUTE_UNUSED, const xmlChar *value)
506 fprintf(stdout, "SAX.comment(%s)\n", value);
510 * warningDebug:
511 * @ctxt: An XML parser context
512 * @msg: the message to display/transmit
513 * @...: extra parameters for the message display
515 * Display and format a warning messages, gives file, line, position and
516 * extra parameters.
518 static void
519 warningDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
521 va_list args;
523 va_start(args, msg);
524 fprintf(stdout, "SAX.warning: ");
525 vfprintf(stdout, msg, args);
526 va_end(args);
530 * errorDebug:
531 * @ctxt: An XML parser context
532 * @msg: the message to display/transmit
533 * @...: extra parameters for the message display
535 * Display and format a error messages, gives file, line, position and
536 * extra parameters.
538 static void
539 errorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
541 va_list args;
543 va_start(args, msg);
544 fprintf(stdout, "SAX.error: ");
545 vfprintf(stdout, msg, args);
546 va_end(args);
550 * fatalErrorDebug:
551 * @ctxt: An XML parser context
552 * @msg: the message to display/transmit
553 * @...: extra parameters for the message display
555 * Display and format a fatalError messages, gives file, line, position and
556 * extra parameters.
558 static void
559 fatalErrorDebug(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...)
561 va_list args;
563 va_start(args, msg);
564 fprintf(stdout, "SAX.fatalError: ");
565 vfprintf(stdout, msg, args);
566 va_end(args);
/*
 * SAX handler table wired to the *Debug callbacks above, each of which
 * traces its invocation on stdout. Slot order matches emptySAXHandlerStruct.
 */
569 xmlSAXHandler debugSAXHandlerStruct = {
570 internalSubsetDebug, /* internalSubset */
571 isStandaloneDebug, /* isStandalone */
572 hasInternalSubsetDebug, /* hasInternalSubset */
573 hasExternalSubsetDebug, /* hasExternalSubset */
574 resolveEntityDebug, /* resolveEntity */
575 getEntityDebug, /* getEntity */
576 entityDeclDebug, /* entityDecl */
577 notationDeclDebug, /* notationDecl */
578 attributeDeclDebug, /* attributeDecl */
579 elementDeclDebug, /* elementDecl */
580 unparsedEntityDeclDebug, /* unparsedEntityDecl */
581 setDocumentLocatorDebug, /* setDocumentLocator */
582 startDocumentDebug, /* startDocument */
583 endDocumentDebug, /* endDocument */
584 startElementDebug, /* startElement */
585 endElementDebug, /* endElement */
586 referenceDebug, /* reference */
587 charactersDebug, /* characters */
588 ignorableWhitespaceDebug, /* ignorableWhitespace */
589 processingInstructionDebug, /* processingInstruction */
590 commentDebug, /* comment */
591 warningDebug, /* warning */
592 errorDebug, /* error */
593 fatalErrorDebug, /* fatalError */
594 getParameterEntityDebug, /* getParameterEntity */
595 cdataDebug, /* cdataBlock */
596 NULL, /* externalSubset: no debug callback installed */
/* Pointer form of the debug table, used by parseSAXFile() for its traced pass. */
600 xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
601 /************************************************************************
603 * Debug *
605 ************************************************************************/
607 static void
608 parseSAXFile(char *filename) {
609 htmlDocPtr doc = NULL;
612 * Empty callbacks for checking
614 if (push) {
615 FILE *f;
617 f = fopen(filename, "r");
618 if (f != NULL) {
619 int res, size = 3;
620 char chars[4096];
621 htmlParserCtxtPtr ctxt;
623 /* if (repeat) */
624 size = 4096;
625 res = fread(chars, 1, 4, f);
626 if (res > 0) {
627 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
628 chars, res, filename, 0);
629 while ((res = fread(chars, 1, size, f)) > 0) {
630 htmlParseChunk(ctxt, chars, res, 0);
632 htmlParseChunk(ctxt, chars, 0, 1);
633 doc = ctxt->myDoc;
634 htmlFreeParserCtxt(ctxt);
636 if (doc != NULL) {
637 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
638 xmlFreeDoc(doc);
640 fclose(f);
642 if (!noout) {
643 f = fopen(filename, "r");
644 if (f != NULL) {
645 int res, size = 3;
646 char chars[4096];
647 htmlParserCtxtPtr ctxt;
649 /* if (repeat) */
650 size = 4096;
651 res = fread(chars, 1, 4, f);
652 if (res > 0) {
653 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
654 chars, res, filename, 0);
655 while ((res = fread(chars, 1, size, f)) > 0) {
656 htmlParseChunk(ctxt, chars, res, 0);
658 htmlParseChunk(ctxt, chars, 0, 1);
659 doc = ctxt->myDoc;
660 htmlFreeParserCtxt(ctxt);
662 if (doc != NULL) {
663 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
664 xmlFreeDoc(doc);
666 fclose(f);
669 } else {
670 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
671 if (doc != NULL) {
672 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
673 xmlFreeDoc(doc);
676 if (!noout) {
678 * Debug callback
680 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
681 if (doc != NULL) {
682 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
683 xmlFreeDoc(doc);
689 static void
690 parseAndPrintFile(char *filename) {
691 htmlDocPtr doc = NULL, tmp;
694 * build an HTML tree from a string;
696 if (push) {
697 FILE *f;
699 f = fopen(filename, "r");
700 if (f != NULL) {
701 int res, size = 3;
702 char chars[4096];
703 htmlParserCtxtPtr ctxt;
705 /* if (repeat) */
706 size = 4096;
707 res = fread(chars, 1, 4, f);
708 if (res > 0) {
709 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
710 chars, res, filename, 0);
711 while ((res = fread(chars, 1, size, f)) > 0) {
712 htmlParseChunk(ctxt, chars, res, 0);
714 htmlParseChunk(ctxt, chars, 0, 1);
715 doc = ctxt->myDoc;
716 htmlFreeParserCtxt(ctxt);
718 fclose(f);
720 } else {
721 doc = htmlParseFile(filename, NULL);
723 if (doc == NULL) {
724 xmlGenericError(xmlGenericErrorContext,
725 "Could not parse %s\n", filename);
729 * test intermediate copy if needed.
731 if (copy) {
732 tmp = doc;
733 doc = xmlCopyDoc(doc, 1);
734 xmlFreeDoc(tmp);
738 * print it.
740 if (!noout) {
741 #ifdef LIBXML_DEBUG_ENABLED
742 if (!debug) {
743 if (encoding)
744 htmlSaveFileEnc("-", doc, encoding);
745 else
746 htmlDocDump(stdout, doc);
747 } else
748 xmlDebugDumpDocument(stdout, doc);
749 #else
750 if (encoding)
751 htmlSaveFileEnc("-", doc, encoding);
752 else
753 htmlDocDump(stdout, doc);
754 #endif
758 * free it.
760 xmlFreeDoc(doc);
763 int main(int argc, char **argv) {
764 int i, count;
765 int files = 0;
767 for (i = 1; i < argc ; i++) {
768 #ifdef LIBXML_DEBUG_ENABLED
769 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
770 debug++;
771 else
772 #endif
773 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
774 copy++;
775 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
776 push++;
777 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
778 sax++;
779 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
780 noout++;
781 else if ((!strcmp(argv[i], "-repeat")) ||
782 (!strcmp(argv[i], "--repeat")))
783 repeat++;
784 else if ((!strcmp(argv[i], "-encode")) ||
785 (!strcmp(argv[i], "--encode"))) {
786 i++;
787 encoding = argv[i];
790 for (i = 1; i < argc ; i++) {
791 if ((!strcmp(argv[i], "-encode")) ||
792 (!strcmp(argv[i], "--encode"))) {
793 i++;
794 continue;
796 if (argv[i][0] != '-') {
797 if (repeat) {
798 for (count = 0;count < 100 * repeat;count++) {
799 if (sax)
800 parseSAXFile(argv[i]);
801 else
802 parseAndPrintFile(argv[i]);
804 } else {
805 if (sax)
806 parseSAXFile(argv[i]);
807 else
808 parseAndPrintFile(argv[i]);
810 files ++;
813 if (files == 0) {
814 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
815 argv[0]);
816 printf("\tParse the HTML files and output the result of the parsing\n");
817 #ifdef LIBXML_DEBUG_ENABLED
818 printf("\t--debug : dump a debug tree of the in-memory document\n");
819 #endif
820 printf("\t--copy : used to test the internal copy implementation\n");
821 printf("\t--sax : debug the sequence of SAX callbacks\n");
822 printf("\t--repeat : parse the file 100 times, for timing\n");
823 printf("\t--noout : do not print the result\n");
824 printf("\t--push : use the push mode parser\n");
825 printf("\t--encode encoding : output in the given encoding\n");
827 xmlCleanupParser();
828 xmlMemoryDump();
830 return(0);
832 #else /* !LIBXML_HTML_ENABLED */
833 #include <stdio.h>
/*
 * Fallback entry point used when LIBXML_HTML_ENABLED is not defined:
 * reports that HTML support is missing and exits with status 0.
 */
int main(int argc, char **argv) {
    (void) argc;
    fprintf(stdout, "%s : HTML support not compiled in\n", argv[0]);
    return 0;
}
838 #endif