Harmonize more parameter names in bulk.
[pgsql.git] / src / backend / utils / adt / xml.c
blobd32cb11436e7e70e1463491abe3593c04cf61631
1 /*-------------------------------------------------------------------------
3 * xml.c
4 * XML data type support.
7 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/utils/adt/xml.c
12 *-------------------------------------------------------------------------
16 * Generally, XML type support is only available when libxml use was
17 * configured during the build. But even if that is not done, the
18 * type and all the functions are available, but most of them will
19 * fail. For one thing, this avoids having to manage variant catalog
20 * installations. But it also has nice effects such as that you can
21 * dump a database containing XML type data even if the server is not
22 * linked with libxml. Thus, make sure xml_out() works even if nothing
23 * else does.
27 * Notes on memory management:
29 * Sometimes libxml allocates global structures in the hope that it can reuse
30 * them later on. This makes it impractical to change the xmlMemSetup
31 * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 * allocated with malloc() or vice versa. Since libxml might be used by
33 * loadable modules, eg libperl, our only safe choices are to change the
34 * functions at postmaster/backend launch or not at all. Since we'd rather
35 * not activate libxml in sessions that might never use it, the latter choice
36 * is the preferred one. However, for debugging purposes it can be awfully
37 * handy to constrain libxml's allocations to be done in a specific palloc
38 * context, where they're easy to track. Therefore there is code here that
39 * can be enabled in debug builds to redirect libxml's allocations into a
40 * special context LibxmlContext. It's not recommended to turn this on in
41 * a production build because of the possibility of bad interactions with
42 * external modules.
44 /* #define USE_LIBXMLCONTEXT */
46 #include "postgres.h"
48 #ifdef USE_LIBXML
49 #include <libxml/chvalid.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/tree.h>
53 #include <libxml/uri.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/xmlversion.h>
56 #include <libxml/xmlwriter.h>
57 #include <libxml/xpath.h>
58 #include <libxml/xpathInternals.h>
61 * We used to check for xmlStructuredErrorContext via a configure test; but
62 * that doesn't work on Windows, so instead use this grottier method of
63 * testing the library version number.
65 #if LIBXML_VERSION >= 20704
66 #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
67 #endif
68 #endif /* USE_LIBXML */
70 #include "access/htup_details.h"
71 #include "access/table.h"
72 #include "catalog/namespace.h"
73 #include "catalog/pg_class.h"
74 #include "catalog/pg_type.h"
75 #include "commands/dbcommands.h"
76 #include "executor/spi.h"
77 #include "executor/tablefunc.h"
78 #include "fmgr.h"
79 #include "lib/stringinfo.h"
80 #include "libpq/pqformat.h"
81 #include "mb/pg_wchar.h"
82 #include "miscadmin.h"
83 #include "nodes/execnodes.h"
84 #include "nodes/nodeFuncs.h"
85 #include "utils/array.h"
86 #include "utils/builtins.h"
87 #include "utils/date.h"
88 #include "utils/datetime.h"
89 #include "utils/lsyscache.h"
90 #include "utils/memutils.h"
91 #include "utils/rel.h"
92 #include "utils/syscache.h"
93 #include "utils/xml.h"
96 /* GUC variables */
97 int xmlbinary;
98 int xmloption;
100 #ifdef USE_LIBXML
102 /* random number to identify PgXmlErrorContext */
103 #define ERRCXT_MAGIC 68275028
105 struct PgXmlErrorContext
107 int magic;
108 /* strictness argument passed to pg_xml_init */
109 PgXmlStrictness strictness;
110 /* current error status and accumulated message, if any */
111 bool err_occurred;
112 StringInfoData err_buf;
113 /* previous libxml error handling state (saved by pg_xml_init) */
114 xmlStructuredErrorFunc saved_errfunc;
115 void *saved_errcxt;
116 /* previous libxml entity handler (saved by pg_xml_init) */
117 xmlExternalEntityLoader saved_entityfunc;
120 static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
121 xmlParserCtxtPtr ctxt);
122 static void xml_errorHandler(void *data, xmlErrorPtr error);
123 static void xml_ereport_by_code(int level, int sqlcode,
124 const char *msg, int code);
125 static void chopStringInfoNewlines(StringInfo str);
126 static void appendStringInfoLineSeparator(StringInfo str);
128 #ifdef USE_LIBXMLCONTEXT
130 static MemoryContext LibxmlContext = NULL;
132 static void xml_memory_init(void);
133 static void *xml_palloc(size_t size);
134 static void *xml_repalloc(void *ptr, size_t size);
135 static void xml_pfree(void *ptr);
136 static char *xml_pstrdup(const char *string);
137 #endif /* USE_LIBXMLCONTEXT */
139 static xmlChar *xml_text2xmlChar(text *in);
140 static int parse_xml_decl(const xmlChar *str, size_t *lenp,
141 xmlChar **version, xmlChar **encoding, int *standalone);
142 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
143 pg_enc encoding, int standalone);
144 static bool xml_doctype_in_content(const xmlChar *str);
145 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
146 bool preserve_whitespace, int encoding);
147 static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
148 static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
149 ArrayBuildState *astate,
150 PgXmlErrorContext *xmlerrcxt);
151 static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
152 #endif /* USE_LIBXML */
154 static void xmldata_root_element_start(StringInfo result, const char *eltname,
155 const char *xmlschema, const char *targetns,
156 bool top_level);
157 static void xmldata_root_element_end(StringInfo result, const char *eltname);
158 static StringInfo query_to_xml_internal(const char *query, char *tablename,
159 const char *xmlschema, bool nulls, bool tableforest,
160 const char *targetns, bool top_level);
161 static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
162 bool nulls, bool tableforest, const char *targetns);
163 static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
164 List *relid_list, bool nulls,
165 bool tableforest, const char *targetns);
166 static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
167 bool nulls, bool tableforest,
168 const char *targetns);
169 static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
170 static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
171 static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
172 static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
173 char *tablename, bool nulls, bool tableforest,
174 const char *targetns, bool top_level);
176 /* XMLTABLE support */
177 #ifdef USE_LIBXML
178 /* random number to identify XmlTableContext */
179 #define XMLTABLE_CONTEXT_MAGIC 46922182
180 typedef struct XmlTableBuilderData
182 int magic;
183 int natts;
184 long int row_count;
185 PgXmlErrorContext *xmlerrcxt;
186 xmlParserCtxtPtr ctxt;
187 xmlDocPtr doc;
188 xmlXPathContextPtr xpathcxt;
189 xmlXPathCompExprPtr xpathcomp;
190 xmlXPathObjectPtr xpathobj;
191 xmlXPathCompExprPtr *xpathscomp;
192 } XmlTableBuilderData;
193 #endif
195 static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
196 static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
197 static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
198 const char *uri);
199 static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
200 static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
201 const char *path, int colnum);
202 static bool XmlTableFetchRow(struct TableFuncScanState *state);
203 static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
204 Oid typid, int32 typmod, bool *isnull);
205 static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
207 const TableFuncRoutine XmlTableRoutine =
209 XmlTableInitOpaque,
210 XmlTableSetDocument,
211 XmlTableSetNamespace,
212 XmlTableSetRowFilter,
213 XmlTableSetColumnFilter,
214 XmlTableFetchRow,
215 XmlTableGetValue,
216 XmlTableDestroyOpaque
/*
 * Raise the standard "built without libxml" error.  Used in the
 * non-USE_LIBXML branch of every entry point that needs the library.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support.")))
226 /* from SQL/XML:2008 section 4.9 */
227 #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
228 #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
229 #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
232 #ifdef USE_LIBXML
234 static int
235 xmlChar_to_encoding(const xmlChar *encoding_name)
237 int encoding = pg_char_to_encoding((const char *) encoding_name);
239 if (encoding < 0)
240 ereport(ERROR,
241 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
242 errmsg("invalid encoding name \"%s\"",
243 (const char *) encoding_name)));
244 return encoding;
246 #endif
250 * xml_in uses a plain C string to VARDATA conversion, so for the time being
251 * we use the conversion function for the text datatype.
253 * This is only acceptable so long as xmltype and text use the same
254 * representation.
256 Datum
257 xml_in(PG_FUNCTION_ARGS)
259 #ifdef USE_LIBXML
260 char *s = PG_GETARG_CSTRING(0);
261 xmltype *vardata;
262 xmlDocPtr doc;
264 vardata = (xmltype *) cstring_to_text(s);
267 * Parse the data to check if it is well-formed XML data. Assume that
268 * ERROR occurred if parsing failed.
270 doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
271 xmlFreeDoc(doc);
273 PG_RETURN_XML_P(vardata);
274 #else
275 NO_XML_SUPPORT();
276 return 0;
277 #endif
281 #define PG_XML_DEFAULT_VERSION "1.0"
285 * xml_out_internal uses a plain VARDATA to C string conversion, so for the
286 * time being we use the conversion function for the text datatype.
288 * This is only acceptable so long as xmltype and text use the same
289 * representation.
291 static char *
292 xml_out_internal(xmltype *x, pg_enc target_encoding)
294 char *str = text_to_cstring((text *) x);
296 #ifdef USE_LIBXML
297 size_t len = strlen(str);
298 xmlChar *version;
299 int standalone;
300 int res_code;
302 if ((res_code = parse_xml_decl((xmlChar *) str,
303 &len, &version, NULL, &standalone)) == 0)
305 StringInfoData buf;
307 initStringInfo(&buf);
309 if (!print_xml_decl(&buf, version, target_encoding, standalone))
312 * If we are not going to produce an XML declaration, eat a single
313 * newline in the original string to prevent empty first lines in
314 * the output.
316 if (*(str + len) == '\n')
317 len += 1;
319 appendStringInfoString(&buf, str + len);
321 pfree(str);
323 return buf.data;
326 xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
327 "could not parse XML declaration in stored value",
328 res_code);
329 #endif
330 return str;
334 Datum
335 xml_out(PG_FUNCTION_ARGS)
337 xmltype *x = PG_GETARG_XML_P(0);
340 * xml_out removes the encoding property in all cases. This is because we
341 * cannot control from here whether the datum will be converted to a
342 * different client encoding, so we'd do more harm than good by including
343 * it.
345 PG_RETURN_CSTRING(xml_out_internal(x, 0));
349 Datum
350 xml_recv(PG_FUNCTION_ARGS)
352 #ifdef USE_LIBXML
353 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
354 xmltype *result;
355 char *str;
356 char *newstr;
357 int nbytes;
358 xmlDocPtr doc;
359 xmlChar *encodingStr = NULL;
360 int encoding;
363 * Read the data in raw format. We don't know yet what the encoding is, as
364 * that information is embedded in the xml declaration; so we have to
365 * parse that before converting to server encoding.
367 nbytes = buf->len - buf->cursor;
368 str = (char *) pq_getmsgbytes(buf, nbytes);
371 * We need a null-terminated string to pass to parse_xml_decl(). Rather
372 * than make a separate copy, make the temporary result one byte bigger
373 * than it needs to be.
375 result = palloc(nbytes + 1 + VARHDRSZ);
376 SET_VARSIZE(result, nbytes + VARHDRSZ);
377 memcpy(VARDATA(result), str, nbytes);
378 str = VARDATA(result);
379 str[nbytes] = '\0';
381 parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
384 * If encoding wasn't explicitly specified in the XML header, treat it as
385 * UTF-8, as that's the default in XML. This is different from xml_in(),
386 * where the input has to go through the normal client to server encoding
387 * conversion.
389 encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
392 * Parse the data to check if it is well-formed XML data. Assume that
393 * xml_parse will throw ERROR if not.
395 doc = xml_parse(result, xmloption, true, encoding);
396 xmlFreeDoc(doc);
398 /* Now that we know what we're dealing with, convert to server encoding */
399 newstr = pg_any_to_server(str, nbytes, encoding);
401 if (newstr != str)
403 pfree(result);
404 result = (xmltype *) cstring_to_text(newstr);
405 pfree(newstr);
408 PG_RETURN_XML_P(result);
409 #else
410 NO_XML_SUPPORT();
411 return 0;
412 #endif
416 Datum
417 xml_send(PG_FUNCTION_ARGS)
419 xmltype *x = PG_GETARG_XML_P(0);
420 char *outval;
421 StringInfoData buf;
424 * xml_out_internal doesn't convert the encoding, it just prints the right
425 * declaration. pq_sendtext will do the conversion.
427 outval = xml_out_internal(x, pg_get_client_encoding());
429 pq_begintypsend(&buf);
430 pq_sendtext(&buf, outval, strlen(outval));
431 pfree(outval);
432 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
436 #ifdef USE_LIBXML
437 static void
438 appendStringInfoText(StringInfo str, const text *t)
440 appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
442 #endif
445 static xmltype *
446 stringinfo_to_xmltype(StringInfo buf)
448 return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
452 static xmltype *
453 cstring_to_xmltype(const char *string)
455 return (xmltype *) cstring_to_text(string);
459 #ifdef USE_LIBXML
460 static xmltype *
461 xmlBuffer_to_xmltype(xmlBufferPtr buf)
463 return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
464 xmlBufferLength(buf));
466 #endif
469 Datum
470 xmlcomment(PG_FUNCTION_ARGS)
472 #ifdef USE_LIBXML
473 text *arg = PG_GETARG_TEXT_PP(0);
474 char *argdata = VARDATA_ANY(arg);
475 int len = VARSIZE_ANY_EXHDR(arg);
476 StringInfoData buf;
477 int i;
479 /* check for "--" in string or "-" at the end */
480 for (i = 1; i < len; i++)
482 if (argdata[i] == '-' && argdata[i - 1] == '-')
483 ereport(ERROR,
484 (errcode(ERRCODE_INVALID_XML_COMMENT),
485 errmsg("invalid XML comment")));
487 if (len > 0 && argdata[len - 1] == '-')
488 ereport(ERROR,
489 (errcode(ERRCODE_INVALID_XML_COMMENT),
490 errmsg("invalid XML comment")));
492 initStringInfo(&buf);
493 appendStringInfoString(&buf, "<!--");
494 appendStringInfoText(&buf, arg);
495 appendStringInfoString(&buf, "-->");
497 PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
498 #else
499 NO_XML_SUPPORT();
500 return 0;
501 #endif
507 * TODO: xmlconcat needs to merge the notations and unparsed entities
508 * of the argument values. Not very important in practice, though.
510 xmltype *
511 xmlconcat(List *args)
513 #ifdef USE_LIBXML
514 int global_standalone = 1;
515 xmlChar *global_version = NULL;
516 bool global_version_no_value = false;
517 StringInfoData buf;
518 ListCell *v;
520 initStringInfo(&buf);
521 foreach(v, args)
523 xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
524 size_t len;
525 xmlChar *version;
526 int standalone;
527 char *str;
529 len = VARSIZE(x) - VARHDRSZ;
530 str = text_to_cstring((text *) x);
532 parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
534 if (standalone == 0 && global_standalone == 1)
535 global_standalone = 0;
536 if (standalone < 0)
537 global_standalone = -1;
539 if (!version)
540 global_version_no_value = true;
541 else if (!global_version)
542 global_version = version;
543 else if (xmlStrcmp(version, global_version) != 0)
544 global_version_no_value = true;
546 appendStringInfoString(&buf, str + len);
547 pfree(str);
550 if (!global_version_no_value || global_standalone >= 0)
552 StringInfoData buf2;
554 initStringInfo(&buf2);
556 print_xml_decl(&buf2,
557 (!global_version_no_value) ? global_version : NULL,
559 global_standalone);
561 appendBinaryStringInfo(&buf2, buf.data, buf.len);
562 buf = buf2;
565 return stringinfo_to_xmltype(&buf);
566 #else
567 NO_XML_SUPPORT();
568 return NULL;
569 #endif
574 * XMLAGG support
576 Datum
577 xmlconcat2(PG_FUNCTION_ARGS)
579 if (PG_ARGISNULL(0))
581 if (PG_ARGISNULL(1))
582 PG_RETURN_NULL();
583 else
584 PG_RETURN_XML_P(PG_GETARG_XML_P(1));
586 else if (PG_ARGISNULL(1))
587 PG_RETURN_XML_P(PG_GETARG_XML_P(0));
588 else
589 PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
590 PG_GETARG_XML_P(1))));
594 Datum
595 texttoxml(PG_FUNCTION_ARGS)
597 text *data = PG_GETARG_TEXT_PP(0);
599 PG_RETURN_XML_P(xmlparse(data, xmloption, true));
603 Datum
604 xmltotext(PG_FUNCTION_ARGS)
606 xmltype *data = PG_GETARG_XML_P(0);
608 /* It's actually binary compatible. */
609 PG_RETURN_TEXT_P((text *) data);
613 text *
614 xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
616 if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
617 ereport(ERROR,
618 (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
619 errmsg("not an XML document")));
621 /* It's actually binary compatible, save for the above check. */
622 return (text *) data;
626 xmltype *
627 xmlelement(XmlExpr *xexpr,
628 Datum *named_argvalue, bool *named_argnull,
629 Datum *argvalue, bool *argnull)
631 #ifdef USE_LIBXML
632 xmltype *result;
633 List *named_arg_strings;
634 List *arg_strings;
635 int i;
636 ListCell *arg;
637 ListCell *narg;
638 PgXmlErrorContext *xmlerrcxt;
639 volatile xmlBufferPtr buf = NULL;
640 volatile xmlTextWriterPtr writer = NULL;
643 * All arguments are already evaluated, and their values are passed in the
644 * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
645 * issues if one of the arguments involves a call to some other function
646 * or subsystem that wants to use libxml on its own terms. We examine the
647 * original XmlExpr to identify the numbers and types of the arguments.
649 named_arg_strings = NIL;
650 i = 0;
651 foreach(arg, xexpr->named_args)
653 Expr *e = (Expr *) lfirst(arg);
654 char *str;
656 if (named_argnull[i])
657 str = NULL;
658 else
659 str = map_sql_value_to_xml_value(named_argvalue[i],
660 exprType((Node *) e),
661 false);
662 named_arg_strings = lappend(named_arg_strings, str);
663 i++;
666 arg_strings = NIL;
667 i = 0;
668 foreach(arg, xexpr->args)
670 Expr *e = (Expr *) lfirst(arg);
671 char *str;
673 /* here we can just forget NULL elements immediately */
674 if (!argnull[i])
676 str = map_sql_value_to_xml_value(argvalue[i],
677 exprType((Node *) e),
678 true);
679 arg_strings = lappend(arg_strings, str);
681 i++;
684 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
686 PG_TRY();
688 buf = xmlBufferCreate();
689 if (buf == NULL || xmlerrcxt->err_occurred)
690 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
691 "could not allocate xmlBuffer");
692 writer = xmlNewTextWriterMemory(buf, 0);
693 if (writer == NULL || xmlerrcxt->err_occurred)
694 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
695 "could not allocate xmlTextWriter");
697 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
699 forboth(arg, named_arg_strings, narg, xexpr->arg_names)
701 char *str = (char *) lfirst(arg);
702 char *argname = strVal(lfirst(narg));
704 if (str)
705 xmlTextWriterWriteAttribute(writer,
706 (xmlChar *) argname,
707 (xmlChar *) str);
710 foreach(arg, arg_strings)
712 char *str = (char *) lfirst(arg);
714 xmlTextWriterWriteRaw(writer, (xmlChar *) str);
717 xmlTextWriterEndElement(writer);
719 /* we MUST do this now to flush data out to the buffer ... */
720 xmlFreeTextWriter(writer);
721 writer = NULL;
723 result = xmlBuffer_to_xmltype(buf);
725 PG_CATCH();
727 if (writer)
728 xmlFreeTextWriter(writer);
729 if (buf)
730 xmlBufferFree(buf);
732 pg_xml_done(xmlerrcxt, true);
734 PG_RE_THROW();
736 PG_END_TRY();
738 xmlBufferFree(buf);
740 pg_xml_done(xmlerrcxt, false);
742 return result;
743 #else
744 NO_XML_SUPPORT();
745 return NULL;
746 #endif
750 xmltype *
751 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
753 #ifdef USE_LIBXML
754 xmlDocPtr doc;
756 doc = xml_parse(data, xmloption_arg, preserve_whitespace,
757 GetDatabaseEncoding());
758 xmlFreeDoc(doc);
760 return (xmltype *) data;
761 #else
762 NO_XML_SUPPORT();
763 return NULL;
764 #endif
768 xmltype *
769 xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
771 #ifdef USE_LIBXML
772 xmltype *result;
773 StringInfoData buf;
775 if (pg_strcasecmp(target, "xml") == 0)
776 ereport(ERROR,
777 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
778 errmsg("invalid XML processing instruction"),
779 errdetail("XML processing instruction target name cannot be \"%s\".", target)));
782 * Following the SQL standard, the null check comes after the syntax check
783 * above.
785 *result_is_null = arg_is_null;
786 if (*result_is_null)
787 return NULL;
789 initStringInfo(&buf);
791 appendStringInfo(&buf, "<?%s", target);
793 if (arg != NULL)
795 char *string;
797 string = text_to_cstring(arg);
798 if (strstr(string, "?>") != NULL)
799 ereport(ERROR,
800 (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
801 errmsg("invalid XML processing instruction"),
802 errdetail("XML processing instruction cannot contain \"?>\".")));
804 appendStringInfoChar(&buf, ' ');
805 appendStringInfoString(&buf, string + strspn(string, " "));
806 pfree(string);
808 appendStringInfoString(&buf, "?>");
810 result = stringinfo_to_xmltype(&buf);
811 pfree(buf.data);
812 return result;
813 #else
814 NO_XML_SUPPORT();
815 return NULL;
816 #endif
820 xmltype *
821 xmlroot(xmltype *data, text *version, int standalone)
823 #ifdef USE_LIBXML
824 char *str;
825 size_t len;
826 xmlChar *orig_version;
827 int orig_standalone;
828 StringInfoData buf;
830 len = VARSIZE(data) - VARHDRSZ;
831 str = text_to_cstring((text *) data);
833 parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
835 if (version)
836 orig_version = xml_text2xmlChar(version);
837 else
838 orig_version = NULL;
840 switch (standalone)
842 case XML_STANDALONE_YES:
843 orig_standalone = 1;
844 break;
845 case XML_STANDALONE_NO:
846 orig_standalone = 0;
847 break;
848 case XML_STANDALONE_NO_VALUE:
849 orig_standalone = -1;
850 break;
851 case XML_STANDALONE_OMITTED:
852 /* leave original value */
853 break;
856 initStringInfo(&buf);
857 print_xml_decl(&buf, orig_version, 0, orig_standalone);
858 appendStringInfoString(&buf, str + len);
860 return stringinfo_to_xmltype(&buf);
861 #else
862 NO_XML_SUPPORT();
863 return NULL;
864 #endif
869 * Validate document (given as string) against DTD (given as external link)
871 * This has been removed because it is a security hole: unprivileged users
872 * should not be able to use Postgres to fetch arbitrary external files,
873 * which unfortunately is exactly what libxml is willing to do with the DTD
874 * parameter.
876 Datum
877 xmlvalidate(PG_FUNCTION_ARGS)
879 ereport(ERROR,
880 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
881 errmsg("xmlvalidate is not implemented")));
882 return 0;
886 bool
887 xml_is_document(xmltype *arg)
889 #ifdef USE_LIBXML
890 bool result;
891 volatile xmlDocPtr doc = NULL;
892 MemoryContext ccxt = CurrentMemoryContext;
894 /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
895 PG_TRY();
897 doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
898 GetDatabaseEncoding());
899 result = true;
901 PG_CATCH();
903 ErrorData *errdata;
904 MemoryContext ecxt;
906 ecxt = MemoryContextSwitchTo(ccxt);
907 errdata = CopyErrorData();
908 if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
910 FlushErrorState();
911 result = false;
913 else
915 MemoryContextSwitchTo(ecxt);
916 PG_RE_THROW();
919 PG_END_TRY();
921 if (doc)
922 xmlFreeDoc(doc);
924 return result;
925 #else /* not USE_LIBXML */
926 NO_XML_SUPPORT();
927 return false;
928 #endif /* not USE_LIBXML */
932 #ifdef USE_LIBXML
935 * pg_xml_init_library --- set up for use of libxml
937 * This should be called by each function that is about to use libxml
938 * facilities but doesn't require error handling. It initializes libxml
939 * and verifies compatibility with the loaded libxml version. These are
940 * once-per-session activities.
942 * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
943 * check)
945 void
946 pg_xml_init_library(void)
948 static bool first_time = true;
950 if (first_time)
952 /* Stuff we need do only once per session */
955 * Currently, we have no pure UTF-8 support for internals -- check if
956 * we can work.
958 if (sizeof(char) != sizeof(xmlChar))
959 ereport(ERROR,
960 (errmsg("could not initialize XML library"),
961 errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
962 sizeof(char), sizeof(xmlChar))));
964 #ifdef USE_LIBXMLCONTEXT
965 /* Set up libxml's memory allocation our way */
966 xml_memory_init();
967 #endif
969 /* Check library compatibility */
970 LIBXML_TEST_VERSION;
972 first_time = false;
977 * pg_xml_init --- set up for use of libxml and register an error handler
979 * This should be called by each function that is about to use libxml
980 * facilities and requires error handling. It initializes libxml with
981 * pg_xml_init_library() and establishes our libxml error handler.
983 * strictness determines which errors are reported and which are ignored.
985 * Calls to this function MUST be followed by a PG_TRY block that guarantees
986 * that pg_xml_done() is called during either normal or error exit.
988 * This is exported for use by contrib/xml2, as well as other code that might
989 * wish to share use of this module's libxml error handler.
991 PgXmlErrorContext *
992 pg_xml_init(PgXmlStrictness strictness)
994 PgXmlErrorContext *errcxt;
995 void *new_errcxt;
997 /* Do one-time setup if needed */
998 pg_xml_init_library();
1000 /* Create error handling context structure */
1001 errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1002 errcxt->magic = ERRCXT_MAGIC;
1003 errcxt->strictness = strictness;
1004 errcxt->err_occurred = false;
1005 initStringInfo(&errcxt->err_buf);
1008 * Save original error handler and install ours. libxml originally didn't
1009 * distinguish between the contexts for generic and for structured error
1010 * handlers. If we're using an old libxml version, we must thus save the
1011 * generic error context, even though we're using a structured error
1012 * handler.
1014 errcxt->saved_errfunc = xmlStructuredError;
1016 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1017 errcxt->saved_errcxt = xmlStructuredErrorContext;
1018 #else
1019 errcxt->saved_errcxt = xmlGenericErrorContext;
1020 #endif
1022 xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1025 * Verify that xmlSetStructuredErrorFunc set the context variable we
1026 * expected it to. If not, the error context pointer we just saved is not
1027 * the correct thing to restore, and since that leaves us without a way to
1028 * restore the context in pg_xml_done, we must fail.
1030 * The only known situation in which this test fails is if we compile with
1031 * headers from a libxml2 that doesn't track the structured error context
1032 * separately (< 2.7.4), but at runtime use a version that does, or vice
1033 * versa. The libxml2 authors did not treat that change as constituting
1034 * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1035 * fails to protect us from this.
1038 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1039 new_errcxt = xmlStructuredErrorContext;
1040 #else
1041 new_errcxt = xmlGenericErrorContext;
1042 #endif
1044 if (new_errcxt != (void *) errcxt)
1045 ereport(ERROR,
1046 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1047 errmsg("could not set up XML error handler"),
1048 errhint("This probably indicates that the version of libxml2"
1049 " being used is not compatible with the libxml2"
1050 " header files that PostgreSQL was built with.")));
1053 * Also, install an entity loader to prevent unwanted fetches of external
1054 * files and URLs.
1056 errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1057 xmlSetExternalEntityLoader(xmlPgEntityLoader);
1059 return errcxt;
1064 * pg_xml_done --- restore previous libxml error handling
1066 * Resets libxml's global error-handling state to what it was before
1067 * pg_xml_init() was called.
1069 * This routine verifies that all pending errors have been dealt with
1070 * (in assert-enabled builds, anyway).
1072 void
1073 pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1075 void *cur_errcxt;
1077 /* An assert seems like enough protection here */
1078 Assert(errcxt->magic == ERRCXT_MAGIC);
1081 * In a normal exit, there should be no un-handled libxml errors. But we
1082 * shouldn't try to enforce this during error recovery, since the longjmp
1083 * could have been thrown before xml_ereport had a chance to run.
1085 Assert(!errcxt->err_occurred || isError);
1088 * Check that libxml's global state is correct, warn if not. This is a
1089 * real test and not an Assert because it has a higher probability of
1090 * happening.
1092 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1093 cur_errcxt = xmlStructuredErrorContext;
1094 #else
1095 cur_errcxt = xmlGenericErrorContext;
1096 #endif
1098 if (cur_errcxt != (void *) errcxt)
1099 elog(WARNING, "libxml error handling state is out of sync with xml.c");
1101 /* Restore the saved handlers */
1102 xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1103 xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1106 * Mark the struct as invalid, just in case somebody somehow manages to
1107 * call xml_errorHandler or xml_ereport with it.
1109 errcxt->magic = 0;
1111 /* Release memory */
1112 pfree(errcxt->err_buf.data);
1113 pfree(errcxt);
1118 * pg_xml_error_occurred() --- test the error flag
1120 bool
1121 pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1123 return errcxt->err_occurred;
/*
 * SQL/XML allows storing "XML documents" or "XML content".  "XML
 * documents" are specified by the XML specification and are parsed
 * easily by libxml.  "XML content" is specified by SQL/XML as the
 * production "XMLDecl? content".  But libxml can only parse the
 * "content" part, so we have to parse the XML declaration ourselves
 * to complete this.
 */

/*
 * Make the enclosing function return XML_ERR_SPACE_REQUIRED unless *p is
 * XML whitespace.  Note the hidden "return".
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/*
 * Advance p past any run of XML whitespace.  Wrapped in do/while so that
 * the macro behaves as a single statement wherever it is used.
 */
#define SKIP_XML_SPACE(p) \
	do { \
		while (xmlIsBlank_ch(*(p))) \
			(p)++; \
	} while (0)

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
/* The argument is parenthesized so that expression arguments compare right */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
	 || xmlIsDigit_ch(c) \
	 || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
	 || xmlIsCombiningQ(c) \
	 || xmlIsExtender_ch(c))
1154 /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1155 static xmlChar *
1156 xml_pnstrdup(const xmlChar *str, size_t len)
1158 xmlChar *result;
1160 result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1161 memcpy(result, str, len * sizeof(xmlChar));
1162 result[len] = 0;
1163 return result;
1166 /* Ditto, except input is char* */
1167 static xmlChar *
1168 pg_xmlCharStrndup(const char *str, size_t len)
1170 xmlChar *result;
1172 result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1173 memcpy(result, str, len);
1174 result[len] = '\0';
1176 return result;
1180 * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1182 * The input xmlChar is freed regardless of success of the copy.
1184 static char *
1185 xml_pstrdup_and_free(xmlChar *str)
1187 char *result;
1189 if (str)
1191 PG_TRY();
1193 result = pstrdup((char *) str);
1195 PG_FINALLY();
1197 xmlFree(str);
1199 PG_END_TRY();
1201 else
1202 result = NULL;
1204 return result;
1208 * str is the null-terminated input string. Remaining arguments are
1209 * output arguments; each can be NULL if value is not wanted.
1210 * version and encoding are returned as locally-palloc'd strings.
1211 * Result is 0 if OK, an error code if not.
1213 static int
1214 parse_xml_decl(const xmlChar *str, size_t *lenp,
1215 xmlChar **version, xmlChar **encoding, int *standalone)
1217 const xmlChar *p;
1218 const xmlChar *save_p;
1219 size_t len;
1220 int utf8char;
1221 int utf8len;
1224 * Only initialize libxml. We don't need error handling here, but we do
1225 * need to make sure libxml is initialized before calling any of its
1226 * functions. Note that this is safe (and a no-op) if caller has already
1227 * done pg_xml_init().
1229 pg_xml_init_library();
1231 /* Initialize output arguments to "not present" */
1232 if (version)
1233 *version = NULL;
1234 if (encoding)
1235 *encoding = NULL;
1236 if (standalone)
1237 *standalone = -1;
1239 p = str;
1241 if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1242 goto finished;
1245 * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1246 * rather than an XMLDecl, so we have done what we came to do and found no
1247 * XMLDecl.
1249 * We need an input length value for xmlGetUTF8Char, but there's no need
1250 * to count the whole document size, so use strnlen not strlen.
1252 utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1253 utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1254 if (PG_XMLISNAMECHAR(utf8char))
1255 goto finished;
1257 p += 5;
1259 /* version */
1260 CHECK_XML_SPACE(p);
1261 SKIP_XML_SPACE(p);
1262 if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1263 return XML_ERR_VERSION_MISSING;
1264 p += 7;
1265 SKIP_XML_SPACE(p);
1266 if (*p != '=')
1267 return XML_ERR_VERSION_MISSING;
1268 p += 1;
1269 SKIP_XML_SPACE(p);
1271 if (*p == '\'' || *p == '"')
1273 const xmlChar *q;
1275 q = xmlStrchr(p + 1, *p);
1276 if (!q)
1277 return XML_ERR_VERSION_MISSING;
1279 if (version)
1280 *version = xml_pnstrdup(p + 1, q - p - 1);
1281 p = q + 1;
1283 else
1284 return XML_ERR_VERSION_MISSING;
1286 /* encoding */
1287 save_p = p;
1288 SKIP_XML_SPACE(p);
1289 if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1291 CHECK_XML_SPACE(save_p);
1292 p += 8;
1293 SKIP_XML_SPACE(p);
1294 if (*p != '=')
1295 return XML_ERR_MISSING_ENCODING;
1296 p += 1;
1297 SKIP_XML_SPACE(p);
1299 if (*p == '\'' || *p == '"')
1301 const xmlChar *q;
1303 q = xmlStrchr(p + 1, *p);
1304 if (!q)
1305 return XML_ERR_MISSING_ENCODING;
1307 if (encoding)
1308 *encoding = xml_pnstrdup(p + 1, q - p - 1);
1309 p = q + 1;
1311 else
1312 return XML_ERR_MISSING_ENCODING;
1314 else
1316 p = save_p;
1319 /* standalone */
1320 save_p = p;
1321 SKIP_XML_SPACE(p);
1322 if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1324 CHECK_XML_SPACE(save_p);
1325 p += 10;
1326 SKIP_XML_SPACE(p);
1327 if (*p != '=')
1328 return XML_ERR_STANDALONE_VALUE;
1329 p += 1;
1330 SKIP_XML_SPACE(p);
1331 if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1332 xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1334 if (standalone)
1335 *standalone = 1;
1336 p += 5;
1338 else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1339 xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1341 if (standalone)
1342 *standalone = 0;
1343 p += 4;
1345 else
1346 return XML_ERR_STANDALONE_VALUE;
1348 else
1350 p = save_p;
1353 SKIP_XML_SPACE(p);
1354 if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1355 return XML_ERR_XMLDECL_NOT_FINISHED;
1356 p += 2;
1358 finished:
1359 len = p - str;
1361 for (p = str; p < str + len; p++)
1362 if (*p > 127)
1363 return XML_ERR_INVALID_CHAR;
1365 if (lenp)
1366 *lenp = len;
1368 return XML_ERR_OK;
1373 * Write an XML declaration. On output, we adjust the XML declaration
1374 * as follows. (These rules are the moral equivalent of the clause
1375 * "Serialization of an XML value" in the SQL standard.)
1377 * We try to avoid generating an XML declaration if possible. This is
1378 * so that you don't get trivial things like xml '<foo/>' resulting in
1379 * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1380 * must provide a declaration if the standalone property is specified
1381 * or if we include an encoding declaration. If we have a
1382 * declaration, we must specify a version (XML requires this).
1383 * Otherwise we only make a declaration if the version is not "1.0",
1384 * which is the default version specified in SQL:2003.
1386 static bool
1387 print_xml_decl(StringInfo buf, const xmlChar *version,
1388 pg_enc encoding, int standalone)
1390 if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1391 || (encoding && encoding != PG_UTF8)
1392 || standalone != -1)
1394 appendStringInfoString(buf, "<?xml");
1396 if (version)
1397 appendStringInfo(buf, " version=\"%s\"", version);
1398 else
1399 appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1401 if (encoding && encoding != PG_UTF8)
1404 * XXX might be useful to convert this to IANA names (ISO-8859-1
1405 * instead of LATIN1 etc.); needs field experience
1407 appendStringInfo(buf, " encoding=\"%s\"",
1408 pg_encoding_to_char(encoding));
1411 if (standalone == 1)
1412 appendStringInfoString(buf, " standalone=\"yes\"");
1413 else if (standalone == 0)
1414 appendStringInfoString(buf, " standalone=\"no\"");
1415 appendStringInfoString(buf, "?>");
1417 return true;
1419 else
1420 return false;
1424 * Test whether an input that is to be parsed as CONTENT contains a DTD.
1426 * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1427 * satisfied by a document with a DTD, which is a bit of a wart, as it means
1428 * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1429 * later fix that, by redefining content with reference to the "more
1430 * permissive" Document Node of the XQuery/XPath Data Model, such that any
1431 * DOCUMENT value is indeed also a CONTENT value. That definition is more
1432 * useful, as CONTENT becomes usable for parsing input of unknown form (think
1433 * pg_restore).
1435 * As used below in parse_xml when parsing for CONTENT, libxml does not give
1436 * us the 2006+ behavior, but only the 2003; it will choke if the input has
1437 * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1438 * by detecting this case first and simply doing the parse as DOCUMENT.
1440 * A DTD can be found arbitrarily far in, but that would be a contrived case;
1441 * it will ordinarily start within a few dozen characters. The only things
1442 * that can precede it are an XMLDecl (here, the caller will have called
1443 * parse_xml_decl already), whitespace, comments, and processing instructions.
1444 * This function need only return true if it sees a valid sequence of such
1445 * things leading to <!DOCTYPE. It can simply return false in any other
1446 * cases, including malformed input; that will mean the input gets parsed as
1447 * CONTENT as originally planned, with libxml reporting any errors.
1449 * This is only to be called from xml_parse, when pg_xml_init has already
1450 * been called. The input is already in UTF8 encoding.
1452 static bool
1453 xml_doctype_in_content(const xmlChar *str)
1455 const xmlChar *p = str;
1457 for (;;)
1459 const xmlChar *e;
1461 SKIP_XML_SPACE(p);
1462 if (*p != '<')
1463 return false;
1464 p++;
1466 if (*p == '!')
1468 p++;
1470 /* if we see <!DOCTYPE, we can return true */
1471 if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1472 return true;
1474 /* otherwise, if it's not a comment, fail */
1475 if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1476 return false;
1477 /* find end of comment: find -- and a > must follow */
1478 p = xmlStrstr(p + 2, (xmlChar *) "--");
1479 if (!p || p[2] != '>')
1480 return false;
1481 /* advance over comment, and keep scanning */
1482 p += 3;
1483 continue;
1486 /* otherwise, if it's not a PI <?target something?>, fail */
1487 if (*p != '?')
1488 return false;
1489 p++;
1491 /* find end of PI (the string ?> is forbidden within a PI) */
1492 e = xmlStrstr(p, (xmlChar *) "?>");
1493 if (!e)
1494 return false;
1496 /* advance over PI, keep scanning */
1497 p = e + 2;
1503 * Convert a C string to XML internal representation
1505 * Note: it is caller's responsibility to xmlFreeDoc() the result,
1506 * else a permanent memory leak will ensue!
1508 * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1509 * yet do not use SAX - see xmlreader.c)
1511 static xmlDocPtr
1512 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1513 int encoding)
1515 int32 len;
1516 xmlChar *string;
1517 xmlChar *utf8string;
1518 PgXmlErrorContext *xmlerrcxt;
1519 volatile xmlParserCtxtPtr ctxt = NULL;
1520 volatile xmlDocPtr doc = NULL;
1522 len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
1523 string = xml_text2xmlChar(data);
1525 utf8string = pg_do_encoding_conversion(string,
1526 len,
1527 encoding,
1528 PG_UTF8);
1530 /* Start up libxml and its parser */
1531 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1533 /* Use a TRY block to ensure we clean up correctly */
1534 PG_TRY();
1536 bool parse_as_document = false;
1537 int res_code;
1538 size_t count = 0;
1539 xmlChar *version = NULL;
1540 int standalone = 0;
1542 xmlInitParser();
1544 ctxt = xmlNewParserCtxt();
1545 if (ctxt == NULL || xmlerrcxt->err_occurred)
1546 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1547 "could not allocate parser context");
1549 /* Decide whether to parse as document or content */
1550 if (xmloption_arg == XMLOPTION_DOCUMENT)
1551 parse_as_document = true;
1552 else
1554 /* Parse and skip over the XML declaration, if any */
1555 res_code = parse_xml_decl(utf8string,
1556 &count, &version, NULL, &standalone);
1557 if (res_code != 0)
1558 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1559 "invalid XML content: invalid XML declaration",
1560 res_code);
1562 /* Is there a DOCTYPE element? */
1563 if (xml_doctype_in_content(utf8string + count))
1564 parse_as_document = true;
1567 if (parse_as_document)
1570 * Note, that here we try to apply DTD defaults
1571 * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1572 * 'Default values defined by internal DTD are applied'. As for
1573 * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1574 * 10.16.7.e)
1576 doc = xmlCtxtReadDoc(ctxt, utf8string,
1577 NULL,
1578 "UTF-8",
1579 XML_PARSE_NOENT | XML_PARSE_DTDATTR
1580 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1581 if (doc == NULL || xmlerrcxt->err_occurred)
1583 /* Use original option to decide which error code to throw */
1584 if (xmloption_arg == XMLOPTION_DOCUMENT)
1585 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1586 "invalid XML document");
1587 else
1588 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1589 "invalid XML content");
1592 else
1594 doc = xmlNewDoc(version);
1595 Assert(doc->encoding == NULL);
1596 doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1597 doc->standalone = standalone;
1599 /* allow empty content */
1600 if (*(utf8string + count))
1602 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1603 utf8string + count, NULL);
1604 if (res_code != 0 || xmlerrcxt->err_occurred)
1605 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1606 "invalid XML content");
1610 PG_CATCH();
1612 if (doc != NULL)
1613 xmlFreeDoc(doc);
1614 if (ctxt != NULL)
1615 xmlFreeParserCtxt(ctxt);
1617 pg_xml_done(xmlerrcxt, true);
1619 PG_RE_THROW();
1621 PG_END_TRY();
1623 xmlFreeParserCtxt(ctxt);
1625 pg_xml_done(xmlerrcxt, false);
1627 return doc;
1632 * xmlChar<->text conversions
1634 static xmlChar *
1635 xml_text2xmlChar(text *in)
1637 return (xmlChar *) text_to_cstring(in);
#ifdef USE_LIBXMLCONTEXT

/*
 * Manage the special context used for all libxml allocations (but only
 * in special debug builds; see notes at top of file)
 */
static void
xml_memory_init(void)
{
	/* Create memory context if not there already */
	if (LibxmlContext == NULL)
	{
		LibxmlContext = AllocSetContextCreate(TopMemoryContext,
											  "Libxml context",
											  ALLOCSET_DEFAULT_SIZES);
	}

	/* Re-establish the callbacks even if already set */
	xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
}

/*
 * Wrappers for memory management functions
 *
 * These are the callbacks handed to xmlMemSetup() above.
 */

/* Allocate size bytes in LibxmlContext. */
static void *
xml_palloc(size_t size)
{
	return MemoryContextAlloc(LibxmlContext, size);
}

/* Resize a chunk by passing it straight through to repalloc(). */
static void *
xml_repalloc(void *ptr, size_t size)
{
	return repalloc(ptr, size);
}

/* Free a chunk, tolerating NULL. */
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}

/* Duplicate a C string into LibxmlContext. */
static char *
xml_pstrdup(const char *string)
{
	return MemoryContextStrdup(LibxmlContext, string);
}
#endif							/* USE_LIBXMLCONTEXT */
1695 * xmlPgEntityLoader --- entity loader callback function
1697 * Silently prevent any external entity URL from being loaded. We don't want
1698 * to throw an error, so instead make the entity appear to expand to an empty
1699 * string.
1701 * We would prefer to allow loading entities that exist in the system's
1702 * global XML catalog; but the available libxml2 APIs make that a complex
1703 * and fragile task. For now, just shut down all external access.
1705 static xmlParserInputPtr
1706 xmlPgEntityLoader(const char *URL, const char *ID,
1707 xmlParserCtxtPtr ctxt)
1709 return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1714 * xml_ereport --- report an XML-related error
1716 * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1717 * standard. This function adds libxml's native error message, if any, as
1718 * detail.
1720 * This is exported for modules that want to share the core libxml error
1721 * handler. Note that pg_xml_init() *must* have been called previously.
1723 void
1724 xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1726 char *detail;
1728 /* Defend against someone passing us a bogus context struct */
1729 if (errcxt->magic != ERRCXT_MAGIC)
1730 elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1732 /* Flag that the current libxml error has been reported */
1733 errcxt->err_occurred = false;
1735 /* Include detail only if we have some text from libxml */
1736 if (errcxt->err_buf.len > 0)
1737 detail = errcxt->err_buf.data;
1738 else
1739 detail = NULL;
1741 ereport(level,
1742 (errcode(sqlcode),
1743 errmsg_internal("%s", msg),
1744 detail ? errdetail_internal("%s", detail) : 0));
1749 * Error handler for libxml errors and warnings
1751 static void
1752 xml_errorHandler(void *data, xmlErrorPtr error)
1754 PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1755 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1756 xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1757 xmlNodePtr node = error->node;
1758 const xmlChar *name = (node != NULL &&
1759 node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1760 int domain = error->domain;
1761 int level = error->level;
1762 StringInfo errorBuf;
1765 * Defend against someone passing us a bogus context struct.
1767 * We force a backend exit if this check fails because longjmp'ing out of
1768 * libxml would likely render it unsafe to use further.
1770 if (xmlerrcxt->magic != ERRCXT_MAGIC)
1771 elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1773 /*----------
1774 * Older libxml versions report some errors differently.
1775 * First, some errors were previously reported as coming from the parser
1776 * domain but are now reported as coming from the namespace domain.
1777 * Second, some warnings were upgraded to errors.
1778 * We attempt to compensate for that here.
1779 *----------
1781 switch (error->code)
1783 case XML_WAR_NS_URI:
1784 level = XML_ERR_ERROR;
1785 domain = XML_FROM_NAMESPACE;
1786 break;
1788 case XML_ERR_NS_DECL_ERROR:
1789 case XML_WAR_NS_URI_RELATIVE:
1790 case XML_WAR_NS_COLUMN:
1791 case XML_NS_ERR_XML_NAMESPACE:
1792 case XML_NS_ERR_UNDEFINED_NAMESPACE:
1793 case XML_NS_ERR_QNAME:
1794 case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1795 case XML_NS_ERR_EMPTY:
1796 domain = XML_FROM_NAMESPACE;
1797 break;
1800 /* Decide whether to act on the error or not */
1801 switch (domain)
1803 case XML_FROM_PARSER:
1804 case XML_FROM_NONE:
1805 case XML_FROM_MEMORY:
1806 case XML_FROM_IO:
1809 * Suppress warnings about undeclared entities. We need to do
1810 * this to avoid problems due to not loading DTD definitions.
1812 if (error->code == XML_WAR_UNDECLARED_ENTITY)
1813 return;
1815 /* Otherwise, accept error regardless of the parsing purpose */
1816 break;
1818 default:
1819 /* Ignore error if only doing well-formedness check */
1820 if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1821 return;
1822 break;
1825 /* Prepare error message in errorBuf */
1826 errorBuf = makeStringInfo();
1828 if (error->line > 0)
1829 appendStringInfo(errorBuf, "line %d: ", error->line);
1830 if (name != NULL)
1831 appendStringInfo(errorBuf, "element %s: ", name);
1832 if (error->message != NULL)
1833 appendStringInfoString(errorBuf, error->message);
1834 else
1835 appendStringInfoString(errorBuf, "(no message provided)");
1838 * Append context information to errorBuf.
1840 * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1841 * write the context. Since we don't want to duplicate libxml
1842 * functionality here, we set up a generic error handler temporarily.
1844 * We use appendStringInfo() directly as libxml's generic error handler.
1845 * This should work because it has essentially the same signature as
1846 * libxml expects, namely (void *ptr, const char *msg, ...).
1848 if (input != NULL)
1850 xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1851 void *errCtxSaved = xmlGenericErrorContext;
1853 xmlSetGenericErrorFunc((void *) errorBuf,
1854 (xmlGenericErrorFunc) appendStringInfo);
1856 /* Add context information to errorBuf */
1857 appendStringInfoLineSeparator(errorBuf);
1859 xmlParserPrintFileContext(input);
1861 /* Restore generic error func */
1862 xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1865 /* Get rid of any trailing newlines in errorBuf */
1866 chopStringInfoNewlines(errorBuf);
1869 * Legacy error handling mode. err_occurred is never set, we just add the
1870 * message to err_buf. This mode exists because the xml2 contrib module
1871 * uses our error-handling infrastructure, but we don't want to change its
1872 * behaviour since it's deprecated anyway. This is also why we don't
1873 * distinguish between notices, warnings and errors here --- the old-style
1874 * generic error handler wouldn't have done that either.
1876 if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1878 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1879 appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
1880 errorBuf->len);
1882 pfree(errorBuf->data);
1883 pfree(errorBuf);
1884 return;
1888 * We don't want to ereport() here because that'd probably leave libxml in
1889 * an inconsistent state. Instead, we remember the error and ereport()
1890 * from xml_ereport().
1892 * Warnings and notices can be reported immediately since they won't cause
1893 * a longjmp() out of libxml.
1895 if (level >= XML_ERR_ERROR)
1897 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1898 appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
1899 errorBuf->len);
1901 xmlerrcxt->err_occurred = true;
1903 else if (level >= XML_ERR_WARNING)
1905 ereport(WARNING,
1906 (errmsg_internal("%s", errorBuf->data)));
1908 else
1910 ereport(NOTICE,
1911 (errmsg_internal("%s", errorBuf->data)));
1914 pfree(errorBuf->data);
1915 pfree(errorBuf);
1920 * Wrapper for "ereport" function for XML-related errors. The "msg"
1921 * is the SQL-level message; some can be adopted from the SQL/XML
1922 * standard. This function uses "code" to create a textual detail
1923 * message. At the moment, we only need to cover those codes that we
1924 * may raise in this file.
1926 static void
1927 xml_ereport_by_code(int level, int sqlcode,
1928 const char *msg, int code)
1930 const char *det;
1932 switch (code)
1934 case XML_ERR_INVALID_CHAR:
1935 det = gettext_noop("Invalid character value.");
1936 break;
1937 case XML_ERR_SPACE_REQUIRED:
1938 det = gettext_noop("Space required.");
1939 break;
1940 case XML_ERR_STANDALONE_VALUE:
1941 det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1942 break;
1943 case XML_ERR_VERSION_MISSING:
1944 det = gettext_noop("Malformed declaration: missing version.");
1945 break;
1946 case XML_ERR_MISSING_ENCODING:
1947 det = gettext_noop("Missing encoding in text declaration.");
1948 break;
1949 case XML_ERR_XMLDECL_NOT_FINISHED:
1950 det = gettext_noop("Parsing XML declaration: '?>' expected.");
1951 break;
1952 default:
1953 det = gettext_noop("Unrecognized libxml error code: %d.");
1954 break;
1957 ereport(level,
1958 (errcode(sqlcode),
1959 errmsg_internal("%s", msg),
1960 errdetail(det, code)));
1965 * Remove all trailing newlines from a StringInfo string
1967 static void
1968 chopStringInfoNewlines(StringInfo str)
1970 while (str->len > 0 && str->data[str->len - 1] == '\n')
1971 str->data[--str->len] = '\0';
1976 * Append a newline after removing any existing trailing newlines
1978 static void
1979 appendStringInfoLineSeparator(StringInfo str)
1981 chopStringInfoNewlines(str);
1982 if (str->len > 0)
1983 appendStringInfoChar(str, '\n');
1988 * Convert one char in the current server encoding to a Unicode codepoint.
1990 static pg_wchar
1991 sqlchar_to_unicode(const char *s)
1993 char *utf8string;
1994 pg_wchar ret[2]; /* need space for trailing zero */
1996 /* note we're not assuming s is null-terminated */
1997 utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
1999 pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2000 pg_encoding_mblen(PG_UTF8, utf8string));
2002 if (utf8string != s)
2003 pfree(utf8string);
2005 return ret[0];
2009 static bool
2010 is_valid_xml_namefirst(pg_wchar c)
2012 /* (Letter | '_' | ':') */
2013 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2014 || c == '_' || c == ':');
2018 static bool
2019 is_valid_xml_namechar(pg_wchar c)
2021 /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2022 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2023 || xmlIsDigitQ(c)
2024 || c == '.' || c == '-' || c == '_' || c == ':'
2025 || xmlIsCombiningQ(c)
2026 || xmlIsExtenderQ(c));
2028 #endif /* USE_LIBXML */
/*
 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
 *
 * Characters that need escaping are written as _xHHHH_ sequences; the
 * result is a palloc'd string.  Without libxml support this raises the
 * NO_XML_SUPPORT() error instead.
 */
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
							   bool escape_period)
{
#ifdef USE_LIBXML
	StringInfoData buf;
	const char *p;

	/*
	 * SQL/XML doesn't make use of this case anywhere, so it's probably a
	 * mistake.
	 */
	Assert(fully_escaped || !escape_period);

	initStringInfo(&buf);

	p = ident;
	while (*p)
	{
		int			charlen = pg_mblen(p);
		bool		at_start = (p == ident);

		if (*p == ':' && (at_start || fully_escaped))
		{
			/* colon: escaped at the start, or everywhere if fully escaping */
			appendStringInfoString(&buf, "_x003A_");
		}
		else if (*p == '_' && p[1] == 'x')
		{
			/* an underscore that would otherwise look like an escape */
			appendStringInfoString(&buf, "_x005F_");
		}
		else if (fully_escaped && at_start &&
				 pg_strncasecmp(p, "xml", 3) == 0)
		{
			/* leading "xml" in any case: escape the x, preserving its case */
			appendStringInfoString(&buf,
								   (*p == 'x') ? "_x0078_" : "_x0058_");
		}
		else if (escape_period && *p == '.')
		{
			appendStringInfoString(&buf, "_x002E_");
		}
		else
		{
			pg_wchar	u = sqlchar_to_unicode(p);
			bool		valid;

			valid = at_start
				? is_valid_xml_namefirst(u)
				: is_valid_xml_namechar(u);

			if (valid)
				appendBinaryStringInfo(&buf, p, charlen);
			else
				appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
		}

		p += charlen;
	}

	return buf.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif							/* not USE_LIBXML */
}
2088 * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2090 char *
2091 map_xml_name_to_sql_identifier(const char *name)
2093 StringInfoData buf;
2094 const char *p;
2096 initStringInfo(&buf);
2098 for (p = name; *p; p += pg_mblen(p))
2100 if (*p == '_' && *(p + 1) == 'x'
2101 && isxdigit((unsigned char) *(p + 2))
2102 && isxdigit((unsigned char) *(p + 3))
2103 && isxdigit((unsigned char) *(p + 4))
2104 && isxdigit((unsigned char) *(p + 5))
2105 && *(p + 6) == '_')
2107 char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2108 unsigned int u;
2110 sscanf(p + 2, "%X", &u);
2111 pg_unicode_to_server(u, (unsigned char *) cbuf);
2112 appendStringInfoString(&buf, cbuf);
2113 p += 6;
2115 else
2116 appendBinaryStringInfo(&buf, p, pg_mblen(p));
2119 return buf.data;
2123 * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2125 * When xml_escape_strings is true, then certain characters in string
2126 * values are replaced by entity references (&lt; etc.), as specified
2127 * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2128 * wanted. The false case is mainly useful when the resulting value
2129 * is used with xmlTextWriterWriteAttribute() to write out an
2130 * attribute, because that function does the escaping itself.
2132 char *
2133 map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2135 if (type_is_array_domain(type))
2137 ArrayType *array;
2138 Oid elmtype;
2139 int16 elmlen;
2140 bool elmbyval;
2141 char elmalign;
2142 int num_elems;
2143 Datum *elem_values;
2144 bool *elem_nulls;
2145 StringInfoData buf;
2146 int i;
2148 array = DatumGetArrayTypeP(value);
2149 elmtype = ARR_ELEMTYPE(array);
2150 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2152 deconstruct_array(array, elmtype,
2153 elmlen, elmbyval, elmalign,
2154 &elem_values, &elem_nulls,
2155 &num_elems);
2157 initStringInfo(&buf);
2159 for (i = 0; i < num_elems; i++)
2161 if (elem_nulls[i])
2162 continue;
2163 appendStringInfoString(&buf, "<element>");
2164 appendStringInfoString(&buf,
2165 map_sql_value_to_xml_value(elem_values[i],
2166 elmtype, true));
2167 appendStringInfoString(&buf, "</element>");
2170 pfree(elem_values);
2171 pfree(elem_nulls);
2173 return buf.data;
2175 else
2177 Oid typeOut;
2178 bool isvarlena;
2179 char *str;
2182 * Flatten domains; the special-case treatments below should apply to,
2183 * eg, domains over boolean not just boolean.
2185 type = getBaseType(type);
2188 * Special XSD formatting for some data types
2190 switch (type)
2192 case BOOLOID:
2193 if (DatumGetBool(value))
2194 return "true";
2195 else
2196 return "false";
2198 case DATEOID:
2200 DateADT date;
2201 struct pg_tm tm;
2202 char buf[MAXDATELEN + 1];
2204 date = DatumGetDateADT(value);
2205 /* XSD doesn't support infinite values */
2206 if (DATE_NOT_FINITE(date))
2207 ereport(ERROR,
2208 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2209 errmsg("date out of range"),
2210 errdetail("XML does not support infinite date values.")));
2211 j2date(date + POSTGRES_EPOCH_JDATE,
2212 &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2213 EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2215 return pstrdup(buf);
2218 case TIMESTAMPOID:
2220 Timestamp timestamp;
2221 struct pg_tm tm;
2222 fsec_t fsec;
2223 char buf[MAXDATELEN + 1];
2225 timestamp = DatumGetTimestamp(value);
2227 /* XSD doesn't support infinite values */
2228 if (TIMESTAMP_NOT_FINITE(timestamp))
2229 ereport(ERROR,
2230 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2231 errmsg("timestamp out of range"),
2232 errdetail("XML does not support infinite timestamp values.")));
2233 else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2234 EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2235 else
2236 ereport(ERROR,
2237 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2238 errmsg("timestamp out of range")));
2240 return pstrdup(buf);
2243 case TIMESTAMPTZOID:
2245 TimestampTz timestamp;
2246 struct pg_tm tm;
2247 int tz;
2248 fsec_t fsec;
2249 const char *tzn = NULL;
2250 char buf[MAXDATELEN + 1];
2252 timestamp = DatumGetTimestamp(value);
2254 /* XSD doesn't support infinite values */
2255 if (TIMESTAMP_NOT_FINITE(timestamp))
2256 ereport(ERROR,
2257 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2258 errmsg("timestamp out of range"),
2259 errdetail("XML does not support infinite timestamp values.")));
2260 else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2261 EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2262 else
2263 ereport(ERROR,
2264 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2265 errmsg("timestamp out of range")));
2267 return pstrdup(buf);
2270 #ifdef USE_LIBXML
2271 case BYTEAOID:
2273 bytea *bstr = DatumGetByteaPP(value);
2274 PgXmlErrorContext *xmlerrcxt;
2275 volatile xmlBufferPtr buf = NULL;
2276 volatile xmlTextWriterPtr writer = NULL;
2277 char *result;
2279 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2281 PG_TRY();
2283 buf = xmlBufferCreate();
2284 if (buf == NULL || xmlerrcxt->err_occurred)
2285 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2286 "could not allocate xmlBuffer");
2287 writer = xmlNewTextWriterMemory(buf, 0);
2288 if (writer == NULL || xmlerrcxt->err_occurred)
2289 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2290 "could not allocate xmlTextWriter");
2292 if (xmlbinary == XMLBINARY_BASE64)
2293 xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2294 0, VARSIZE_ANY_EXHDR(bstr));
2295 else
2296 xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2297 0, VARSIZE_ANY_EXHDR(bstr));
2299 /* we MUST do this now to flush data out to the buffer */
2300 xmlFreeTextWriter(writer);
2301 writer = NULL;
2303 result = pstrdup((const char *) xmlBufferContent(buf));
2305 PG_CATCH();
2307 if (writer)
2308 xmlFreeTextWriter(writer);
2309 if (buf)
2310 xmlBufferFree(buf);
2312 pg_xml_done(xmlerrcxt, true);
2314 PG_RE_THROW();
2316 PG_END_TRY();
2318 xmlBufferFree(buf);
2320 pg_xml_done(xmlerrcxt, false);
2322 return result;
2324 #endif /* USE_LIBXML */
2329 * otherwise, just use the type's native text representation
2331 getTypeOutputInfo(type, &typeOut, &isvarlena);
2332 str = OidOutputFunctionCall(typeOut, value);
2334 /* ... exactly as-is for XML, and when escaping is not wanted */
2335 if (type == XMLOID || !xml_escape_strings)
2336 return str;
2338 /* otherwise, translate special characters as needed */
2339 return escape_xml(str);
2345 * Escape characters in text that have special meanings in XML.
2347 * Returns a palloc'd string.
2349 * NB: this is intentionally not dependent on libxml.
2351 char *
2352 escape_xml(const char *str)
2354 StringInfoData buf;
2355 const char *p;
2357 initStringInfo(&buf);
2358 for (p = str; *p; p++)
2360 switch (*p)
2362 case '&':
2363 appendStringInfoString(&buf, "&amp;");
2364 break;
2365 case '<':
2366 appendStringInfoString(&buf, "&lt;");
2367 break;
2368 case '>':
2369 appendStringInfoString(&buf, "&gt;");
2370 break;
2371 case '\r':
2372 appendStringInfoString(&buf, "&#x0d;");
2373 break;
2374 default:
2375 appendStringInfoCharMacro(&buf, *p);
2376 break;
2379 return buf.data;
/*
 * Duplicate a C string, terminator included, into memory obtained from
 * SPI_palloc().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes = strlen(s) + 1;
	char	   *copy = SPI_palloc(nbytes);

	return memcpy(copy, s, nbytes);
}
/*
 * SQL to XML mapping functions
 *
 * What follows below was at one point intentionally organized so that
 * you can read along in the SQL/XML standard. The functions are
 * mostly split up the way the clauses lay out in the standards
 * document, and the identifiers are also aligned with the standard
 * text. Unfortunately, SQL/XML:2006 reordered the clauses
 * differently than SQL/XML:2003, so the order below doesn't make much
 * sense anymore.
 *
 * There are many things going on there:
 *
 * There are two kinds of mappings: Mapping SQL data (table contents)
 * to XML documents, and mapping SQL structure (the "schema") to XML
 * Schema. And there are functions that do both at the same time.
 *
 * Then you can map a database, a schema, or a table, each in both
 * ways. This breaks down recursively: Mapping a database invokes
 * mapping schemas, which invokes mapping tables, which invokes
 * mapping rows, which invokes mapping columns, although you can't
 * call the last two from the outside. Because of this, there are a
 * number of xyz_internal() functions which are to be called both from
 * the function manager wrapper and from some upper layer in a
 * recursive call.
 *
 * See the documentation about what the common function arguments
 * nulls, tableforest, and targetns mean.
 *
 * Some style guidelines for XML output: Use double quotes for quoting
 * XML attributes. Indent XML elements by two spaces, but remember
 * that a lot of code is called recursively at different levels, so
 * it's better not to indent rather than create output that indents
 * and outdents weirdly. Add newlines to make the output look nice.
 */
/*
 * Visibility of objects for XML mappings; see SQL/XML:2008 section
 * 4.10.8.
 */
2437 * Given a query, which must return type oid as first column, produce
2438 * a list of Oids with the query results.
2440 static List *
2441 query_to_oid_list(const char *query)
2443 uint64 i;
2444 List *list = NIL;
2445 int spi_result;
2447 spi_result = SPI_execute(query, true, 0);
2448 if (spi_result != SPI_OK_SELECT)
2449 elog(ERROR, "SPI_execute returned %s for %s",
2450 SPI_result_code_string(spi_result), query);
2452 for (i = 0; i < SPI_processed; i++)
2454 Datum oid;
2455 bool isnull;
2457 oid = SPI_getbinval(SPI_tuptable->vals[i],
2458 SPI_tuptable->tupdesc,
2460 &isnull);
2461 if (!isnull)
2462 list = lappend_oid(list, DatumGetObjectId(oid));
2465 return list;
2469 static List *
2470 schema_get_xml_visible_tables(Oid nspid)
2472 StringInfoData query;
2474 initStringInfo(&query);
2475 appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2476 " WHERE relnamespace = %u AND relkind IN ("
2477 CppAsString2(RELKIND_RELATION) ","
2478 CppAsString2(RELKIND_MATVIEW) ","
2479 CppAsString2(RELKIND_VIEW) ")"
2480 " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2481 " ORDER BY relname;", nspid);
2483 return query_to_oid_list(query.data);
/*
 * Including the system schemas is probably not useful for a database
 * mapping.
 *
 * XML_VISIBLE_SCHEMAS_EXCLUDE is an SQL condition on nspname that matches
 * names beginning with "pg_" as well as "information_schema".
 *
 * XML_VISIBLE_SCHEMAS is an SQL query (without trailing semicolon, so that
 * it can be extended or embedded as a subquery) selecting the OIDs of the
 * namespaces for which has_schema_privilege(oid, 'USAGE') holds and which
 * do not match XML_VISIBLE_SCHEMAS_EXCLUDE.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')"

#define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2496 static List *
2497 database_get_xml_visible_schemas(void)
2499 return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2503 static List *
2504 database_get_xml_visible_tables(void)
2506 /* At the moment there is no order required here. */
2507 return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2508 " WHERE relkind IN ("
2509 CppAsString2(RELKIND_RELATION) ","
2510 CppAsString2(RELKIND_MATVIEW) ","
2511 CppAsString2(RELKIND_VIEW) ")"
2512 " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2513 " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
/*
 * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
 * section 9.11.
 */
2522 static StringInfo
2523 table_to_xml_internal(Oid relid,
2524 const char *xmlschema, bool nulls, bool tableforest,
2525 const char *targetns, bool top_level)
2527 StringInfoData query;
2529 initStringInfo(&query);
2530 appendStringInfo(&query, "SELECT * FROM %s",
2531 DatumGetCString(DirectFunctionCall1(regclassout,
2532 ObjectIdGetDatum(relid))));
2533 return query_to_xml_internal(query.data, get_rel_name(relid),
2534 xmlschema, nulls, tableforest,
2535 targetns, top_level);
2539 Datum
2540 table_to_xml(PG_FUNCTION_ARGS)
2542 Oid relid = PG_GETARG_OID(0);
2543 bool nulls = PG_GETARG_BOOL(1);
2544 bool tableforest = PG_GETARG_BOOL(2);
2545 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2547 PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2548 nulls, tableforest,
2549 targetns, true)));
2553 Datum
2554 query_to_xml(PG_FUNCTION_ARGS)
2556 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2557 bool nulls = PG_GETARG_BOOL(1);
2558 bool tableforest = PG_GETARG_BOOL(2);
2559 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2561 PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2562 NULL, nulls, tableforest,
2563 targetns, true)));
2567 Datum
2568 cursor_to_xml(PG_FUNCTION_ARGS)
2570 char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2571 int32 count = PG_GETARG_INT32(1);
2572 bool nulls = PG_GETARG_BOOL(2);
2573 bool tableforest = PG_GETARG_BOOL(3);
2574 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2576 StringInfoData result;
2577 Portal portal;
2578 uint64 i;
2580 initStringInfo(&result);
2582 if (!tableforest)
2584 xmldata_root_element_start(&result, "table", NULL, targetns, true);
2585 appendStringInfoChar(&result, '\n');
2588 SPI_connect();
2589 portal = SPI_cursor_find(name);
2590 if (portal == NULL)
2591 ereport(ERROR,
2592 (errcode(ERRCODE_UNDEFINED_CURSOR),
2593 errmsg("cursor \"%s\" does not exist", name)));
2595 SPI_cursor_fetch(portal, true, count);
2596 for (i = 0; i < SPI_processed; i++)
2597 SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2598 tableforest, targetns, true);
2600 SPI_finish();
2602 if (!tableforest)
2603 xmldata_root_element_end(&result, "table");
2605 PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2610 * Write the start tag of the root element of a data mapping.
2612 * top_level means that this is the very top level of the eventual
2613 * output. For example, when the user calls table_to_xml, then a call
2614 * with a table name to this function is the top level. When the user
2615 * calls database_to_xml, then a call with a schema name to this
2616 * function is not the top level. If top_level is false, then the XML
2617 * namespace declarations are omitted, because they supposedly already
2618 * appeared earlier in the output. Repeating them is not wrong, but
2619 * it looks ugly.
2621 static void
2622 xmldata_root_element_start(StringInfo result, const char *eltname,
2623 const char *xmlschema, const char *targetns,
2624 bool top_level)
2626 /* This isn't really wrong but currently makes no sense. */
2627 Assert(top_level || !xmlschema);
2629 appendStringInfo(result, "<%s", eltname);
2630 if (top_level)
2632 appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2633 if (strlen(targetns) > 0)
2634 appendStringInfo(result, " xmlns=\"%s\"", targetns);
2636 if (xmlschema)
2638 /* FIXME: better targets */
2639 if (strlen(targetns) > 0)
2640 appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2641 else
2642 appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2644 appendStringInfoString(result, ">\n");
2648 static void
2649 xmldata_root_element_end(StringInfo result, const char *eltname)
2651 appendStringInfo(result, "</%s>\n", eltname);
2655 static StringInfo
2656 query_to_xml_internal(const char *query, char *tablename,
2657 const char *xmlschema, bool nulls, bool tableforest,
2658 const char *targetns, bool top_level)
2660 StringInfo result;
2661 char *xmltn;
2662 uint64 i;
2664 if (tablename)
2665 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2666 else
2667 xmltn = "table";
2669 result = makeStringInfo();
2671 SPI_connect();
2672 if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2673 ereport(ERROR,
2674 (errcode(ERRCODE_DATA_EXCEPTION),
2675 errmsg("invalid query")));
2677 if (!tableforest)
2679 xmldata_root_element_start(result, xmltn, xmlschema,
2680 targetns, top_level);
2681 appendStringInfoChar(result, '\n');
2684 if (xmlschema)
2685 appendStringInfo(result, "%s\n\n", xmlschema);
2687 for (i = 0; i < SPI_processed; i++)
2688 SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2689 tableforest, targetns, top_level);
2691 if (!tableforest)
2692 xmldata_root_element_end(result, xmltn);
2694 SPI_finish();
2696 return result;
2700 Datum
2701 table_to_xmlschema(PG_FUNCTION_ARGS)
2703 Oid relid = PG_GETARG_OID(0);
2704 bool nulls = PG_GETARG_BOOL(1);
2705 bool tableforest = PG_GETARG_BOOL(2);
2706 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2707 const char *result;
2708 Relation rel;
2710 rel = table_open(relid, AccessShareLock);
2711 result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2712 tableforest, targetns);
2713 table_close(rel, NoLock);
2715 PG_RETURN_XML_P(cstring_to_xmltype(result));
2719 Datum
2720 query_to_xmlschema(PG_FUNCTION_ARGS)
2722 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2723 bool nulls = PG_GETARG_BOOL(1);
2724 bool tableforest = PG_GETARG_BOOL(2);
2725 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2726 const char *result;
2727 SPIPlanPtr plan;
2728 Portal portal;
2730 SPI_connect();
2732 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2733 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2735 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2736 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2738 result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2739 InvalidOid, nulls,
2740 tableforest, targetns));
2741 SPI_cursor_close(portal);
2742 SPI_finish();
2744 PG_RETURN_XML_P(cstring_to_xmltype(result));
2748 Datum
2749 cursor_to_xmlschema(PG_FUNCTION_ARGS)
2751 char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2752 bool nulls = PG_GETARG_BOOL(1);
2753 bool tableforest = PG_GETARG_BOOL(2);
2754 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2755 const char *xmlschema;
2756 Portal portal;
2758 SPI_connect();
2759 portal = SPI_cursor_find(name);
2760 if (portal == NULL)
2761 ereport(ERROR,
2762 (errcode(ERRCODE_UNDEFINED_CURSOR),
2763 errmsg("cursor \"%s\" does not exist", name)));
2765 xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2766 InvalidOid, nulls,
2767 tableforest, targetns));
2768 SPI_finish();
2770 PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2774 Datum
2775 table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2777 Oid relid = PG_GETARG_OID(0);
2778 bool nulls = PG_GETARG_BOOL(1);
2779 bool tableforest = PG_GETARG_BOOL(2);
2780 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2781 Relation rel;
2782 const char *xmlschema;
2784 rel = table_open(relid, AccessShareLock);
2785 xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2786 tableforest, targetns);
2787 table_close(rel, NoLock);
2789 PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2790 xmlschema, nulls, tableforest,
2791 targetns, true)));
2795 Datum
2796 query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2798 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2799 bool nulls = PG_GETARG_BOOL(1);
2800 bool tableforest = PG_GETARG_BOOL(2);
2801 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2803 const char *xmlschema;
2804 SPIPlanPtr plan;
2805 Portal portal;
2807 SPI_connect();
2809 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2810 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2812 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2813 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2815 xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2816 InvalidOid, nulls, tableforest, targetns));
2817 SPI_cursor_close(portal);
2818 SPI_finish();
2820 PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2821 xmlschema, nulls, tableforest,
2822 targetns, true)));
/*
 * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
 * sections 9.13, 9.14.
 */
2831 static StringInfo
2832 schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2833 bool tableforest, const char *targetns, bool top_level)
2835 StringInfo result;
2836 char *xmlsn;
2837 List *relid_list;
2838 ListCell *cell;
2840 xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2841 true, false);
2842 result = makeStringInfo();
2844 xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2845 appendStringInfoChar(result, '\n');
2847 if (xmlschema)
2848 appendStringInfo(result, "%s\n\n", xmlschema);
2850 SPI_connect();
2852 relid_list = schema_get_xml_visible_tables(nspid);
2854 foreach(cell, relid_list)
2856 Oid relid = lfirst_oid(cell);
2857 StringInfo subres;
2859 subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2860 targetns, false);
2862 appendBinaryStringInfo(result, subres->data, subres->len);
2863 appendStringInfoChar(result, '\n');
2866 SPI_finish();
2868 xmldata_root_element_end(result, xmlsn);
2870 return result;
2874 Datum
2875 schema_to_xml(PG_FUNCTION_ARGS)
2877 Name name = PG_GETARG_NAME(0);
2878 bool nulls = PG_GETARG_BOOL(1);
2879 bool tableforest = PG_GETARG_BOOL(2);
2880 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2882 char *schemaname;
2883 Oid nspid;
2885 schemaname = NameStr(*name);
2886 nspid = LookupExplicitNamespace(schemaname, false);
2888 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2889 nulls, tableforest, targetns, true)));
2894 * Write the start element of the root element of an XML Schema mapping.
2896 static void
2897 xsd_schema_element_start(StringInfo result, const char *targetns)
2899 appendStringInfoString(result,
2900 "<xsd:schema\n"
2901 " xmlns:xsd=\"" NAMESPACE_XSD "\"");
2902 if (strlen(targetns) > 0)
2903 appendStringInfo(result,
2904 "\n"
2905 " targetNamespace=\"%s\"\n"
2906 " elementFormDefault=\"qualified\"",
2907 targetns);
2908 appendStringInfoString(result,
2909 ">\n\n");
2913 static void
2914 xsd_schema_element_end(StringInfo result)
2916 appendStringInfoString(result, "</xsd:schema>");
2920 static StringInfo
2921 schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2922 bool tableforest, const char *targetns)
2924 Oid nspid;
2925 List *relid_list;
2926 List *tupdesc_list;
2927 ListCell *cell;
2928 StringInfo result;
2930 result = makeStringInfo();
2932 nspid = LookupExplicitNamespace(schemaname, false);
2934 xsd_schema_element_start(result, targetns);
2936 SPI_connect();
2938 relid_list = schema_get_xml_visible_tables(nspid);
2940 tupdesc_list = NIL;
2941 foreach(cell, relid_list)
2943 Relation rel;
2945 rel = table_open(lfirst_oid(cell), AccessShareLock);
2946 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2947 table_close(rel, NoLock);
2950 appendStringInfoString(result,
2951 map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2953 appendStringInfoString(result,
2954 map_sql_schema_to_xmlschema_types(nspid, relid_list,
2955 nulls, tableforest, targetns));
2957 xsd_schema_element_end(result);
2959 SPI_finish();
2961 return result;
2965 Datum
2966 schema_to_xmlschema(PG_FUNCTION_ARGS)
2968 Name name = PG_GETARG_NAME(0);
2969 bool nulls = PG_GETARG_BOOL(1);
2970 bool tableforest = PG_GETARG_BOOL(2);
2971 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2973 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2974 nulls, tableforest, targetns)));
2978 Datum
2979 schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2981 Name name = PG_GETARG_NAME(0);
2982 bool nulls = PG_GETARG_BOOL(1);
2983 bool tableforest = PG_GETARG_BOOL(2);
2984 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2985 char *schemaname;
2986 Oid nspid;
2987 StringInfo xmlschema;
2989 schemaname = NameStr(*name);
2990 nspid = LookupExplicitNamespace(schemaname, false);
2992 xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
2993 tableforest, targetns);
2995 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
2996 xmlschema->data, nulls,
2997 tableforest, targetns, true)));
/*
 * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
 * sections 9.16, 9.17.
 */
3006 static StringInfo
3007 database_to_xml_internal(const char *xmlschema, bool nulls,
3008 bool tableforest, const char *targetns)
3010 StringInfo result;
3011 List *nspid_list;
3012 ListCell *cell;
3013 char *xmlcn;
3015 xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3016 true, false);
3017 result = makeStringInfo();
3019 xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3020 appendStringInfoChar(result, '\n');
3022 if (xmlschema)
3023 appendStringInfo(result, "%s\n\n", xmlschema);
3025 SPI_connect();
3027 nspid_list = database_get_xml_visible_schemas();
3029 foreach(cell, nspid_list)
3031 Oid nspid = lfirst_oid(cell);
3032 StringInfo subres;
3034 subres = schema_to_xml_internal(nspid, NULL, nulls,
3035 tableforest, targetns, false);
3037 appendBinaryStringInfo(result, subres->data, subres->len);
3038 appendStringInfoChar(result, '\n');
3041 SPI_finish();
3043 xmldata_root_element_end(result, xmlcn);
3045 return result;
3049 Datum
3050 database_to_xml(PG_FUNCTION_ARGS)
3052 bool nulls = PG_GETARG_BOOL(0);
3053 bool tableforest = PG_GETARG_BOOL(1);
3054 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3056 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3057 tableforest, targetns)));
3061 static StringInfo
3062 database_to_xmlschema_internal(bool nulls, bool tableforest,
3063 const char *targetns)
3065 List *relid_list;
3066 List *nspid_list;
3067 List *tupdesc_list;
3068 ListCell *cell;
3069 StringInfo result;
3071 result = makeStringInfo();
3073 xsd_schema_element_start(result, targetns);
3075 SPI_connect();
3077 relid_list = database_get_xml_visible_tables();
3078 nspid_list = database_get_xml_visible_schemas();
3080 tupdesc_list = NIL;
3081 foreach(cell, relid_list)
3083 Relation rel;
3085 rel = table_open(lfirst_oid(cell), AccessShareLock);
3086 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3087 table_close(rel, NoLock);
3090 appendStringInfoString(result,
3091 map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3093 appendStringInfoString(result,
3094 map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3096 xsd_schema_element_end(result);
3098 SPI_finish();
3100 return result;
3104 Datum
3105 database_to_xmlschema(PG_FUNCTION_ARGS)
3107 bool nulls = PG_GETARG_BOOL(0);
3108 bool tableforest = PG_GETARG_BOOL(1);
3109 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3111 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3112 tableforest, targetns)));
3116 Datum
3117 database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3119 bool nulls = PG_GETARG_BOOL(0);
3120 bool tableforest = PG_GETARG_BOOL(1);
3121 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3122 StringInfo xmlschema;
3124 xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3126 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3127 nulls, tableforest, targetns)));
3132 * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3133 * 9.2.
3135 static char *
3136 map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3138 StringInfoData result;
3140 initStringInfo(&result);
3142 if (a)
3143 appendStringInfoString(&result,
3144 map_sql_identifier_to_xml_name(a, true, true));
3145 if (b)
3146 appendStringInfo(&result, ".%s",
3147 map_sql_identifier_to_xml_name(b, true, true));
3148 if (c)
3149 appendStringInfo(&result, ".%s",
3150 map_sql_identifier_to_xml_name(c, true, true));
3151 if (d)
3152 appendStringInfo(&result, ".%s",
3153 map_sql_identifier_to_xml_name(d, true, true));
3155 return result.data;
3160 * Map an SQL table to an XML Schema document; see SQL/XML:2008
3161 * section 9.11.
3163 * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3164 * 9.9.
3166 static const char *
3167 map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3168 bool tableforest, const char *targetns)
3170 int i;
3171 char *xmltn;
3172 char *tabletypename;
3173 char *rowtypename;
3174 StringInfoData result;
3176 initStringInfo(&result);
3178 if (OidIsValid(relid))
3180 HeapTuple tuple;
3181 Form_pg_class reltuple;
3183 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3184 if (!HeapTupleIsValid(tuple))
3185 elog(ERROR, "cache lookup failed for relation %u", relid);
3186 reltuple = (Form_pg_class) GETSTRUCT(tuple);
3188 xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3189 true, false);
3191 tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3192 get_database_name(MyDatabaseId),
3193 get_namespace_name(reltuple->relnamespace),
3194 NameStr(reltuple->relname));
3196 rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3197 get_database_name(MyDatabaseId),
3198 get_namespace_name(reltuple->relnamespace),
3199 NameStr(reltuple->relname));
3201 ReleaseSysCache(tuple);
3203 else
3205 if (tableforest)
3206 xmltn = "row";
3207 else
3208 xmltn = "table";
3210 tabletypename = "TableType";
3211 rowtypename = "RowType";
3214 xsd_schema_element_start(&result, targetns);
3216 appendStringInfoString(&result,
3217 map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3219 appendStringInfo(&result,
3220 "<xsd:complexType name=\"%s\">\n"
3221 " <xsd:sequence>\n",
3222 rowtypename);
3224 for (i = 0; i < tupdesc->natts; i++)
3226 Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3228 if (att->attisdropped)
3229 continue;
3230 appendStringInfo(&result,
3231 " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3232 map_sql_identifier_to_xml_name(NameStr(att->attname),
3233 true, false),
3234 map_sql_type_to_xml_name(att->atttypid, -1),
3235 nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3238 appendStringInfoString(&result,
3239 " </xsd:sequence>\n"
3240 "</xsd:complexType>\n\n");
3242 if (!tableforest)
3244 appendStringInfo(&result,
3245 "<xsd:complexType name=\"%s\">\n"
3246 " <xsd:sequence>\n"
3247 " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3248 " </xsd:sequence>\n"
3249 "</xsd:complexType>\n\n",
3250 tabletypename, rowtypename);
3252 appendStringInfo(&result,
3253 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3254 xmltn, tabletypename);
3256 else
3257 appendStringInfo(&result,
3258 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3259 xmltn, rowtypename);
3261 xsd_schema_element_end(&result);
3263 return result.data;
3268 * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3269 * section 9.12.
3271 static const char *
3272 map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3273 bool tableforest, const char *targetns)
3275 char *dbname;
3276 char *nspname;
3277 char *xmlsn;
3278 char *schematypename;
3279 StringInfoData result;
3280 ListCell *cell;
3282 dbname = get_database_name(MyDatabaseId);
3283 nspname = get_namespace_name(nspid);
3285 initStringInfo(&result);
3287 xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3289 schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3290 dbname,
3291 nspname,
3292 NULL);
3294 appendStringInfo(&result,
3295 "<xsd:complexType name=\"%s\">\n", schematypename);
3296 if (!tableforest)
3297 appendStringInfoString(&result,
3298 " <xsd:all>\n");
3299 else
3300 appendStringInfoString(&result,
3301 " <xsd:sequence>\n");
3303 foreach(cell, relid_list)
3305 Oid relid = lfirst_oid(cell);
3306 char *relname = get_rel_name(relid);
3307 char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3308 char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3309 dbname,
3310 nspname,
3311 relname);
3313 if (!tableforest)
3314 appendStringInfo(&result,
3315 " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3316 xmltn, tabletypename);
3317 else
3318 appendStringInfo(&result,
3319 " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3320 xmltn, tabletypename);
3323 if (!tableforest)
3324 appendStringInfoString(&result,
3325 " </xsd:all>\n");
3326 else
3327 appendStringInfoString(&result,
3328 " </xsd:sequence>\n");
3329 appendStringInfoString(&result,
3330 "</xsd:complexType>\n\n");
3332 appendStringInfo(&result,
3333 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3334 xmlsn, schematypename);
3336 return result.data;
3341 * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3342 * section 9.15.
3344 static const char *
3345 map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3346 bool tableforest, const char *targetns)
3348 char *dbname;
3349 char *xmlcn;
3350 char *catalogtypename;
3351 StringInfoData result;
3352 ListCell *cell;
3354 dbname = get_database_name(MyDatabaseId);
3356 initStringInfo(&result);
3358 xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3360 catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3361 dbname,
3362 NULL,
3363 NULL);
3365 appendStringInfo(&result,
3366 "<xsd:complexType name=\"%s\">\n", catalogtypename);
3367 appendStringInfoString(&result,
3368 " <xsd:all>\n");
3370 foreach(cell, nspid_list)
3372 Oid nspid = lfirst_oid(cell);
3373 char *nspname = get_namespace_name(nspid);
3374 char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3375 char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3376 dbname,
3377 nspname,
3378 NULL);
3380 appendStringInfo(&result,
3381 " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3382 xmlsn, schematypename);
3385 appendStringInfoString(&result,
3386 " </xsd:all>\n");
3387 appendStringInfoString(&result,
3388 "</xsd:complexType>\n\n");
3390 appendStringInfo(&result,
3391 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3392 xmlcn, catalogtypename);
3394 return result.data;
3399 * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3401 static const char *
3402 map_sql_type_to_xml_name(Oid typeoid, int typmod)
3404 StringInfoData result;
3406 initStringInfo(&result);
3408 switch (typeoid)
3410 case BPCHAROID:
3411 if (typmod == -1)
3412 appendStringInfoString(&result, "CHAR");
3413 else
3414 appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3415 break;
3416 case VARCHAROID:
3417 if (typmod == -1)
3418 appendStringInfoString(&result, "VARCHAR");
3419 else
3420 appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3421 break;
3422 case NUMERICOID:
3423 if (typmod == -1)
3424 appendStringInfoString(&result, "NUMERIC");
3425 else
3426 appendStringInfo(&result, "NUMERIC_%d_%d",
3427 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3428 (typmod - VARHDRSZ) & 0xffff);
3429 break;
3430 case INT4OID:
3431 appendStringInfoString(&result, "INTEGER");
3432 break;
3433 case INT2OID:
3434 appendStringInfoString(&result, "SMALLINT");
3435 break;
3436 case INT8OID:
3437 appendStringInfoString(&result, "BIGINT");
3438 break;
3439 case FLOAT4OID:
3440 appendStringInfoString(&result, "REAL");
3441 break;
3442 case FLOAT8OID:
3443 appendStringInfoString(&result, "DOUBLE");
3444 break;
3445 case BOOLOID:
3446 appendStringInfoString(&result, "BOOLEAN");
3447 break;
3448 case TIMEOID:
3449 if (typmod == -1)
3450 appendStringInfoString(&result, "TIME");
3451 else
3452 appendStringInfo(&result, "TIME_%d", typmod);
3453 break;
3454 case TIMETZOID:
3455 if (typmod == -1)
3456 appendStringInfoString(&result, "TIME_WTZ");
3457 else
3458 appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3459 break;
3460 case TIMESTAMPOID:
3461 if (typmod == -1)
3462 appendStringInfoString(&result, "TIMESTAMP");
3463 else
3464 appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3465 break;
3466 case TIMESTAMPTZOID:
3467 if (typmod == -1)
3468 appendStringInfoString(&result, "TIMESTAMP_WTZ");
3469 else
3470 appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3471 break;
3472 case DATEOID:
3473 appendStringInfoString(&result, "DATE");
3474 break;
3475 case XMLOID:
3476 appendStringInfoString(&result, "XML");
3477 break;
3478 default:
3480 HeapTuple tuple;
3481 Form_pg_type typtuple;
3483 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3484 if (!HeapTupleIsValid(tuple))
3485 elog(ERROR, "cache lookup failed for type %u", typeoid);
3486 typtuple = (Form_pg_type) GETSTRUCT(tuple);
3488 appendStringInfoString(&result,
3489 map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3490 get_database_name(MyDatabaseId),
3491 get_namespace_name(typtuple->typnamespace),
3492 NameStr(typtuple->typname)));
3494 ReleaseSysCache(tuple);
3498 return result.data;
3503 * Map a collection of SQL data types to XML Schema data types; see
3504 * SQL/XML:2008 section 9.7.
3506 static const char *
3507 map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3509 List *uniquetypes = NIL;
3510 int i;
3511 StringInfoData result;
3512 ListCell *cell0;
3514 /* extract all column types used in the set of TupleDescs */
3515 foreach(cell0, tupdesc_list)
3517 TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3519 for (i = 0; i < tupdesc->natts; i++)
3521 Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3523 if (att->attisdropped)
3524 continue;
3525 uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3529 /* add base types of domains */
3530 foreach(cell0, uniquetypes)
3532 Oid typid = lfirst_oid(cell0);
3533 Oid basetypid = getBaseType(typid);
3535 if (basetypid != typid)
3536 uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3539 /* Convert to textual form */
3540 initStringInfo(&result);
3542 foreach(cell0, uniquetypes)
3544 appendStringInfo(&result, "%s\n",
3545 map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3546 -1));
3549 return result.data;
3554 * Map an SQL data type to a named XML Schema data type; see
3555 * SQL/XML:2008 sections 9.5 and 9.6.
3557 * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3558 * a name attribute, which this function does. The name-less version
3559 * 9.5 doesn't appear to be required anywhere.)
3561 static const char *
3562 map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3564 StringInfoData result;
3565 const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3567 initStringInfo(&result);
3569 if (typeoid == XMLOID)
3571 appendStringInfoString(&result,
3572 "<xsd:complexType mixed=\"true\">\n"
3573 " <xsd:sequence>\n"
3574 " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3575 " </xsd:sequence>\n"
3576 "</xsd:complexType>\n");
3578 else
3580 appendStringInfo(&result,
3581 "<xsd:simpleType name=\"%s\">\n", typename);
3583 switch (typeoid)
3585 case BPCHAROID:
3586 case VARCHAROID:
3587 case TEXTOID:
3588 appendStringInfoString(&result,
3589 " <xsd:restriction base=\"xsd:string\">\n");
3590 if (typmod != -1)
3591 appendStringInfo(&result,
3592 " <xsd:maxLength value=\"%d\"/>\n",
3593 typmod - VARHDRSZ);
3594 appendStringInfoString(&result, " </xsd:restriction>\n");
3595 break;
3597 case BYTEAOID:
3598 appendStringInfo(&result,
3599 " <xsd:restriction base=\"xsd:%s\">\n"
3600 " </xsd:restriction>\n",
3601 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3602 break;
3604 case NUMERICOID:
3605 if (typmod != -1)
3606 appendStringInfo(&result,
3607 " <xsd:restriction base=\"xsd:decimal\">\n"
3608 " <xsd:totalDigits value=\"%d\"/>\n"
3609 " <xsd:fractionDigits value=\"%d\"/>\n"
3610 " </xsd:restriction>\n",
3611 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3612 (typmod - VARHDRSZ) & 0xffff);
3613 break;
3615 case INT2OID:
3616 appendStringInfo(&result,
3617 " <xsd:restriction base=\"xsd:short\">\n"
3618 " <xsd:maxInclusive value=\"%d\"/>\n"
3619 " <xsd:minInclusive value=\"%d\"/>\n"
3620 " </xsd:restriction>\n",
3621 SHRT_MAX, SHRT_MIN);
3622 break;
3624 case INT4OID:
3625 appendStringInfo(&result,
3626 " <xsd:restriction base=\"xsd:int\">\n"
3627 " <xsd:maxInclusive value=\"%d\"/>\n"
3628 " <xsd:minInclusive value=\"%d\"/>\n"
3629 " </xsd:restriction>\n",
3630 INT_MAX, INT_MIN);
3631 break;
3633 case INT8OID:
3634 appendStringInfo(&result,
3635 " <xsd:restriction base=\"xsd:long\">\n"
3636 " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3637 " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3638 " </xsd:restriction>\n",
3639 PG_INT64_MAX,
3640 PG_INT64_MIN);
3641 break;
3643 case FLOAT4OID:
3644 appendStringInfoString(&result,
3645 " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3646 break;
3648 case FLOAT8OID:
3649 appendStringInfoString(&result,
3650 " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3651 break;
3653 case BOOLOID:
3654 appendStringInfoString(&result,
3655 " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3656 break;
3658 case TIMEOID:
3659 case TIMETZOID:
3661 const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3663 if (typmod == -1)
3664 appendStringInfo(&result,
3665 " <xsd:restriction base=\"xsd:time\">\n"
3666 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3667 " </xsd:restriction>\n", tz);
3668 else if (typmod == 0)
3669 appendStringInfo(&result,
3670 " <xsd:restriction base=\"xsd:time\">\n"
3671 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3672 " </xsd:restriction>\n", tz);
3673 else
3674 appendStringInfo(&result,
3675 " <xsd:restriction base=\"xsd:time\">\n"
3676 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3677 " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3678 break;
3681 case TIMESTAMPOID:
3682 case TIMESTAMPTZOID:
3684 const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3686 if (typmod == -1)
3687 appendStringInfo(&result,
3688 " <xsd:restriction base=\"xsd:dateTime\">\n"
3689 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3690 " </xsd:restriction>\n", tz);
3691 else if (typmod == 0)
3692 appendStringInfo(&result,
3693 " <xsd:restriction base=\"xsd:dateTime\">\n"
3694 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3695 " </xsd:restriction>\n", tz);
3696 else
3697 appendStringInfo(&result,
3698 " <xsd:restriction base=\"xsd:dateTime\">\n"
3699 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3700 " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3701 break;
3704 case DATEOID:
3705 appendStringInfoString(&result,
3706 " <xsd:restriction base=\"xsd:date\">\n"
3707 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3708 " </xsd:restriction>\n");
3709 break;
3711 default:
3712 if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3714 Oid base_typeoid;
3715 int32 base_typmod = -1;
3717 base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3719 appendStringInfo(&result,
3720 " <xsd:restriction base=\"%s\"/>\n",
3721 map_sql_type_to_xml_name(base_typeoid, base_typmod));
3723 break;
3725 appendStringInfoString(&result, "</xsd:simpleType>\n");
3728 return result.data;
3733 * Map an SQL row to an XML element, taking the row from the active
3734 * SPI cursor. See also SQL/XML:2008 section 9.10.
3736 static void
3737 SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3738 bool nulls, bool tableforest,
3739 const char *targetns, bool top_level)
3741 int i;
3742 char *xmltn;
3744 if (tablename)
3745 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3746 else
3748 if (tableforest)
3749 xmltn = "row";
3750 else
3751 xmltn = "table";
3754 if (tableforest)
3755 xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3756 else
3757 appendStringInfoString(result, "<row>\n");
3759 for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3761 char *colname;
3762 Datum colval;
3763 bool isnull;
3765 colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3766 true, false);
3767 colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3768 SPI_tuptable->tupdesc,
3770 &isnull);
3771 if (isnull)
3773 if (nulls)
3774 appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
3776 else
3777 appendStringInfo(result, " <%s>%s</%s>\n",
3778 colname,
3779 map_sql_value_to_xml_value(colval,
3780 SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3781 colname);
3784 if (tableforest)
3786 xmldata_root_element_end(result, xmltn);
3787 appendStringInfoChar(result, '\n');
3789 else
3790 appendStringInfoString(result, "</row>\n\n");
3795 * XPath related functions
3798 #ifdef USE_LIBXML
3801 * Convert XML node to text.
3803 * For attribute and text nodes, return the escaped text. For anything else,
3804 * dump the whole subtree.
3806 static text *
3807 xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
3809 xmltype *result = NULL;
3811 if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
3813 void (*volatile nodefree) (xmlNodePtr) = NULL;
3814 volatile xmlBufferPtr buf = NULL;
3815 volatile xmlNodePtr cur_copy = NULL;
3817 PG_TRY();
3819 int bytes;
3821 buf = xmlBufferCreate();
3822 if (buf == NULL || xmlerrcxt->err_occurred)
3823 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3824 "could not allocate xmlBuffer");
3827 * Produce a dump of the node that we can serialize. xmlNodeDump
3828 * does that, but the result of that function won't contain
3829 * namespace definitions from ancestor nodes, so we first do a
3830 * xmlCopyNode() which duplicates the node along with its required
3831 * namespace definitions.
3833 * Some old libxml2 versions such as 2.7.6 produce partially
3834 * broken XML_DOCUMENT_NODE nodes (unset content field) when
3835 * copying them. xmlNodeDump of such a node works fine, but
3836 * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
3838 cur_copy = xmlCopyNode(cur, 1);
3839 if (cur_copy == NULL || xmlerrcxt->err_occurred)
3840 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3841 "could not copy node");
3842 nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
3843 (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
3845 bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
3846 if (bytes == -1 || xmlerrcxt->err_occurred)
3847 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3848 "could not dump node");
3850 result = xmlBuffer_to_xmltype(buf);
3852 PG_FINALLY();
3854 if (nodefree)
3855 nodefree(cur_copy);
3856 if (buf)
3857 xmlBufferFree(buf);
3859 PG_END_TRY();
3861 else
3863 xmlChar *str;
3865 str = xmlXPathCastNodeToString(cur);
3866 PG_TRY();
3868 /* Here we rely on XML having the same representation as TEXT */
3869 char *escaped = escape_xml((char *) str);
3871 result = (xmltype *) cstring_to_text(escaped);
3872 pfree(escaped);
3874 PG_FINALLY();
3876 xmlFree(str);
3878 PG_END_TRY();
3881 return result;
3885 * Convert an XML XPath object (the result of evaluating an XPath expression)
3886 * to an array of xml values, which are appended to astate. The function
3887 * result value is the number of elements in the array.
3889 * If "astate" is NULL then we don't generate the array value, but we still
3890 * return the number of elements it would have had.
3892 * Nodesets are converted to an array containing the nodes' textual
3893 * representations. Primitive values (float, double, string) are converted
3894 * to a single-element array containing the value's string representation.
3896 static int
3897 xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3898 ArrayBuildState *astate,
3899 PgXmlErrorContext *xmlerrcxt)
3901 int result = 0;
3902 Datum datum;
3903 Oid datumtype;
3904 char *result_str;
3906 switch (xpathobj->type)
3908 case XPATH_NODESET:
3909 if (xpathobj->nodesetval != NULL)
3911 result = xpathobj->nodesetval->nodeNr;
3912 if (astate != NULL)
3914 int i;
3916 for (i = 0; i < result; i++)
3918 datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
3919 xmlerrcxt));
3920 (void) accumArrayResult(astate, datum, false,
3921 XMLOID, CurrentMemoryContext);
3925 return result;
3927 case XPATH_BOOLEAN:
3928 if (astate == NULL)
3929 return 1;
3930 datum = BoolGetDatum(xpathobj->boolval);
3931 datumtype = BOOLOID;
3932 break;
3934 case XPATH_NUMBER:
3935 if (astate == NULL)
3936 return 1;
3937 datum = Float8GetDatum(xpathobj->floatval);
3938 datumtype = FLOAT8OID;
3939 break;
3941 case XPATH_STRING:
3942 if (astate == NULL)
3943 return 1;
3944 datum = CStringGetDatum((char *) xpathobj->stringval);
3945 datumtype = CSTRINGOID;
3946 break;
3948 default:
3949 elog(ERROR, "xpath expression result type %d is unsupported",
3950 xpathobj->type);
3951 return 0; /* keep compiler quiet */
3954 /* Common code for scalar-value cases */
3955 result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3956 datum = PointerGetDatum(cstring_to_xmltype(result_str));
3957 (void) accumArrayResult(astate, datum, false,
3958 XMLOID, CurrentMemoryContext);
3959 return 1;
3964 * Common code for xpath() and xmlexists()
3966 * Evaluate XPath expression and return number of nodes in res_nitems
3967 * and array of XML values in astate. Either of those pointers can be
3968 * NULL if the corresponding result isn't wanted.
3970 * It is up to the user to ensure that the XML passed is in fact
3971 * an XML document - XPath doesn't work easily on fragments without
3972 * a context node being known.
3974 static void
3975 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3976 int *res_nitems, ArrayBuildState *astate)
3978 PgXmlErrorContext *xmlerrcxt;
3979 volatile xmlParserCtxtPtr ctxt = NULL;
3980 volatile xmlDocPtr doc = NULL;
3981 volatile xmlXPathContextPtr xpathctx = NULL;
3982 volatile xmlXPathCompExprPtr xpathcomp = NULL;
3983 volatile xmlXPathObjectPtr xpathobj = NULL;
3984 char *datastr;
3985 int32 len;
3986 int32 xpath_len;
3987 xmlChar *string;
3988 xmlChar *xpath_expr;
3989 size_t xmldecl_len = 0;
3990 int i;
3991 int ndim;
3992 Datum *ns_names_uris;
3993 bool *ns_names_uris_nulls;
3994 int ns_count;
3997 * Namespace mappings are passed as text[]. If an empty array is passed
3998 * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
3999 * Else, a 2-dimensional array with length of the second axis being equal
4000 * to 2 should be passed, i.e., every subarray contains 2 elements, the
4001 * first element defining the name, the second one the URI. Example:
4002 * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4003 * 'http://example2.com']].
4005 ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4006 if (ndim != 0)
4008 int *dims;
4010 dims = ARR_DIMS(namespaces);
4012 if (ndim != 2 || dims[1] != 2)
4013 ereport(ERROR,
4014 (errcode(ERRCODE_DATA_EXCEPTION),
4015 errmsg("invalid array for XML namespace mapping"),
4016 errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4018 Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4020 deconstruct_array_builtin(namespaces, TEXTOID,
4021 &ns_names_uris, &ns_names_uris_nulls,
4022 &ns_count);
4024 Assert((ns_count % 2) == 0); /* checked above */
4025 ns_count /= 2; /* count pairs only */
4027 else
4029 ns_names_uris = NULL;
4030 ns_names_uris_nulls = NULL;
4031 ns_count = 0;
4034 datastr = VARDATA(data);
4035 len = VARSIZE(data) - VARHDRSZ;
4036 xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4037 if (xpath_len == 0)
4038 ereport(ERROR,
4039 (errcode(ERRCODE_DATA_EXCEPTION),
4040 errmsg("empty XPath expression")));
4042 string = pg_xmlCharStrndup(datastr, len);
4043 xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4046 * In a UTF8 database, skip any xml declaration, which might assert
4047 * another encoding. Ignore parse_xml_decl() failure, letting
4048 * xmlCtxtReadMemory() report parse errors. Documentation disclaims
4049 * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4050 * those scenarios bug-compatible with historical behavior.
4052 if (GetDatabaseEncoding() == PG_UTF8)
4053 parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4055 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4057 PG_TRY();
4059 xmlInitParser();
4062 * redundant XML parsing (two parsings for the same value during one
4063 * command execution are possible)
4065 ctxt = xmlNewParserCtxt();
4066 if (ctxt == NULL || xmlerrcxt->err_occurred)
4067 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4068 "could not allocate parser context");
4069 doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4070 len - xmldecl_len, NULL, NULL, 0);
4071 if (doc == NULL || xmlerrcxt->err_occurred)
4072 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4073 "could not parse XML document");
4074 xpathctx = xmlXPathNewContext(doc);
4075 if (xpathctx == NULL || xmlerrcxt->err_occurred)
4076 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4077 "could not allocate XPath context");
4078 xpathctx->node = (xmlNodePtr) doc;
4080 /* register namespaces, if any */
4081 if (ns_count > 0)
4083 for (i = 0; i < ns_count; i++)
4085 char *ns_name;
4086 char *ns_uri;
4088 if (ns_names_uris_nulls[i * 2] ||
4089 ns_names_uris_nulls[i * 2 + 1])
4090 ereport(ERROR,
4091 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4092 errmsg("neither namespace name nor URI may be null")));
4093 ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4094 ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4095 if (xmlXPathRegisterNs(xpathctx,
4096 (xmlChar *) ns_name,
4097 (xmlChar *) ns_uri) != 0)
4098 ereport(ERROR, /* is this an internal error??? */
4099 (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4100 ns_name, ns_uri)));
4104 xpathcomp = xmlXPathCompile(xpath_expr);
4105 if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4106 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4107 "invalid XPath expression");
4110 * Version 2.6.27 introduces a function named
4111 * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4112 * but we can derive the existence by whether any nodes are returned,
4113 * thereby preventing a library version upgrade and keeping the code
4114 * the same.
4116 xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4117 if (xpathobj == NULL || xmlerrcxt->err_occurred)
4118 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4119 "could not create XPath object");
4122 * Extract the results as requested.
4124 if (res_nitems != NULL)
4125 *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4126 else
4127 (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4129 PG_CATCH();
4131 if (xpathobj)
4132 xmlXPathFreeObject(xpathobj);
4133 if (xpathcomp)
4134 xmlXPathFreeCompExpr(xpathcomp);
4135 if (xpathctx)
4136 xmlXPathFreeContext(xpathctx);
4137 if (doc)
4138 xmlFreeDoc(doc);
4139 if (ctxt)
4140 xmlFreeParserCtxt(ctxt);
4142 pg_xml_done(xmlerrcxt, true);
4144 PG_RE_THROW();
4146 PG_END_TRY();
4148 xmlXPathFreeObject(xpathobj);
4149 xmlXPathFreeCompExpr(xpathcomp);
4150 xmlXPathFreeContext(xpathctx);
4151 xmlFreeDoc(doc);
4152 xmlFreeParserCtxt(ctxt);
4154 pg_xml_done(xmlerrcxt, false);
4156 #endif /* USE_LIBXML */
4159 * Evaluate XPath expression and return array of XML values.
4161 * As we have no support of XQuery sequences yet, this function seems
4162 * to be the most useful one (array of XML functions plays a role of
4163 * some kind of substitution for XQuery sequences).
4165 Datum
4166 xpath(PG_FUNCTION_ARGS)
4168 #ifdef USE_LIBXML
4169 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4170 xmltype *data = PG_GETARG_XML_P(1);
4171 ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4172 ArrayBuildState *astate;
4174 astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4175 xpath_internal(xpath_expr_text, data, namespaces,
4176 NULL, astate);
4177 PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
4178 #else
4179 NO_XML_SUPPORT();
4180 return 0;
4181 #endif
4185 * Determines if the node specified by the supplied XPath exists
4186 * in a given XML document, returning a boolean.
4188 Datum
4189 xmlexists(PG_FUNCTION_ARGS)
4191 #ifdef USE_LIBXML
4192 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4193 xmltype *data = PG_GETARG_XML_P(1);
4194 int res_nitems;
4196 xpath_internal(xpath_expr_text, data, NULL,
4197 &res_nitems, NULL);
4199 PG_RETURN_BOOL(res_nitems > 0);
4200 #else
4201 NO_XML_SUPPORT();
4202 return 0;
4203 #endif
4207 * Determines if the node specified by the supplied XPath exists
4208 * in a given XML document, returning a boolean. Differs from
4209 * xmlexists as it supports namespaces and is not defined in SQL/XML.
4211 Datum
4212 xpath_exists(PG_FUNCTION_ARGS)
4214 #ifdef USE_LIBXML
4215 text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4216 xmltype *data = PG_GETARG_XML_P(1);
4217 ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4218 int res_nitems;
4220 xpath_internal(xpath_expr_text, data, namespaces,
4221 &res_nitems, NULL);
4223 PG_RETURN_BOOL(res_nitems > 0);
4224 #else
4225 NO_XML_SUPPORT();
4226 return 0;
4227 #endif
4231 * Functions for checking well-formed-ness
4234 #ifdef USE_LIBXML
4235 static bool
4236 wellformed_xml(text *data, XmlOptionType xmloption_arg)
4238 bool result;
4239 volatile xmlDocPtr doc = NULL;
4241 /* We want to catch any exceptions and return false */
4242 PG_TRY();
4244 doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4245 result = true;
4247 PG_CATCH();
4249 FlushErrorState();
4250 result = false;
4252 PG_END_TRY();
4254 if (doc)
4255 xmlFreeDoc(doc);
4257 return result;
4259 #endif
4261 Datum
4262 xml_is_well_formed(PG_FUNCTION_ARGS)
4264 #ifdef USE_LIBXML
4265 text *data = PG_GETARG_TEXT_PP(0);
4267 PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4268 #else
4269 NO_XML_SUPPORT();
4270 return 0;
4271 #endif /* not USE_LIBXML */
4274 Datum
4275 xml_is_well_formed_document(PG_FUNCTION_ARGS)
4277 #ifdef USE_LIBXML
4278 text *data = PG_GETARG_TEXT_PP(0);
4280 PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4281 #else
4282 NO_XML_SUPPORT();
4283 return 0;
4284 #endif /* not USE_LIBXML */
4287 Datum
4288 xml_is_well_formed_content(PG_FUNCTION_ARGS)
4290 #ifdef USE_LIBXML
4291 text *data = PG_GETARG_TEXT_PP(0);
4293 PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4294 #else
4295 NO_XML_SUPPORT();
4296 return 0;
4297 #endif /* not USE_LIBXML */
4301 * support functions for XMLTABLE
4304 #ifdef USE_LIBXML
4307 * Returns private data from executor state. Ensure validity by check with
4308 * MAGIC number.
4310 static inline XmlTableBuilderData *
4311 GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4313 XmlTableBuilderData *result;
4315 if (!IsA(state, TableFuncScanState))
4316 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4317 result = (XmlTableBuilderData *) state->opaque;
4318 if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4319 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4321 return result;
4323 #endif
4326 * XmlTableInitOpaque
4327 * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4328 * the XML parser.
4330 * Note: Because we call pg_xml_init() here and pg_xml_done() in
4331 * XmlTableDestroyOpaque, it is critical for robustness that no other
4332 * executor nodes run until this node is processed to completion. Caller
4333 * must execute this to completion (probably filling a tuplestore to exhaust
4334 * this node in a single pass) instead of using row-per-call mode.
4336 static void
4337 XmlTableInitOpaque(TableFuncScanState *state, int natts)
4339 #ifdef USE_LIBXML
4340 volatile xmlParserCtxtPtr ctxt = NULL;
4341 XmlTableBuilderData *xtCxt;
4342 PgXmlErrorContext *xmlerrcxt;
4344 xtCxt = palloc0(sizeof(XmlTableBuilderData));
4345 xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4346 xtCxt->natts = natts;
4347 xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4349 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4351 PG_TRY();
4353 xmlInitParser();
4355 ctxt = xmlNewParserCtxt();
4356 if (ctxt == NULL || xmlerrcxt->err_occurred)
4357 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4358 "could not allocate parser context");
4360 PG_CATCH();
4362 if (ctxt != NULL)
4363 xmlFreeParserCtxt(ctxt);
4365 pg_xml_done(xmlerrcxt, true);
4367 PG_RE_THROW();
4369 PG_END_TRY();
4371 xtCxt->xmlerrcxt = xmlerrcxt;
4372 xtCxt->ctxt = ctxt;
4374 state->opaque = xtCxt;
4375 #else
4376 NO_XML_SUPPORT();
4377 #endif /* not USE_LIBXML */
4381 * XmlTableSetDocument
4382 * Install the input document
4384 static void
4385 XmlTableSetDocument(TableFuncScanState *state, Datum value)
4387 #ifdef USE_LIBXML
4388 XmlTableBuilderData *xtCxt;
4389 xmltype *xmlval = DatumGetXmlP(value);
4390 char *str;
4391 xmlChar *xstr;
4392 int length;
4393 volatile xmlDocPtr doc = NULL;
4394 volatile xmlXPathContextPtr xpathcxt = NULL;
4396 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4399 * Use out function for casting to string (remove encoding property). See
4400 * comment in xml_out.
4402 str = xml_out_internal(xmlval, 0);
4404 length = strlen(str);
4405 xstr = pg_xmlCharStrndup(str, length);
4407 PG_TRY();
4409 doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4410 if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4411 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4412 "could not parse XML document");
4413 xpathcxt = xmlXPathNewContext(doc);
4414 if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4415 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4416 "could not allocate XPath context");
4417 xpathcxt->node = (xmlNodePtr) doc;
4419 PG_CATCH();
4421 if (xpathcxt != NULL)
4422 xmlXPathFreeContext(xpathcxt);
4423 if (doc != NULL)
4424 xmlFreeDoc(doc);
4426 PG_RE_THROW();
4428 PG_END_TRY();
4430 xtCxt->doc = doc;
4431 xtCxt->xpathcxt = xpathcxt;
4432 #else
4433 NO_XML_SUPPORT();
4434 #endif /* not USE_LIBXML */
4438 * XmlTableSetNamespace
4439 * Add a namespace declaration
4441 static void
4442 XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4444 #ifdef USE_LIBXML
4445 XmlTableBuilderData *xtCxt;
4447 if (name == NULL)
4448 ereport(ERROR,
4449 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4450 errmsg("DEFAULT namespace is not supported")));
4451 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4453 if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4454 pg_xmlCharStrndup(name, strlen(name)),
4455 pg_xmlCharStrndup(uri, strlen(uri))))
4456 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4457 "could not set XML namespace");
4458 #else
4459 NO_XML_SUPPORT();
4460 #endif /* not USE_LIBXML */
4464 * XmlTableSetRowFilter
4465 * Install the row-filter Xpath expression.
4467 static void
4468 XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4470 #ifdef USE_LIBXML
4471 XmlTableBuilderData *xtCxt;
4472 xmlChar *xstr;
4474 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4476 if (*path == '\0')
4477 ereport(ERROR,
4478 (errcode(ERRCODE_DATA_EXCEPTION),
4479 errmsg("row path filter must not be empty string")));
4481 xstr = pg_xmlCharStrndup(path, strlen(path));
4483 xtCxt->xpathcomp = xmlXPathCompile(xstr);
4484 if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4485 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4486 "invalid XPath expression");
4487 #else
4488 NO_XML_SUPPORT();
4489 #endif /* not USE_LIBXML */
4493 * XmlTableSetColumnFilter
4494 * Install the column-filter Xpath expression, for the given column.
4496 static void
4497 XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4499 #ifdef USE_LIBXML
4500 XmlTableBuilderData *xtCxt;
4501 xmlChar *xstr;
4503 AssertArg(PointerIsValid(path));
4505 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4507 if (*path == '\0')
4508 ereport(ERROR,
4509 (errcode(ERRCODE_DATA_EXCEPTION),
4510 errmsg("column path filter must not be empty string")));
4512 xstr = pg_xmlCharStrndup(path, strlen(path));
4514 xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4515 if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4516 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4517 "invalid XPath expression");
4518 #else
4519 NO_XML_SUPPORT();
4520 #endif /* not USE_LIBXML */
4524 * XmlTableFetchRow
4525 * Prepare the next "current" tuple for upcoming GetValue calls.
4526 * Returns false if the row-filter expression returned no more rows.
4528 static bool
4529 XmlTableFetchRow(TableFuncScanState *state)
4531 #ifdef USE_LIBXML
4532 XmlTableBuilderData *xtCxt;
4534 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4536 /* Propagate our own error context to libxml2 */
4537 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4539 if (xtCxt->xpathobj == NULL)
4541 xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4542 if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4543 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4544 "could not create XPath object");
4546 xtCxt->row_count = 0;
4549 if (xtCxt->xpathobj->type == XPATH_NODESET)
4551 if (xtCxt->xpathobj->nodesetval != NULL)
4553 if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4554 return true;
4558 return false;
4559 #else
4560 NO_XML_SUPPORT();
4561 return false;
4562 #endif /* not USE_LIBXML */
4566 * XmlTableGetValue
4567 * Return the value for column number 'colnum' for the current row. If
4568 * column -1 is requested, return representation of the whole row.
4570 * This leaks memory, so be sure to reset often the context in which it's
4571 * called.
4573 static Datum
4574 XmlTableGetValue(TableFuncScanState *state, int colnum,
4575 Oid typid, int32 typmod, bool *isnull)
4577 #ifdef USE_LIBXML
4578 XmlTableBuilderData *xtCxt;
4579 Datum result = (Datum) 0;
4580 xmlNodePtr cur;
4581 char *cstr = NULL;
4582 volatile xmlXPathObjectPtr xpathobj = NULL;
4584 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4586 Assert(xtCxt->xpathobj &&
4587 xtCxt->xpathobj->type == XPATH_NODESET &&
4588 xtCxt->xpathobj->nodesetval != NULL);
4590 /* Propagate our own error context to libxml2 */
4591 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4593 *isnull = false;
4595 cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4597 Assert(xtCxt->xpathscomp[colnum] != NULL);
4599 PG_TRY();
4601 /* Set current node as entry point for XPath evaluation */
4602 xtCxt->xpathcxt->node = cur;
4604 /* Evaluate column path */
4605 xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4606 if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4607 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4608 "could not create XPath object");
4611 * There are four possible cases, depending on the number of nodes
4612 * returned by the XPath expression and the type of the target column:
4613 * a) XPath returns no nodes. b) The target type is XML (return all
4614 * as XML). For non-XML return types: c) One node (return content).
4615 * d) Multiple nodes (error).
4617 if (xpathobj->type == XPATH_NODESET)
4619 int count = 0;
4621 if (xpathobj->nodesetval != NULL)
4622 count = xpathobj->nodesetval->nodeNr;
4624 if (xpathobj->nodesetval == NULL || count == 0)
4626 *isnull = true;
4628 else
4630 if (typid == XMLOID)
4632 text *textstr;
4633 StringInfoData str;
4635 /* Concatenate serialized values */
4636 initStringInfo(&str);
4637 for (int i = 0; i < count; i++)
4639 textstr =
4640 xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4641 xtCxt->xmlerrcxt);
4643 appendStringInfoText(&str, textstr);
4645 cstr = str.data;
4647 else
4649 xmlChar *str;
4651 if (count > 1)
4652 ereport(ERROR,
4653 (errcode(ERRCODE_CARDINALITY_VIOLATION),
4654 errmsg("more than one value returned by column XPath expression")));
4656 str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
4657 cstr = str ? xml_pstrdup_and_free(str) : "";
4661 else if (xpathobj->type == XPATH_STRING)
4663 /* Content should be escaped when target will be XML */
4664 if (typid == XMLOID)
4665 cstr = escape_xml((char *) xpathobj->stringval);
4666 else
4667 cstr = (char *) xpathobj->stringval;
4669 else if (xpathobj->type == XPATH_BOOLEAN)
4671 char typcategory;
4672 bool typispreferred;
4673 xmlChar *str;
4675 /* Allow implicit casting from boolean to numbers */
4676 get_type_category_preferred(typid, &typcategory, &typispreferred);
4678 if (typcategory != TYPCATEGORY_NUMERIC)
4679 str = xmlXPathCastBooleanToString(xpathobj->boolval);
4680 else
4681 str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
4683 cstr = xml_pstrdup_and_free(str);
4685 else if (xpathobj->type == XPATH_NUMBER)
4687 xmlChar *str;
4689 str = xmlXPathCastNumberToString(xpathobj->floatval);
4690 cstr = xml_pstrdup_and_free(str);
4692 else
4693 elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
4696 * By here, either cstr contains the result value, or the isnull flag
4697 * has been set.
4699 Assert(cstr || *isnull);
4701 if (!*isnull)
4702 result = InputFunctionCall(&state->in_functions[colnum],
4703 cstr,
4704 state->typioparams[colnum],
4705 typmod);
4707 PG_FINALLY();
4709 if (xpathobj != NULL)
4710 xmlXPathFreeObject(xpathobj);
4712 PG_END_TRY();
4714 return result;
4715 #else
4716 NO_XML_SUPPORT();
4717 return 0;
4718 #endif /* not USE_LIBXML */
4722 * XmlTableDestroyOpaque
4723 * Release all libxml2 resources
4725 static void
4726 XmlTableDestroyOpaque(TableFuncScanState *state)
4728 #ifdef USE_LIBXML
4729 XmlTableBuilderData *xtCxt;
4731 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4733 /* Propagate our own error context to libxml2 */
4734 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4736 if (xtCxt->xpathscomp != NULL)
4738 int i;
4740 for (i = 0; i < xtCxt->natts; i++)
4741 if (xtCxt->xpathscomp[i] != NULL)
4742 xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
4745 if (xtCxt->xpathobj != NULL)
4746 xmlXPathFreeObject(xtCxt->xpathobj);
4747 if (xtCxt->xpathcomp != NULL)
4748 xmlXPathFreeCompExpr(xtCxt->xpathcomp);
4749 if (xtCxt->xpathcxt != NULL)
4750 xmlXPathFreeContext(xtCxt->xpathcxt);
4751 if (xtCxt->doc != NULL)
4752 xmlFreeDoc(xtCxt->doc);
4753 if (xtCxt->ctxt != NULL)
4754 xmlFreeParserCtxt(xtCxt->ctxt);
4756 pg_xml_done(xtCxt->xmlerrcxt, true);
4758 /* not valid anymore */
4759 xtCxt->magic = 0;
4760 state->opaque = NULL;
4762 #else
4763 NO_XML_SUPPORT();
4764 #endif /* not USE_LIBXML */