/*
  Copyright 2012-2016 David Robillard <http://drobilla.net>

  Permission to use, copy, modify, and/or distribute this software for any
  purpose with or without fee is hereby granted, provided that the above
  copyright notice and this permission notice appear in all copies.

  THIS SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#define _BSD_SOURCE 1  // for realpath
#define _DEFAULT_SOURCE 1  // for realpath

#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#    include <windows.h>
#endif

#include "serd/serd.h"
#include "sord/sord.h"
#include "sord_config.h"

#ifdef HAVE_PCRE
#    include <pcre.h>
#endif
/** Cast a C string to the unsigned byte string type used by serd/sord. */
#define USTR(s) ((const uint8_t*)(s))

/*
  Namespace prefixes.  These are deliberately NOT wrapped in parentheses: the
  URI() macro in main() appends a string literal directly after the expansion
  (NS_##prefix #suffix), which relies on adjacent string literal
  concatenation happening underneath the cast.
*/
#define NS_foaf (const uint8_t*)"http://xmlns.com/foaf/0.1/"
#define NS_owl  (const uint8_t*)"http://www.w3.org/2002/07/owl#"
#define NS_rdf  (const uint8_t*)"http://www.w3.org/1999/02/22-rdf-syntax-ns#"
#define NS_rdfs (const uint8_t*)"http://www.w3.org/2000/01/rdf-schema#"
#define NS_xsd  (const uint8_t*)"http://www.w3.org/2001/XMLSchema#"
45 SordNode
* foaf_Document
;
46 SordNode
* owl_AnnotationProperty
;
48 SordNode
* owl_DatatypeProperty
;
49 SordNode
* owl_FunctionalProperty
;
50 SordNode
* owl_InverseFunctionalProperty
;
51 SordNode
* owl_ObjectProperty
;
52 SordNode
* owl_OntologyProperty
;
53 SordNode
* owl_Restriction
;
55 SordNode
* owl_cardinality
;
56 SordNode
* owl_equivalentClass
;
57 SordNode
* owl_minCardinality
;
58 SordNode
* owl_onDatatype
;
59 SordNode
* owl_onProperty
;
60 SordNode
* owl_someValuesFrom
;
61 SordNode
* owl_withRestrictions
;
62 SordNode
* rdf_PlainLiteral
;
63 SordNode
* rdf_Property
;
68 SordNode
* rdfs_Literal
;
69 SordNode
* rdfs_Resource
;
70 SordNode
* rdfs_domain
;
73 SordNode
* rdfs_subClassOf
;
75 SordNode
* xsd_decimal
;
77 SordNode
* xsd_maxInclusive
;
78 SordNode
* xsd_minInclusive
;
79 SordNode
* xsd_pattern
;
/** Number of errors reported so far (printed in the final summary). */
int n_errors = 0;

/** Number of datatype restrictions checked (printed in the final summary). */
int n_restrictions = 0;

/** If true, error() separates fields with tabs instead of newlines (-l). */
bool one_line_errors = false;
90 printf("sord_validate " SORD_VERSION
91 " <http://drobilla.net/software/sord>\n");
92 printf("Copyright 2012-2015 David Robillard <http://drobilla.net>.\n"
93 "License: <http://www.opensource.org/licenses/isc>\n"
94 "This is free software; you are free to change and redistribute it."
95 "\nThere is NO WARRANTY, to the extent permitted by law.\n");
/**
   Print a usage summary.

   @param name  Program name to show in the usage line (normally argv[0]).
   @param error If true, print to stderr and return 1; otherwise print to
                stdout and return 0.
   @return 1 if `error` is true, 0 otherwise.
*/
static int
print_usage(const char* name, bool error)
{
	FILE* const os = error ? stderr : stdout;

	// NOTE(review): column spacing inside these strings is reconstructed
	fprintf(os, "Usage: %s [OPTION]... INPUT...\n", name);
	fprintf(os, "Validate RDF data\n\n");
	fprintf(os, "  -h  Display this help and exit\n");
	fprintf(os, "  -l  Print errors on a single line.\n");
	fprintf(os, "  -v  Display version information and exit\n");
	fprintf(os,
	        "Validate RDF data.  This is a simple validator which checks\n"
	        "that all used properties are actually defined.  It does not do\n"
	        "any fancy file retrieval, the files passed on the command line\n"
	        "are the only data that is read.  In other words, you must pass\n"
	        "the definition of all vocabularies used on the command line.\n");
	return error ? 1 : 0;
}
/**
   Resolve `path` to an absolute path.

   @param path File system path (not a URI).
   @return A newly allocated string which the caller must free(), or NULL if
           the path could not be resolved or memory could not be allocated.
*/
static uint8_t*
absolute_path(const uint8_t* path)
{
#ifdef _WIN32
	char* out = (char*)malloc(MAX_PATH);
	if (!out) {
		return NULL;
	}

	// A return of 0 is failure; >= MAX_PATH means the buffer was too small
	const DWORD len = GetFullPathName((const char*)path, MAX_PATH, out, NULL);
	if (len == 0 || len >= MAX_PATH) {
		free(out);
		return NULL;
	}

	return (uint8_t*)out;
#else
	// With a NULL buffer, realpath() allocates the result with malloc()
	return (uint8_t*)realpath((const char*)path, NULL);
#endif
}
130 error(const char* msg
, const SordQuad quad
)
132 const char* sep
= one_line_errors
? "\t" : "\n ";
134 fprintf(stderr
, "error: %s:%s%s%s%s%s%s\n",
136 sep
, (const char*)sord_node_get_string(quad
[SORD_SUBJECT
]),
137 sep
, (const char*)sord_node_get_string(quad
[SORD_PREDICATE
]),
138 sep
, (const char*)sord_node_get_string(quad
[SORD_OBJECT
]));
/**
   Print a printf-style formatted message to stderr.

   @param fmt printf format string, followed by its arguments.
   @return 1, for assignment to a status variable.

   NOTE(review): unlike error(), this does not appear to update the global
   error counter; confirm against the upstream source whether that is
   intended.
*/
static int
errorf(const char* fmt, ...)
{
	va_list args;
	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
	return 1;
}
153 is_descendant_of(SordModel
* model
,
155 const SordNode
* child
,
156 const SordNode
* parent
,
157 const SordNode
* pred
)
161 } else if (sord_node_equals(child
, parent
) ||
162 sord_ask(model
, child
, uris
->owl_equivalentClass
, parent
, NULL
)) {
166 SordIter
* i
= sord_search(model
, child
, pred
, NULL
, NULL
);
167 for (; !sord_iter_end(i
); sord_iter_next(i
)) {
168 const SordNode
* o
= sord_iter_get_node(i
, SORD_OBJECT
);
169 if (sord_node_equals(child
, o
)) {
170 continue; // Weird class is explicitly a descendent of itself
172 if (is_descendant_of(model
, uris
, o
, parent
, pred
)) {
/**
   Return true iff the whole of `str` matches the regular expression `pat`.

   When built without PCRE no matching is possible, so this always returns
   true.

   @param pat Pattern, as a NUL-terminated byte string.
   @param str String to test.
   @return false if the pattern does not match, cannot be compiled, or memory
           cannot be allocated; true otherwise.
*/
static bool
regexp_match(const uint8_t* pat, const char* str)
{
#ifdef HAVE_PCRE
	// Append a $ to the pattern so we only match if the entire string matches
	const size_t len  = strlen((const char*)pat);
	char* const  regx = (char*)malloc(len + 2);
	if (!regx) {
		fprintf(stderr, "Failed to allocate memory for pattern `%s'\n",
		        (const char*)pat);
		return false;
	}

	memcpy(regx, pat, len);
	regx[len]     = '$';
	regx[len + 1] = '\0';

	const char* err       = NULL;
	int         erroffset = 0;
	pcre*       re = pcre_compile(regx, PCRE_ANCHORED, &err, &erroffset, NULL);
	free(regx);
	if (!re) {
		fprintf(stderr, "Error in pattern `%s' at offset %d (%s)\n",
		        (const char*)pat, erroffset, err);
		return false;
	}

	// pcre_exec returns a negative code on no match or error
	const bool ret =
		pcre_exec(re, NULL, str, (int)strlen(str), 0, 0, NULL, 0) >= 0;
	pcre_free(re);
	return ret;
#else
	(void)pat;
	(void)str;
	return true;
#endif  // HAVE_PCRE
}
211 bound_cmp(SordModel
* model
,
213 const SordNode
* literal
,
214 const SordNode
* type
,
215 const SordNode
* bound
)
217 const char* str
= (const char*)sord_node_get_string(literal
);
218 const char* bound_str
= (const char*)sord_node_get_string(bound
);
219 const bool is_numeric
=
220 is_descendant_of(model
, uris
, type
, uris
->xsd_decimal
, uris
->owl_onDatatype
) ||
221 is_descendant_of(model
, uris
, type
, uris
->xsd_double
, uris
->owl_onDatatype
);
224 const double fbound
= serd_strtod(bound_str
, NULL
);
225 const double fliteral
= serd_strtod(str
, NULL
);
226 return ((fliteral
< fbound
) ? -1 :
227 (fliteral
> fbound
) ? 1 :
230 return strcmp(str
, bound_str
);
235 check_restriction(SordModel
* model
,
237 const SordNode
* literal
,
238 const SordNode
* type
,
239 const SordNode
* restriction
)
242 const char* str
= (const char*)sord_node_get_string_counted(literal
, &len
);
246 SordIter
* p
= sord_search(model
, restriction
, uris
->xsd_pattern
, 0, 0);
248 const SordNode
* pat
= sord_iter_get_node(p
, SORD_OBJECT
);
249 if (!regexp_match(sord_node_get_string(pat
), str
)) {
250 fprintf(stderr
, "`%s' does not match <%s> pattern `%s'\n",
251 sord_node_get_string(literal
),
252 sord_node_get_string(type
),
253 sord_node_get_string(pat
));
260 // Check xsd:minInclusive
261 SordIter
* l
= sord_search(model
, restriction
, uris
->xsd_minInclusive
, 0, 0);
263 const SordNode
* lower
= sord_iter_get_node(l
, SORD_OBJECT
);
264 if (bound_cmp(model
, uris
, literal
, type
, lower
) < 0) {
265 fprintf(stderr
, "`%s' is not >= <%s> minimum `%s'\n",
266 sord_node_get_string(literal
),
267 sord_node_get_string(type
),
268 sord_node_get_string(lower
));
275 // Check xsd:maxInclusive
276 SordIter
* u
= sord_search(model
, restriction
, uris
->xsd_maxInclusive
, 0, 0);
278 const SordNode
* upper
= sord_iter_get_node(u
, SORD_OBJECT
);
279 if (bound_cmp(model
, uris
, literal
, type
, upper
) > 0) {
280 fprintf(stderr
, "`%s' is not <= <%s> maximum `%s'\n",
281 sord_node_get_string(literal
),
282 sord_node_get_string(type
),
283 sord_node_get_string(upper
));
291 return true; // Unknown restriction, be quietly tolerant
295 literal_is_valid(SordModel
* model
,
297 const SordNode
* literal
,
298 const SordNode
* type
)
304 /* Check that literal data is related to required type. We don't do a
305 strict subtype check here because e.g. an xsd:decimal might be a valid
306 xsd:unsignedInt, which the pattern checks will verify, but if the
307 literal type is not related to the required type at all
308 (e.g. xsd:decimal and xsd:string) there is a problem. */
309 const SordNode
* datatype
= sord_node_get_datatype(literal
);
310 if (datatype
&& datatype
!= type
) {
311 if (!is_descendant_of(
313 datatype
, type
, uris
->owl_onDatatype
) &&
316 type
, datatype
, uris
->owl_onDatatype
) &&
317 !(sord_node_equals(datatype
, uris
->xsd_decimal
) &&
320 type
, uris
->xsd_double
, uris
->owl_onDatatype
))) {
321 errorf("Literal `%s' datatype <%s> is not compatible with <%s>\n",
322 sord_node_get_string(literal
),
323 sord_node_get_string(datatype
),
324 sord_node_get_string(type
));
329 // Find restrictions list
330 SordIter
* rs
= sord_search(model
, type
, uris
->owl_withRestrictions
, 0, 0);
331 if (sord_iter_end(rs
)) {
332 return true; // No restrictions
335 // Walk list, checking each restriction
336 const SordNode
* head
= sord_iter_get_node(rs
, SORD_OBJECT
);
338 SordIter
* f
= sord_search(model
, head
, uris
->rdf_first
, 0, 0);
340 break; // Reached end of restrictions list without failure
343 // Check this restriction
344 const bool good
= check_restriction(
345 model
, uris
, literal
, type
, sord_iter_get_node(f
, SORD_OBJECT
));
350 return false; // Failed, literal is invalid
353 // Seek to next list node
354 SordIter
* n
= sord_search(model
, head
, uris
->rdf_rest
, 0, 0);
355 head
= n
? sord_iter_get_node(n
, SORD_OBJECT
) : NULL
;
361 SordIter
* s
= sord_search(model
, type
, uris
->owl_onDatatype
, 0, 0);
363 const SordNode
* super
= sord_iter_get_node(s
, SORD_OBJECT
);
364 const bool good
= literal_is_valid(model
, uris
, literal
, super
);
366 return good
; // Match iff literal also matches supertype
369 return true; // Matches top level type
373 check_type(SordModel
* model
,
375 const SordNode
* node
,
376 const SordNode
* type
)
378 if (sord_node_equals(type
, uris
->rdfs_Resource
) ||
379 sord_node_equals(type
, uris
->owl_Thing
)) {
383 if (sord_node_get_type(node
) == SORD_LITERAL
) {
384 if (sord_node_equals(type
, uris
->rdfs_Literal
)) {
386 } else if (sord_node_equals(type
, uris
->rdf_PlainLiteral
)) {
387 return !sord_node_get_language(node
);
389 return literal_is_valid(model
, uris
, node
, type
);
391 } else if (sord_node_get_type(node
) == SORD_URI
) {
392 if (sord_node_equals(type
, uris
->foaf_Document
)) {
393 return true; // Questionable...
394 } else if (is_descendant_of(
396 type
, uris
->xsd_anyURI
, uris
->owl_onDatatype
)) {
397 /* Type is any URI and this is a URI, so pass. Restrictions on
398 anyURI subtypes are not currently checked (very uncommon). */
399 return true; // Type is anyURI, and this is a URI
401 SordIter
* t
= sord_search(model
, node
, uris
->rdf_type
, NULL
, NULL
);
402 for (; !sord_iter_end(t
); sord_iter_next(t
)) {
403 if (is_descendant_of(model
, uris
,
404 sord_iter_get_node(t
, SORD_OBJECT
),
406 uris
->rdfs_subClassOf
)) {
415 return true; // Blanks often lack explicit types, ignore
422 check_properties(SordModel
* model
, URIs
* uris
)
425 SordIter
* i
= sord_begin(model
);
426 for (; !sord_iter_end(i
); sord_iter_next(i
)) {
428 sord_iter_get(i
, quad
);
430 const SordNode
* subj
= quad
[SORD_SUBJECT
];
431 const SordNode
* pred
= quad
[SORD_PREDICATE
];
432 const SordNode
* obj
= quad
[SORD_OBJECT
];
434 bool is_any_property
= false;
435 SordIter
* t
= sord_search(model
, pred
, uris
->rdf_type
, NULL
, NULL
);
436 for (; !sord_iter_end(t
); sord_iter_next(t
)) {
437 if (is_descendant_of(model
, uris
,
438 sord_iter_get_node(t
, SORD_OBJECT
),
440 uris
->rdfs_subClassOf
)) {
441 is_any_property
= true;
447 const bool is_ObjectProperty
= sord_ask(
448 model
, pred
, uris
->rdf_type
, uris
->owl_ObjectProperty
, 0);
449 const bool is_FunctionalProperty
= sord_ask(
450 model
, pred
, uris
->rdf_type
, uris
->owl_FunctionalProperty
, 0);
451 const bool is_InverseFunctionalProperty
= sord_ask(
452 model
, pred
, uris
->rdf_type
, uris
->owl_InverseFunctionalProperty
, 0);
453 const bool is_DatatypeProperty
= sord_ask(
454 model
, pred
, uris
->rdf_type
, uris
->owl_DatatypeProperty
, 0);
456 if (!is_any_property
) {
457 st
= error("Use of undefined property", quad
);
460 if (!sord_ask(model
, pred
, uris
->rdfs_label
, NULL
, NULL
)) {
461 st
= errorf("Property <%s> has no label\n",
462 sord_node_get_string(pred
));
465 if (is_DatatypeProperty
&&
466 sord_node_get_type(obj
) != SORD_LITERAL
) {
467 st
= error("Datatype property with non-literal value", quad
);
470 if (is_ObjectProperty
&&
471 sord_node_get_type(obj
) == SORD_LITERAL
) {
472 st
= error("Object property with literal value", quad
);
475 if (is_FunctionalProperty
&&
476 sord_count(model
, subj
, pred
, NULL
, NULL
) > 1) {
477 st
= error("Functional property with several objects", quad
);
480 if (is_InverseFunctionalProperty
&&
481 sord_count(model
, NULL
, pred
, obj
, NULL
) > 1) {
482 st
= error("Inverse functional property with several subjects", quad
);
485 if (sord_node_equals(pred
, uris
->rdf_type
) &&
486 !sord_ask(model
, obj
, uris
->rdf_type
, uris
->rdfs_Class
, NULL
) &&
487 !sord_ask(model
, obj
, uris
->rdf_type
, uris
->owl_Class
, NULL
)) {
488 st
= error("Type is not a rdfs:Class or owl:Class", quad
);
491 if (sord_node_get_type(obj
) == SORD_LITERAL
&&
492 !literal_is_valid(model
, uris
, obj
, sord_node_get_datatype(obj
))) {
493 st
= error("Literal does not match datatype", quad
);
496 SordIter
* r
= sord_search(model
, pred
, uris
->rdfs_range
, NULL
, NULL
);
497 for (; !sord_iter_end(r
); sord_iter_next(r
)) {
498 const SordNode
* range
= sord_iter_get_node(r
, SORD_OBJECT
);
499 if (!check_type(model
, uris
, obj
, range
)) {
500 st
= error("Object not in property range", quad
);
501 fprintf(stderr
, "note: Range is <%s>\n",
502 sord_node_get_string(range
));
507 SordIter
* d
= sord_search(model
, pred
, uris
->rdfs_domain
, NULL
, NULL
);
509 const SordNode
* domain
= sord_iter_get_node(d
, SORD_OBJECT
);
510 if (!check_type(model
, uris
, subj
, domain
)) {
511 st
= error("Subject not in property domain", quad
);
512 fprintf(stderr
, "note: Domain is <%s>\n",
513 sord_node_get_string(domain
));
524 check_instance(SordModel
* model
,
526 const SordNode
* restriction
,
527 const SordNode
* instance
)
531 const SordNode
* prop
= sord_get(
532 model
, restriction
, uris
->owl_onProperty
, NULL
, NULL
);
537 const unsigned values
= sord_count(model
, instance
, prop
, NULL
, NULL
);
539 // Check exact cardinality
540 const SordNode
* card
= sord_get(
541 model
, restriction
, uris
->owl_cardinality
, NULL
, NULL
);
543 const unsigned c
= atoi((const char*)sord_node_get_string(card
));
545 st
= errorf("Property %s on %s has %u != %u values\n",
546 sord_node_get_string(prop
),
547 sord_node_get_string(instance
),
552 // Check minimum cardinality
553 const SordNode
* minCard
= sord_get(
554 model
, restriction
, uris
->owl_minCardinality
, NULL
, NULL
);
556 const unsigned m
= atoi((const char*)sord_node_get_string(minCard
));
558 st
= errorf("Property %s on %s has %u < %u values\n",
559 sord_node_get_string(prop
),
560 sord_node_get_string(instance
),
565 // Check someValuesFrom
566 SordIter
* sf
= sord_search(
567 model
, restriction
, uris
->owl_someValuesFrom
, NULL
, NULL
);
569 const SordNode
* type
= sord_iter_get_node(sf
, SORD_OBJECT
);
570 SordIter
* v
= sord_search(model
, instance
, prop
, NULL
, NULL
);
572 for (; !sord_iter_end(v
); sord_iter_next(v
)) {
573 const SordNode
* value
= sord_iter_get_node(v
, SORD_OBJECT
);
574 if (check_type(model
, uris
, value
, type
)) {
580 st
= errorf("%s has no <%s> values of type <%s>\n",
581 sord_node_get_string(instance
),
582 sord_node_get_string(prop
),
583 sord_node_get_string(type
));
593 check_class_instances(SordModel
* model
,
595 const SordNode
* restriction
,
596 const SordNode
* klass
)
598 // Check immediate instances of this class
599 SordIter
* i
= sord_search(model
, NULL
, uris
->rdf_type
, klass
, NULL
);
600 for (; !sord_iter_end(i
); sord_iter_next(i
)) {
601 const SordNode
* instance
= sord_iter_get_node(i
, SORD_SUBJECT
);
602 check_instance(model
, uris
, restriction
, instance
);
606 // Check instances of all subclasses recursively
607 SordIter
* s
= sord_search(model
, NULL
, uris
->rdfs_subClassOf
, klass
, NULL
);
608 for (; !sord_iter_end(s
); sord_iter_next(s
)) {
609 const SordNode
* subklass
= sord_iter_get_node(s
, SORD_SUBJECT
);
610 check_class_instances(model
, uris
, restriction
, subklass
);
618 check_instances(SordModel
* model
, const URIs
* uris
)
621 SordIter
* r
= sord_search(
622 model
, NULL
, uris
->rdf_type
, uris
->owl_Restriction
, NULL
);
623 for (; !sord_iter_end(r
); sord_iter_next(r
)) {
624 const SordNode
* restriction
= sord_iter_get_node(r
, SORD_SUBJECT
);
625 const SordNode
* prop
= sord_get(
626 model
, restriction
, uris
->owl_onProperty
, NULL
, NULL
);
631 SordIter
* c
= sord_search(
632 model
, NULL
, uris
->rdfs_subClassOf
, restriction
, NULL
);
633 for (; !sord_iter_end(c
); sord_iter_next(c
)) {
634 const SordNode
* klass
= sord_iter_get_node(c
, SORD_SUBJECT
);
635 check_class_instances(model
, uris
, restriction
, klass
);
645 main(int argc
, char** argv
)
648 return print_usage(argv
[0], true);
652 for (; a
< argc
&& argv
[a
][0] == '-'; ++a
) {
653 if (argv
[a
][1] == 'l') {
654 one_line_errors
= true;
655 } else if (argv
[a
][1] == 'v') {
656 return print_version();
658 fprintf(stderr
, "%s: Unknown option `%s'\n", argv
[0], argv
[a
]);
659 return print_usage(argv
[0], true);
663 SordWorld
* world
= sord_world_new();
664 SordModel
* model
= sord_new(world
, SORD_SPO
|SORD_OPS
, false);
665 SerdEnv
* env
= serd_env_new(&SERD_NODE_NULL
);
666 SerdReader
* reader
= sord_new_reader(model
, env
, SERD_TURTLE
, NULL
);
668 for (; a
< argc
; ++a
) {
669 const uint8_t* input
= (const uint8_t*)argv
[a
];
670 uint8_t* in_path
= absolute_path(serd_uri_to_path(input
));
673 fprintf(stderr
, "Skipping file %s\n", input
);
678 SerdNode base_uri_node
= serd_node_new_file_uri(
679 in_path
, NULL
, &base_uri
, false);
681 serd_env_set_base_uri(env
, &base_uri_node
);
682 const SerdStatus st
= serd_reader_read_file(reader
, in_path
);
684 fprintf(stderr
, "error reading %s: %s\n",
685 in_path
, serd_strerror(st
));
688 serd_node_free(&base_uri_node
);
691 serd_reader_free(reader
);
694 #define URI(prefix, suffix) \
695 uris.prefix##_##suffix = sord_new_uri(world, NS_##prefix #suffix)
699 URI(owl
, AnnotationProperty
);
701 URI(owl
, DatatypeProperty
);
702 URI(owl
, FunctionalProperty
);
703 URI(owl
, InverseFunctionalProperty
);
704 URI(owl
, ObjectProperty
);
705 URI(owl
, OntologyProperty
);
706 URI(owl
, Restriction
);
708 URI(owl
, cardinality
);
709 URI(owl
, equivalentClass
);
710 URI(owl
, minCardinality
);
711 URI(owl
, onDatatype
);
712 URI(owl
, onProperty
);
713 URI(owl
, someValuesFrom
);
714 URI(owl
, withRestrictions
);
715 URI(rdf
, PlainLiteral
);
726 URI(rdfs
, subClassOf
);
730 URI(xsd
, maxInclusive
);
731 URI(xsd
, minInclusive
);
736 fprintf(stderr
, "warning: Built without PCRE, datatypes not checked.\n");
739 const int prop_st
= check_properties(model
, &uris
);
740 const int inst_st
= check_instances(model
, &uris
);
742 printf("Found %d errors among %d files (checked %d restrictions)\n",
743 n_errors
, argc
- 1, n_restrictions
);
746 sord_world_free(world
);
747 return prop_st
|| inst_st
;