2 using System
.Collections
;
// RdfReader implementation for RDF/XML input. The parsing methods of this
// class walk an XML reader (the xml member) and add the statements they
// find to a StatementSink.
10 public class RdfXmlReader
: RdfReader
{
11 // TODO: Make some of the errors warnings.
// Blank-node entities keyed by their rdf:nodeID value (see GetBlankNode).
15 Hashtable blankNodes
= new Hashtable();
// Entities keyed by URI (see GetNamedNode).
16 UriMap namedNodes
= new UriMap();
// URIs already introduced through rdf:ID; ParseDescription consults this
// table to reject a second description using the same rdf:ID.
17 Hashtable seenIDs
= new Hashtable();
// Sink that receives every parsed statement; assigned in Select().
19 StatementSink storage
;
// Entities for the RDF vocabulary terms the parser emits as predicates or
// objects (typing, collections, and reification).
21 static readonly Entity
22 rdfType
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
23 rdfFirst
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#first",
24 rdfRest
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest",
25 rdfNil
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil",
26 rdfSubject
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#subject",
27 rdfPredicate
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate",
28 rdfObject
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#object",
29 rdfStatement
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#Statement";
// Creates a reader over an in-memory XmlDocument by wrapping the document
// in an XmlNodeReader, which becomes the xml member used by the parser.
31 public RdfXmlReader(XmlDocument document
) {
32 xml
= new XmlNodeReader(document
);
// Creates a reader over an existing XmlReader. The supplied reader is
// wrapped in an XmlValidatingReader whose ValidationType is set to None.
// NOTE(review): XmlValidatingReader is marked obsolete in .NET 2.0 and later;
// the documented replacement is XmlReader.Create with XmlReaderSettings.
35 public RdfXmlReader(XmlReader document
) {
36 XmlValidatingReader reader
= new XmlValidatingReader(document
);
37 reader
.ValidationType
= ValidationType
.None
;
// Creates a reader over a TextReader by wrapping it in an XmlTextReader and
// delegating to the XmlReader constructor.
41 public RdfXmlReader(TextReader document
) : this(new XmlTextReader(document
)) {
// Creates a reader over a Stream by wrapping it in an XmlTextReader and
// delegating to the XmlReader constructor.
44 public RdfXmlReader(Stream document
) : this(new XmlTextReader(document
)) {
// Creates a reader from a string argument by passing it to GetReader and
// delegating to the constructor matching GetReader's result.
// NOTE(review): GetReader is defined outside this view; presumably it opens
// the named file for reading - confirm against its definition.
47 public RdfXmlReader(string file
) : this(GetReader(file
)) {
// Parses the document held by the xml member and delivers its statements to
// the given sink.
50 public override void Select(StatementSink storage
) {
51 // Read past the processing instructions to
52 // the document element. If it is rdf:RDF,
53 // then process the description nodes within it.
54 // Otherwise, the document element is itself a
// The caller's sink is first passed through GetDupCheckSink, and the
// result is what the parsing methods write to via this.storage.
// NOTE(review): GetDupCheckSink is defined outside this view; by its name it
// filters duplicate statements - confirm.
57 storage
= GetDupCheckSink(storage
);
58 this.storage
= storage
;
// Case 1: the current element is rdf:RDF (RDF namespace, local name "RDF").
61 if (xml
.NamespaceURI
== NS
.RDF
&& xml
.LocalName
== "RDF" ) {
// Only element nodes are of interest here.
63 if (xml
.NodeType
== XmlNodeType
.Element
)
// Returns the expanded name of the node the reader is positioned on: its
// namespace URI immediately followed by its local name, with no separator.
// The result is compared against NS.RDF + "<name>" throughout the parser.
73 private string CurNode() {
74 return xml
.NamespaceURI
+ xml
.LocalName
;
// Returns 1 when the attribute value is non-null and 0 otherwise, so callers
// can add the results together to count how many attributes are present.
77 private int isset(string attribute
) {
78 return attribute
!= null ? 1 : 0;
// Resolves uri against a base URI through GetAbsoluteUri. The XML reader's
// BaseURI is used as the base when it is non-empty; otherwise the BaseUri
// member is used.
81 private string Unrelativize(string uri
) {
82 return GetAbsoluteUri(xml
.BaseURI
!= "" ? xml
.BaseURI
: BaseUri
, uri
);
// Maps an rdf:nodeID value to its blank-node Entity so that repeated uses of
// the same nodeID within this reader share one Entity.
85 private Entity
GetBlankNode(string nodeID
) {
// Already seen: hand back the Entity stored for this nodeID.
86 if (blankNodes
.ContainsKey(nodeID
))
87 return (Entity
)blankNodes
[nodeID
];
// First use: create an Entity with a null URI and remember it under the ID.
89 Entity entity
= new Entity(null);
90 blankNodes
[nodeID
] = entity
;
// Produces the Entity for a URI, either as a fresh instance or through the
// namedNodes table so that one Entity is shared per URI.
95 private Entity
GetNamedNode(string uri
) {
// Uncached path: a new Entity for this URI.
97 return new Entity(uri
);
// Cached path: reuse the Entity stored for this URI if there is one...
99 Entity ret
= (Entity
)namedNodes
[uri
];
100 if (ret
!= null) return ret
;
// ...otherwise create it and store it for later lookups.
101 ret
= new Entity(uri
);
102 namedNodes
[uri
] = ret
;
// Parses one node element (a description of an entity): determines which
// Entity it describes, emits an rdf:type statement for typed nodes, and then
// processes the element's property attributes and property child nodes.
106 private Entity
ParseDescription() {
107 // The XmlReader is positioned on an element node
108 // that is a description of an entity.
109 // On returning, the reader is positioned after the
110 // end element of the description node.
// The three attributes that can identify the described entity.
112 string nodeID
= xml
.GetAttribute("nodeID", NS
.RDF
);
113 string about
= xml
.GetAttribute("about", NS
.RDF
);
115 // about = xml.GetAttribute("about");
116 string ID
= xml
.GetAttribute("ID", NS
.RDF
);
// isset() yields 0 or 1 per attribute, so the sum counts how many are present.
117 if (isset(nodeID
) + isset(about
) + isset(ID
) > 1)
118 OnError("An entity description cannot specify more than one of rdf:nodeID, rdf:about, and rdf:ID");
// rdf:about: the named node at the attribute value resolved against the base URI.
123 entity
= GetNamedNode(Unrelativize(about
));
// rdf:ID: the named node at "#" + ID resolved against the base URI.
124 else if (ID
!= null) {
125 entity
= GetNamedNode(Unrelativize("#" + ID
));
// The same rdf:ID may not introduce two descriptions.
127 if (seenIDs
.ContainsKey(entity
.Uri
))
128 OnError("Two descriptions cannot use the same rdf:ID: <" + entity
.Uri
+ ">");
// Only the key is consulted (via ContainsKey); the stored value is a placeholder.
129 seenIDs
[entity
.Uri
] = seenIDs
;
// rdf:nodeID: the blank node shared by every use of that nodeID.
130 } else if (nodeID
!= null)
131 entity
= GetBlankNode(nodeID
);
// No identifying attribute: a fresh Entity with a null URI.
133 entity
= new Entity(null);
135 // If the name of the element is not rdf:Description,
136 // then the name gives its type.
137 if (CurNode() != NS
.RDF
+ "Description") {
138 if (CurNode() == NS
.RDF
+ "li") OnError("rdf:li cannot be the type of a node");
139 storage
.Add(new Statement(entity
, rdfType
, (Entity
)CurNode(), Meta
));
// Attributes are handled before the child property elements.
142 ParsePropertyAttributes(entity
);
143 ParsePropertyNodes(entity
);
// Walks the attributes of the current element and adds a statement about
// entity for each attribute that counts as a property attribute. Callers
// treat a true result as "property attributes were found".
148 private bool ParsePropertyAttributes(Entity entity
) {
149 bool foundAttrs
= false;
// No attributes at all: nothing to do.
151 if (!xml
.MoveToFirstAttribute()) return false;
153 // Property attributes in the default namespace
154 // should be ignored.
155 if (xml
.NamespaceURI
== "")
// Expanded name (namespace URI + local name) of the current attribute.
158 string curnode
= CurNode();
160 // rdf:type is interpreted with an entity object,
161 // not a literal object.
162 if (curnode
== NS
.RDF
+ "type") {
163 storage
.Add(new Statement(entity
, rdfType
, (Entity
)xml
.Value
, Meta
));
168 // Properties which are not recognized as property
169 // attributes and should be ignored.
170 if (curnode
== NS
.RDF
+ "RDF") continue;
171 if (curnode
== NS
.RDF
+ "Description") continue;
172 if (curnode
== NS
.RDF
+ "ID") continue;
173 if (curnode
== NS
.RDF
+ "about") continue;
174 if (curnode
== NS
.RDF
+ "parseType") continue;
175 if (curnode
== NS
.RDF
+ "resource") continue;
176 if (curnode
== NS
.RDF
+ "nodeID") continue;
177 if (curnode
== NS
.RDF
+ "datatype") continue;
179 // Properties which are invalid as attributes.
180 if (curnode
== NS
.RDF
+ "li")
181 OnError("rdf:li is not a valid attribute");
182 if (curnode
== NS
.RDF
+ "aboutEach" || curnode
== NS
.RDF
+ "aboutEachPrefix")
183 OnError("rdf:aboutEach has been removed from the RDF spec");
185 // Unrecognized attributes in the xml namespace should be ignored.
186 if (xml
.Prefix
== "xml") continue;
187 if (xml
.Prefix
== "xmlns") continue;
188 if (curnode
== "http://www.w3.org/2000/xmlns/xmlns") continue;
190 // This is a literal property attribute.
// An empty xml:lang is treated as "no language" (null).
191 string lang
= xml
.XmlLang
!= "" ? xml
.XmlLang
: null;
// The attribute's expanded name is the predicate; its value is a plain
// literal with the language in scope and no datatype.
192 storage
.Add(new Statement(entity
, curnode
,
193 new Literal(xml
.Value
, lang
, null), Meta
));
196 } while (xml
.MoveToNextAttribute());
// Processes the child property elements of a description node, handing each
// one to ParseProperty with subject as the statement subject.
203 private void ParsePropertyNodes(Entity subject
) {
204 // The reader is positioned within a description node.
205 // On returning, the reader is positioned after the
206 // end element of the description node.
// An empty element (<x/>) has no property children.
208 if (xml
.IsEmptyElement
) return;
// The end element closes the description node.
213 if (xml
.NodeType
== XmlNodeType
.EndElement
)
// Anything that is not an element is not a property.
215 if (xml
.NodeType
!= XmlNodeType
.Element
)
// liIndex is passed by reference so ParseProperty can advance the rdf:li
// counter across the sibling properties of this description.
218 ParseProperty(subject
, ref liIndex
);
// Parses one property element of subject. The property's object is taken,
// depending on the attributes present, from rdf:nodeID / rdf:resource, from
// the rdf:parseType variants (Literal, Resource, Collection), from a typed
// literal (rdf:datatype), or from the element content. The resulting
// statement is added to storage, and the statements for a reified copy can
// be added as well, using the property's rdf:ID.
222 private void ParseProperty(Entity subject
, ref int liIndex
) {
223 // The reader is positioned on a property node,
224 // and on returning the reader is positioned past
227 // Get all of the attributes before we move the reader forward.
229 string nodeID
= xml
.GetAttribute("nodeID", NS
.RDF
);
230 string resource
= xml
.GetAttribute("resource", NS
.RDF
);
232 string parseType
= xml
.GetAttribute("parseType", NS
.RDF
);
233 string datatype
= xml
.GetAttribute("datatype", NS
.RDF
);
// An empty xml:lang is treated as "no language" (null).
235 string lang
= xml
.XmlLang
!= "" ? xml
.XmlLang
: null;
// The predicate is the element's expanded name; rdf:li is rewritten to
// rdf:_N using the caller's counter, which is then incremented.
237 string predicate
= CurNode();
238 if (predicate
== NS
.RDF
+ "li")
239 predicate
= NS
.RDF
+ "_" + (liIndex
++);
// rdf:ID on a property is used for the reified statement further down.
241 string ID
= xml
.GetAttribute("ID", NS
.RDF
);
243 Resource objct
= null;
// --- Case: object given by rdf:nodeID or rdf:resource ---
244 if (nodeID
!= null || resource
!= null) {
245 if (isset(nodeID
) + isset(resource
) > 1)
246 OnError("A predicate node cannot specify more than one of rdf:nodeID and rdf:resource");
248 if (parseType
!= null || datatype
!= null)
249 OnError("The attributes rdf:parseType and rdf:datatype are not valid on a predicate with a rdf:nodeID or rdf:resource attribute");
251 // Object is an entity given by nodeID or resource.
254 objct
= GetBlankNode(nodeID
);
255 else if (resource
!= null)
256 objct
= GetNamedNode(Unrelativize(resource
));
// Property attributes on this element describe the object entity.
258 ParsePropertyAttributes((Entity
)objct
);
260 // No children are allowed in this element.
261 if (!xml
.IsEmptyElement
)
// Whitespace, comments and processing instructions are tolerated; any
// other node before the end element is an error.
263 if (xml
.NodeType
== XmlNodeType
.EndElement
) break;
264 if (xml
.NodeType
== XmlNodeType
.Whitespace
) continue;
265 if (xml
.NodeType
== XmlNodeType
.Comment
) continue;
266 if (xml
.NodeType
== XmlNodeType
.ProcessingInstruction
) continue;
267 OnError("Content is not allowed within a property with a rdf:nodeID or rdf:resource attribute");
// --- Case: rdf:parseType="Literal" ---
270 } else if (parseType
!= null && parseType
== "Literal") {
// Without an explicit datatype the literal is typed as rdf:XMLLiteral.
271 if (datatype
== null)
272 datatype
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral";
// A throwaway entity is passed only to detect whether property attributes exist.
274 if (ParsePropertyAttributes(new Entity(null)))
275 OnError("Property attributes are not valid when parseType is Literal");
// The element's inner XML becomes the literal value; no language is attached.
277 objct
= new Literal(xml
.ReadInnerXml(), null, datatype
);
// --- Case: rdf:parseType="Resource" ---
279 } else if (parseType
!= null && parseType
== "Resource") {
// The object is a new Entity with a null URI, described by this element's
// own property attributes and child property elements.
280 objct
= new Entity(null);
282 ParsePropertyAttributes((Entity
)objct
);
283 if (!xml
.IsEmptyElement
)
284 ParsePropertyNodes((Entity
)objct
);
// --- Case: rdf:parseType="Collection" ---
286 } else if (parseType
!= null && parseType
== "Collection") {
// The child elements are linked into an rdf:first / rdf:rest list that is
// terminated with rdf:nil.
287 Entity collection
= new Entity(null);
288 Entity lastnode
= collection
;
291 ParsePropertyAttributes(collection
);
293 if (!xml
.IsEmptyElement
)
295 if (xml
.NodeType
== XmlNodeType
.EndElement
) break;
296 if (xml
.NodeType
!= XmlNodeType
.Element
) continue;
// Link a new list cell after the previous one.
299 Entity next
= new Entity(null);
300 storage
.Add(new Statement(lastnode
, rdfRest
, next
, Meta
));
// Each child element is itself a node description; it becomes the cell's rdf:first.
304 Entity item
= ParseDescription();
305 storage
.Add(new Statement(lastnode
, rdfFirst
, item
, Meta
));
// Close the list.
310 storage
.Add(new Statement(lastnode
, rdfRest
, rdfNil
, Meta
));
// --- Case: typed literal (rdf:datatype without parseType) ---
317 } else if (datatype
!= null) {
318 // Forces even xml content to be read as in parseType=Literal?
319 // Note that any xml:lang is discarded.
321 if (ParsePropertyAttributes(new Entity(null)))
322 OnError("Property attributes are not valid when a data type is given");
324 objct
= new Literal(xml
.ReadInnerXml(), null, datatype
);
// --- Default case: decide from attributes and content ---
327 // We don't know whether the contents of this element
328 // refer to a literal or an entity. If an element is
329 // a child of this node, then it must be an entity.
330 // If the property has predicate attributes, then it
331 // is an anonymous entity. Otherwise the text content
332 // is the literal value.
334 objct
= new Entity(null);
335 if (ParsePropertyAttributes((Entity
)objct
)) {
336 // Found property attributes. There should be no other internal content?
338 if (!xml
.IsEmptyElement
)
// Same tolerance as above: only whitespace, comments and processing
// instructions may appear before the end element.
340 if (xml
.NodeType
== XmlNodeType
.EndElement
) break;
341 if (xml
.NodeType
== XmlNodeType
.Whitespace
) continue;
342 if (xml
.NodeType
== XmlNodeType
.Comment
) continue;
343 if (xml
.NodeType
== XmlNodeType
.ProcessingInstruction
) continue;
344 OnError(xml
.NodeType
+ " is not allowed within a property with property attributes");
// No property attributes: accumulate text and watch for a child element;
// the two flags record which kinds of content have been seen.
348 StringBuilder textcontent
= new StringBuilder();
349 bool hadText
= false;
350 bool hadElement
= false;
352 if (!xml
.IsEmptyElement
)
354 if (xml
.NodeType
== XmlNodeType
.EndElement
) break;
355 if (xml
.NodeType
== XmlNodeType
.Element
) {
357 OnError("Both text and elements are present as a property value");
// A child element is a node description; the described entity is the object.
360 objct
= ParseDescription();
361 } else if (xml
.NodeType
== XmlNodeType
.Text
|| xml
.NodeType
== XmlNodeType
.SignificantWhitespace
) {
363 OnError("Both text and elements are present as a property value");
364 textcontent
.Append(xml
.Value
);
367 textcontent
.Append(xml
.Value
);
// Text-only content: a plain literal carrying the xml:lang in scope.
372 objct
= new Literal(textcontent
.ToString(), lang
, null);
// The statement for this property.
376 storage
.Add(new Statement(subject
, predicate
, objct
, Meta
));
379 // In addition to adding the statement as normal, also
380 // add a reified statement.
// The reification node is the named node at "#" + ID resolved against the
// base URI; it is typed rdf:Statement and linked to the subject, predicate
// and object of the statement added above.
381 Entity statement
= GetNamedNode(Unrelativize("#" + ID
));;
382 storage
.Add(new Statement(statement
, rdfType
, rdfStatement
, Meta
));
383 storage
.Add(new Statement(statement
, rdfSubject
, subject
, Meta
));
384 storage
.Add(new Statement(statement
, rdfPredicate
, (Entity
)predicate
, Meta
));
385 storage
.Add(new Statement(statement
, rdfObject
, objct
, Meta
));
// Reports a parse error by throwing a ParserException carrying message.
// When the XML reader can report positions, the current line and column are
// appended to the message first.
389 private void OnError(string message
) {
// Position information is available only if the reader implements
// IXmlLineInfo and HasLineInfo() returns true.
390 if (xml
is IXmlLineInfo
&& ((IXmlLineInfo
)xml
).HasLineInfo()) {
391 IXmlLineInfo line
= (IXmlLineInfo
)xml
;
392 message
+= ", line " + line
.LineNumber
+ " col " + line
.LinePosition
;
394 throw new ParserException(message
);