2 using System
.Collections
;
// Reader that parses N3-style text (statements plus @prefix / @keywords directives)
// and pushes the resulting statements into a StatementSink (see Select).
// NOTE(review): this listing is an excerpt -- the embedded original line numbers
// skip -- so comments describe only what the visible lines establish.
11 public class N3Reader
: RdfReader
{
// Sentinel resources: ReadResource2 returns these for the "@prefix" and "@keywords"
// tokens, and ReadStatement recognises them by reference comparison.
12 Resource PrefixResource
= new Literal("@prefix");
13 Resource KeywordsResource
= new Literal("@keywords");
// Text input supplied to the constructors; Select wraps it in a MyReader.
15 TextReader sourcestream
;
// Prefix table that ReadStatement fills when it handles an @prefix directive.
16 NamespaceManager namespaces
= new NamespaceManager();
// Well-known vocabulary entities, initialised from URI strings.
// entRDFFIRST / entRDFREST / entRDFNIL are used by ReadResource2 to emit list
// statements; entLOGIMPLIES is returned by ReadResource2.
18 Entity entRDFTYPE
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
19 Entity entRDFFIRST
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#first";
20 Entity entRDFREST
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest";
21 Entity entRDFNIL
= "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil";
22 //Entity entOWLSAMEAS = "http://www.w3.org/2002/07/owl#sameAs";
23 Entity entDAMLEQUIV
= "http://www.daml.org/2000/12/daml+oil#equivalentTo";
24 Entity entLOGIMPLIES
= "http://www.w3.org/2000/10/swap/log#implies";
/// <summary>
/// Creates a reader over an already-open text source.
/// </summary>
/// <param name="source">Text to parse; it is stored as-is and wrapped in a
/// <c>MyReader</c> when <c>Select</c> is called.</param>
public N3Reader(TextReader source)
{
    sourcestream = source;
}
/// <summary>
/// Creates a reader over the source named by <paramref name="sourcefile"/> and
/// derives the base URI from that name.
/// </summary>
/// <param name="sourcefile">Name handed to <c>GetReader</c> to open the input; the
/// base URI becomes <c>"file:" + sourcefile + "#"</c>.</param>
public N3Reader(string sourcefile)
{
    // Pure string concatenation, so building it first does not change behaviour.
    string fileBaseUri = "file:" + sourcefile + "#";

    sourcestream = GetReader(sourcefile);
    BaseUri = fileBaseUri;
}
// Bundle of parser state that is handed to every Read* method.
// NOTE(review): this is a struct, so each call receives a copy; field assignments
// made inside a callee (e.g. UsingKeywords in ReadStatement) are not seen by the caller.
// NOTE(review): other methods read and write context.meta, whose declaration is not
// visible in this excerpt.
35 private struct ParseContext
{
// Character source with two-character lookahead and line/column tracking.
36 public MyReader source
;
// Sink that receives the parsed statements.
37 public StatementSink store
;
// Prefix table consulted by ResolveQName.
38 public NamespaceManager namespaces
;
// Cache of entities keyed by URI (see GetResource).
39 public UriMap namedNode
;
// Cache of entities keyed by the qname string (see ResolveQName).
40 public Hashtable anonymous
;
// Set once an @keywords directive has been read; Keywords then holds the declared names.
42 public bool UsingKeywords
;
43 public Hashtable Keywords
;
// Current line/column of the underlying reader, used for error positions.
45 public Location Location { get { return new Location(source.Line, source.Col); }
}
// Parses the whole input, sending every statement to the given sink.
// Builds a fresh ParseContext around the stored TextReader and then reads
// statements until ReadStatement returns false.
48 public override void Select(StatementSink store
) {
49 ParseContext context
= new ParseContext();
50 context
.source
= new MyReader(sourcestream
);
// The sink is wrapped by GetDupCheckSink (defined outside this excerpt) before use.
51 context
.store
= GetDupCheckSink(store
);
52 context
.namespaces
= namespaces
;
53 context
.namedNode
= new UriMap();
54 context
.anonymous
= new Hashtable();
// The loop body is empty: all the work happens inside ReadStatement.
57 while (ReadStatement(context
)) { }
// Reads one top-level construct: an @prefix directive, an @keywords directive, or a
// subject followed by its predicates. Returns false when no subject could be read or
// when a closing '}' ends the enclosing block.
// NOTE(review): several original lines (branch headers, returns) are missing from
// this excerpt; the return value after a successfully read statement is not visible.
// NOTE(review): context is a struct passed by value, so the UsingKeywords / Keywords
// assignments below change only this call's copy -- confirm that is intended.
60 private bool ReadStatement(ParseContext context
) {
// Remember where the construct starts so errors can point at it.
61 Location loc
= context
.Location
;
64 Resource subject
= ReadResource(context
, out reverse
);
65 if (subject
== null) return false;
66 if (reverse
) OnError("is...of not allowed on a subject", loc
);
// @prefix directive: prefix name ending in ':', then a URI, then punctuation.
68 if ((object)subject
== (object)PrefixResource
) {
69 loc
= context
.Location
;
70 string qname
= ReadToken(context
.source
, context
) as string;
71 if (qname
== null || !qname
.EndsWith(":")) OnError("When using @prefix, the prefix identifier must end with a colon", loc
);
73 loc
= context
.Location
;
74 Resource uri
= ReadResource(context
, out reverse
);
75 if (uri
== null) OnError("Expecting a URI", loc
);
76 if (reverse
) OnError("is...of not allowed here", loc
);
// Register the prefix without its trailing colon.
77 namespaces
.AddNamespace(uri
.Uri
, qname
.Substring(0, qname
.Length
-1));
79 loc
= context
.Location
;
80 char punc
= ReadPunc(context
.source
);
// NOTE(review): the condition guarding this error is not visible in the excerpt.
82 OnError("Expected a period but found '" + punc
+ "'", loc
);
// @keywords directive: switch on keyword mode and collect the listed names.
86 if ((object)subject
== (object)KeywordsResource
) {
87 context
.UsingKeywords
= true;
88 context
.Keywords
= new Hashtable();
90 ReadWhitespace(context
.source
);
// A '.' is consumed here; the surrounding loop is not visible in the excerpt.
91 if (context
.source
.Peek() == '.') {
92 context
.source
.Read();
96 loc
= context
.Location
;
97 string tok
= ReadToken(context
.source
, context
) as string;
99 OnError("Expecting keyword names", loc
);
// The Hashtable is used as a set: each keyword maps to itself.
101 context
.Keywords
[tok
] = tok
;
106 // It's possible to just assert the presence of an entity
107 // by following the entity with a period, or a } to end
108 // a reified context.
109 if (NextPunc(context
.source
) == '.') {
110 context
.source
.Read();
113 if (NextPunc(context
.source
) == '}') {
114 context
.source
.Read();
115 return false; // end of block
118 // Read the predicates for this subject.
119 char period
= ReadPredicates(subject
, context
);
120 loc
= context
.Location
;
// The predicate list must end with '.' or with the '}' closing the enclosing block.
121 if (period
!= '.' && period
!= '}')
122 OnError("Expected a period but found '" + period
+ "'", loc
);
123 if (period
== '}') return false;
/// <summary>
/// Reads every predicate (with its objects) belonging to <paramref name="subject"/>.
/// Predicates are read one after another for as long as each one is terminated by ';'.
/// </summary>
/// <param name="subject">Subject the predicates apply to.</param>
/// <param name="context">Current parser state.</param>
/// <returns>The first terminating punctuation character that is not ';'.</returns>
private char ReadPredicates(Resource subject, ParseContext context)
{
    char terminator;
    do
    {
        terminator = ReadPredicate(subject, context);
    } while (terminator == ';');

    return terminator;
}
// Reads a single predicate and its comma-separated objects for the given subject.
// NOTE(review): the declaration of 'reverse' and the final return are not visible in
// this excerpt; ReadPredicates uses the returned character as the terminator.
134 private char ReadPredicate(Resource subject
, ParseContext context
) {
136 Location loc
= context
.Location
;
137 Resource predicate
= ReadResource(context
, out reverse
);
138 if (predicate
== null) OnError("Expecting a predicate", loc
);
139 if (predicate
is Literal
) OnError("Predicates cannot be literals", loc
);
// Seeding with ',' forces at least one object to be read.
141 char punctuation
= ',';
142 while (punctuation
== ',') {
143 ReadObject(subject
, (Entity
)predicate
, context
, reverse
);
144 loc
= context
.Location
;
145 punctuation
= ReadPunc(context
.source
);
// After the object list only '.', ';', ']' or '}' may follow.
147 if (punctuation
!= '.' && punctuation
!= ';' && punctuation
!= ']' && punctuation
!= '}')
148 OnError("Expecting a period, semicolon, comma, or close-bracket but found '" + punctuation
+ "'", loc
);
// Reads one object and adds the resulting statement to the sink.
// Two Add calls are visible: (subject, predicate, value) and the swapped
// (value, predicate, subject).
// NOTE(review): the branch that chooses between them is missing from this excerpt;
// presumably it tests the 'reverse' parameter -- confirm against the full file.
153 private void ReadObject(Resource subject
, Entity predicate
, ParseContext context
, bool reverse
) {
155 Location loc
= context
.Location
;
156 Resource
value = ReadResource(context
, out reverse2
);
157 if (value == null) OnError("Expecting a resource or literal object", loc
);
158 if (reverse2
) OnError("is...of not allowed on objects", loc
);
160 loc
= context
.Location
;
// Forward form: the subject must be an Entity because it is cast below.
162 if (subject
is Literal
) OnError("Subjects of statements cannot be literals", loc
);
163 Add(context
.store
, new Statement((Entity
)subject
, predicate
, value, context
.meta
), loc
);
// Swapped form: the object that was read becomes the statement's subject.
165 if (value is Literal
) OnError("A literal cannot be the object of a reverse-predicate statement", loc
);
166 Add(context
.store
, new Statement((Entity
)value, predicate
, subject
, context
.meta
), loc
);
// Advances the reader past whitespace and '#' comments.
// NOTE(review): the loop headers and the read inside the whitespace loop are not
// visible in this excerpt.
170 private void ReadWhitespace(MyReader source
) {
172 while (char.IsWhiteSpace((char)source
.Peek()))
// A '#' starts a comment, which is consumed character by character.
175 if (source
.Peek() == '#') {
177 int c
= source
.Read();
// Stop at end of input (-1), line feed (10) or carriage return (13).
178 if (c
== -1 || c
== 10 || c
== 13) break;
// Skips whitespace and then consumes one character from the reader.
// NOTE(review): the condition guarding the error and the return statement are not
// visible in this excerpt; callers treat the result as a punctuation character.
187 private char ReadPunc(MyReader source
) {
188 ReadWhitespace(source
);
189 int c
= source
.Read();
191 OnError("End of file expecting punctuation", new Location(source
.Line
, source
.Col
));
/// <summary>
/// Skips whitespace and reports the next character without consuming it.
/// </summary>
/// <param name="source">Reader to inspect.</param>
/// <returns>Whatever <c>source.Peek()</c> yields once whitespace has been skipped.</returns>
private int NextPunc(MyReader source)
{
    ReadWhitespace(source);

    int upcoming = source.Peek();
    return upcoming;
}
/// <summary>
/// Appends to <paramref name="b"/> the text denoted by a backslash escape whose
/// introducing character (the one after the backslash) is <paramref name="c"/>.
/// </summary>
/// <remarks>
/// Single-character escapes map directly to their control character. An escaped line
/// break contributes nothing. <c>\uXXXX</c> reads four hex digits; <c>\UXXXXXXXX</c>
/// skips two characters and then reads six hex digits. Code points above U+FFFF are
/// appended as a surrogate pair instead of being truncated to a single <c>char</c>.
/// </remarks>
/// <param name="c">Character that followed the backslash.</param>
/// <param name="b">Buffer receiving the decoded text.</param>
/// <param name="source">Reader from which the hex digits of Unicode escapes are taken.</param>
/// <param name="loc">Position reported if the escape is invalid.</param>
private void ReadEscapedChar(char c, StringBuilder b, MyReader source, Location loc) {
    if (c == 'n') b.Append('\n');
    else if (c == 'r') b.Append('\r');
    else if (c == 't') b.Append('\t');
    else if (c == '\\') b.Append('\\');
    else if (c == '"') b.Append('"');
    else if (c == '\'') b.Append('\'');
    else if (c == 'a') b.Append('\a');
    else if (c == 'b') b.Append('\b');
    else if (c == 'f') b.Append('\f');
    else if (c == 'v') b.Append('\v');
    else if (c == '\n') { }
    else if (c == '\r') { }
    else if (c == 'u' || c == 'U') {
        int digitCount = 4; // \u: four hex digits
        if (c == 'U') {
            source.Read(); // two zeros
            source.Read();
            digitCount = 6; // \U: six hex digits after the two zeros
        }

        StringBuilder num = new StringBuilder(digitCount);
        for (int i = 0; i < digitCount; i++) {
            int digit = source.Read();
            // Report truncated input as a parser error instead of feeding (char)-1
            // into the number parser.
            if (digit == -1) OnError("Unexpected end of stream within a Unicode escape", loc);
            num.Append((char)digit);
        }

        int unicode;
        if (!int.TryParse(num.ToString(), System.Globalization.NumberStyles.AllowHexSpecifier,
                System.Globalization.CultureInfo.InvariantCulture, out unicode))
            OnError("Invalid hexadecimal digits in Unicode escape: " + num.ToString(), loc);

        if (unicode <= 0xFFFF)
            b.Append((char)unicode); // fits in one UTF-16 code unit
        else if (unicode <= 0x10FFFF)
            b.Append(char.ConvertFromUtf32(unicode)); // needs a surrogate pair
        else
            OnError("Unicode escape is outside the valid code point range: " + num.ToString(), loc);

    } else if (char.IsDigit((char)c) || c == 'x')
        OnError("Octal and hex byte-value escapes are deprecated and not supported", loc);
    else
        OnError("Unrecognized escape character: " + (char)c, loc);
}
// Scratch buffer shared by all ReadToken calls; it is reset at the start of each token.
240 private StringBuilder readTokenBuffer
= new StringBuilder();
// Reads the next lexical token. The visible return paths yield either a string
// (e.g. the "<=" verb) or a Literal built from a quoted string; callers cast the
// result with 'as string' / '(string)'.
// NOTE(review): many original lines (loop headers, returns, the end-of-input check on
// the first character) are missing from this excerpt.
242 private object ReadToken(MyReader source
, ParseContext context
) {
243 ReadWhitespace(source
);
// Position of the token start, used for every error raised below.
245 Location loc
= new Location(source
.Line
, source
.Col
);
247 int firstchar
= source
.Read();
// b aliases the shared buffer; it is cleared and seeded with the first character.
251 StringBuilder b
= readTokenBuffer
; readTokenBuffer
.Length
= 0;
252 b
.Append((char)firstchar
);
254 if (firstchar
== '<') {
255 // This is a URI or the <= verb. URIs can be escaped like strings, at least in the NTriples spec.
256 bool escaped
= false;
258 int c
= source
.Read();
259 if (c
== -1) OnError("Unexpected end of stream within a token beginning with <", loc
);
261 if (b
.Length
== 2 && c
== '=')
262 return "<="; // the <= verb
265 ReadEscapedChar((char)c
, b
, source
, loc
);
267 } else if (c
== '\\') {
271 if (c
== '>') // end of the URI
276 } else if (firstchar
== '"') {
277 // A string: ("""[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*""")|("[^"\\]*(?:\\.[^"\\]*)*")
278 // What kind of crazy regex is this??
279 b
.Length
= 0; // get rid of the open quote
280 bool escaped
= false;
281 bool triplequoted
= false;
283 int c
= source
.Read();
284 if (c
== -1) OnError("Unexpected end of stream within a string", loc
);
// Two more quotes right after the opening quote are tested for here (triple-quoted form).
286 if (b
.Length
== 0 && c
== (int)'"' && source
.Peek() == (int)'"') {
292 if (!escaped
&& c
== '\\')
295 ReadEscapedChar((char)c
, b
, source
, loc
);
// A plain string ends at the next quote; a triple-quoted one needs three in a row.
298 if (c
== '"' && !triplequoted
)
300 if (c
== '"' && source
.Peek() == '"' && source
.Peek2() == '"' && triplequoted
)
306 if (triplequoted
) { // read the extra end quotes
311 string litvalue
= b
.ToString();
312 string litlang
= null;
315 // Strings can be suffixed with @langcode or ^^symbol (but not both?).
316 if (source
.Peek() == '@') {
// Language tag: letters, digits and '-' are accumulated in b.
319 while (char.IsLetterOrDigit((char)source
.Peek()) || source
.Peek() == (int)'-')
320 b
.Append((char)source
.Read());
321 litlang
= b
.ToString();
322 } else if (source
.Peek() == '^' && source
.Peek2() == '^') {
323 loc
= new Location(source
.Line
, source
.Col
);
// The datatype is read as a nested token: either <uri> or a qname.
326 litdt
= ReadToken(source
, context
).ToString(); // better be a string URI
327 if (litdt
.StartsWith("<") && litdt
.EndsWith(">"))
328 litdt
= litdt
.Substring(1, litdt
.Length
-2);
329 else if (litdt
.IndexOf(":") != -1) {
330 Resource r
= ResolveQName(litdt
, context
, loc
);
332 OnError("A literal datatype cannot be an anonymous entity", loc
);
337 return new Literal(litvalue
, litlang
, litdt
);
339 } else if (char.IsLetter((char)firstchar
) || firstchar
== '?' || firstchar
== '@' || firstchar
== ':' || firstchar
== '_') {
340 // Something starting with @
341 // A QName: ([a-zA-Z_][a-zA-Z0-9_]*)?:)?([a-zA-Z_][a-zA-Z0-9_]*)?
342 // A variable: \?[a-zA-Z_][a-zA-Z0-9_]*
344 int c
= source
.Peek();
// The name continues while the next character is a letter, digit, '-', '_' or ':'.
345 if (c
== -1 || (!char.IsLetterOrDigit((char)c
) && c
!= '-' && c
!= '_' && c
!= ':')) break;
346 b
.Append((char)source
.Read());
// Numeric-looking token: starts with a digit or a sign.
349 } else if (char.IsDigit((char)firstchar
) || firstchar
== '+' || firstchar
== '-') {
351 int ci
= source
.Peek();
354 // punctuation followed by a space means the punctuation is
355 // punctuation, and not part of this token
356 if (!char.IsDigit((char)ci
) && source
.Peek2() != -1 && char.IsWhiteSpace((char)source
.Peek2()))
360 if (char.IsWhiteSpace(c
)) break;
362 b
.Append((char)source
.Read());
// '=' on its own, or '=>' when followed by '>'.
365 } else if (firstchar
== '=') {
366 if (source
.Peek() == (int)'>')
367 b
.Append((char)source
.Read());
369 } else if (firstchar
== '[') {
370 // The start of an anonymous node.
372 } else if (firstchar
== '{') {
375 } else if (firstchar
== '(') {
377 } else if (firstchar
== ')') {
// Fallback: unrecognised input is read up to whitespace and reported as invalid.
382 int c
= source
.Read();
384 if (char.IsWhiteSpace((char)c
)) break;
387 OnError("Invalid token: " + b
.ToString(), loc
);
// Reads a resource and then any path steps that follow it ('!', '^', or '.' directly
// followed by a letter). Each step adds a statement linking the current resource to a
// fresh anonymous entity.
// NOTE(review): the declarations of reverse2 / s, the else branch header, the
// reassignment of 'res' and the return are not visible in this excerpt.
393 private Resource
ReadResource(ParseContext context
, out bool reverse
) {
394 Location loc
= context
.Location
;
396 Resource res
= ReadResource2(context
, out reverse
);
398 ReadWhitespace(context
.source
);
// A '.' counts as a path operator only when a letter follows it immediately.
399 while (context
.source
.Peek() == '!' || context
.source
.Peek() == '^' || (context
.source
.Peek() == '.' && context
.source
.Peek2() != -1 && char.IsLetter((char)context
.source
.Peek2())) ) {
400 int pathType
= context
.source
.Read();
403 loc
= context
.Location
;
404 Resource path
= ReadResource2(context
, out reverse2
);
405 if (reverse
|| reverse2
) OnError("is...of is not allowed in path expressions", loc
);
406 if (!(path
is Entity
)) OnError("A path expression cannot be a literal", loc
);
// Fresh anonymous entity standing for the other end of this path step.
408 Entity anon
= new Entity(null);
// '!' and '.' steps: statement (res, path, anon).
411 if (pathType
== '!' || pathType
== '.') {
412 if (!(res
is Entity
)) OnError("A path expression cannot contain a literal: " + res
, loc
);
413 s
= new Statement((Entity
)res
, (Entity
)path
, anon
, context
.meta
);
// Other branch: statement (anon, path, res).
415 s
= new Statement(anon
, (Entity
)path
, res
, context
.meta
);
418 Add(context
.store
, s
, loc
);
422 ReadWhitespace(context
.source
);
// Returns an Entity for the given URI.
// Two paths are visible: one returns a brand-new Entity, the other looks the URI up
// in context.namedNode and creates and caches an Entity on a miss.
// NOTE(review): the condition selecting the first path and the final return are not
// visible in this excerpt.
428 private Entity
GetResource(ParseContext context
, string uri
) {
430 return new Entity(uri
);
// Cached path: look the URI up first and reuse the entity if present.
432 Entity ret
= (Entity
)context
.namedNode
[uri
];
433 if (ret
!= null) return ret
;
434 ret
= new Entity(uri
);
435 context
.namedNode
[uri
] = ret
;
// Turns a prefix:local name into a resource.
// Visible cases: a lookup in context.anonymous keyed by the whole string (a new
// Entity(null) is created and cached there); an empty prefix, resolved against
// BaseUri; and any other prefix, resolved through context.namespaces.
// NOTE(review): the condition of the first branch is not visible; presumably it tests
// for the blank-node prefix "_" -- confirm against the full file.
439 private Resource
ResolveQName(string str
, ParseContext context
, Location loc
) {
// Split at the first colon; callers in this excerpt check that one exists.
440 int colon
= str
.IndexOf(":");
441 string prefix
= str
.Substring(0, colon
);
443 Resource ret
= (Resource
)context
.anonymous
[str
];
445 ret
= new Entity(null);
446 context
.anonymous
[str
] = ret
;
449 } else if (prefix
== "") {
// Empty prefix: local name appended to BaseUri (or to "" when BaseUri is null).
450 return GetResource(context
, (BaseUri
== null ? "" : BaseUri
) + str
.Substring(colon
+1));
452 string ns
= context
.namespaces
.GetNamespace(prefix
);
// NOTE(review): the check guarding this error is not visible in the excerpt.
454 OnError("Prefix is undefined: " + str
, loc
);
455 return GetResource(context
, ns
+ str
.Substring(colon
+1));
// Reads one token and converts it to a resource: directive sentinels, keywords,
// <uri> references, variables, qnames, [ ] anonymous nodes, ( ) lists, { } formulas,
// numbers and (in @keywords mode) bare local names.
// NOTE(review): most branch headers and several returns are missing from this
// excerpt, so the comments below describe the visible statements only.
459 private Resource
ReadResource2(ParseContext context
, out bool reverse
) {
462 Location loc
= context
.Location
;
464 object tok
= ReadToken(context
.source
, context
);
468 string str
= (string)tok
;
// Directive tokens map to the sentinel resources that ReadStatement checks for.
474 if (str
== "@prefix")
475 return PrefixResource
;
477 if (str
== "@keywords")
478 return KeywordsResource
;
// Bare words: either names declared via @keywords, or (outside keyword mode) the
// built-in "a", "has" and "is".
480 if (context
.UsingKeywords
&& context
.Keywords
.Contains(str
))
482 if (!context
.UsingKeywords
&&
483 ( str
== "a" || str
== "has" || str
== "is"))
487 // TODO: Turn these off with @keywords
495 return entLOGIMPLIES
;
498 return entLOGIMPLIES
;
501 if (str
== "@has") // ignore this token
502 return ReadResource2(context
, out reverse
);
// A predicate is read and then the token "of" is required after it.
507 Resource pred
= ReadResource2(context
, out reversetemp
);
510 string of
= ReadToken(context
.source
, context
) as string;
511 if (of
== null) OnError("End of stream while expecting 'of'", loc
);
513 || (!context
.UsingKeywords
&& of
== "of")
514 || (context
.UsingKeywords
&& context
.Keywords
.Contains("of") && of
== "of"))
516 OnError("Expecting token 'of' but found '" + of
+ "'", loc
);
517 return null; // unreachable
// Any other '@' token is reported as an unsupported directive.
520 if (str
.StartsWith("@"))
521 OnError("The " + str
+ " directive is not supported", loc
);
// <uri>: strip the angle brackets and resolve against BaseUri.
525 if (str
.StartsWith("<") && str
.EndsWith(">")) {
526 string uri
= GetAbsoluteUri(BaseUri
, str
.Substring(1, str
.Length
-2));
527 return GetResource(context
, uri
);
// The first character is dropped and the remainder is looked up via GetResource.
533 string uri
= str
.Substring(1);
536 Entity
var = GetResource(context
, uri
);
543 if (str
.IndexOf(":") != -1)
544 return ResolveQName(str
, context
, loc
);
// A new anonymous entity whose predicates are read up to a closing ']'.
549 Entity ret
= new Entity(null);
550 ReadWhitespace(context
.source
);
551 if (context
.source
.Peek() != ']') {
552 char bracket
= ReadPredicates(ret
, context
);
554 bracket
= ReadPunc(context
.source
);
556 OnError("Expected a close bracket but found '" + bracket
+ "'", loc
);
558 context
.source
.Read();
// List construction: items are chained with entRDFFIRST / entRDFREST statements and
// the chain is terminated with entRDFNIL.
570 Resource res
= ReadResource(context
, out rev2
);
575 ent
= new Entity(null);
577 Entity sub
= new Entity(null);
578 Add(context
.store
, new Statement(ent
, entRDFREST
, sub
, context
.meta
), loc
);
582 Add(context
.store
, new Statement(ent
, entRDFFIRST
, res
, context
.meta
), loc
);
584 if (ent
== null) // No list items.
585 ent
= entRDFNIL
; // according to Turtle spec
587 Add(context
.store
, new Statement(ent
, entRDFREST
, entRDFNIL
, context
.meta
), loc
);
592 return null; // Should I use a more precise end-of-list return value?
// Nested statements are read with a copy of the context whose meta is a fresh
// anonymous entity, until '}' is reached; that meta entity is the result.
598 ParseContext newcontext
= context
;
599 newcontext
.meta
= new Entity(null);
600 while (NextPunc(context
.source
) != '}' && ReadStatement(newcontext
)) { }
601 ReadWhitespace(context
.source
);
602 if (context
.source
.Peek() == '}') context
.source
.Read();
603 return newcontext
.meta
;
608 // In Turtle, numbers are restricted to [0-9]+, and are datatyped xsd:integer.
// Anything that parses as a double becomes a literal holding numval.ToString().
610 if (double.TryParse(str
, System
.Globalization
.NumberStyles
.Any
, null, out numval
))
611 return new Literal(numval
.ToString());
613 // If @keywords is used, alphanumerics that aren't keywords
614 // are local names in the default namespace.
615 if (context
.UsingKeywords
&& char.IsLetter(str
[0])) {
// NOTE(review): the check guarding this error is not visible in the excerpt.
617 OnError("The document contains an unqualified name but no BaseUri was specified: \"" + str
+ "\"", loc
);
618 return GetResource(context
, BaseUri
+ str
);
623 OnError("Invalid token: " + str
, loc
);
// Adds a statement to the sink. An exception raised while adding is reported through
// OnError with the statement text, the original message and the source position, and
// is passed along as the cause.
// NOTE(review): the opening 'try {' line is not visible in this excerpt.
627 private void Add(StatementSink store
, Statement statement
, Location position
) {
629 store
.Add(statement
);
630 } catch (Exception e
) {
631 OnError("Add failed on statement { " + statement + " }: " + e
.Message
, position
, e
);
/// <summary>
/// Aborts parsing by throwing a <c>ParserException</c> whose message is
/// <paramref name="message"/> followed by the line and column of
/// <paramref name="position"/>.
/// </summary>
/// <param name="message">Description of the problem.</param>
/// <param name="position">Source position appended to the message.</param>
private void OnError(string message, Location position)
{
    string where = ", line " + position.Line + " col " + position.Col;
    throw new ParserException(message + where);
}
/// <summary>
/// Aborts parsing by throwing a <c>ParserException</c> that wraps an underlying
/// exception. The message is <paramref name="message"/> followed by the line and
/// column of <paramref name="position"/>.
/// </summary>
/// <param name="message">Description of the problem.</param>
/// <param name="position">Source position appended to the message.</param>
/// <param name="cause">Exception passed to the <c>ParserException</c> constructor.</param>
private void OnError(string message, Location position, Exception cause)
{
    string where = ", line " + position.Line + " col " + position.Col;
    throw new ParserException(message + where, cause);
}
// Wrapper around a TextReader that buffers up to two characters of lookahead and
// tracks line and column for error reporting.
// NOTE(review): the field declarations (r, peekCount, Line, Col) and the headers of
// the methods below are missing from this excerpt; callers use Peek, Peek2 and Read.
645 internal class MyReader
{
647 public MyReader(TextReader reader
) { r = reader; }
// Lookahead buffer: slot 0 is the next character, slot 1 the one after it.
652 int[] peeked
= new int[2];
// Current position packaged as a Location value.
655 public Location Location { get { return new Location(Line, Col); }
}
// Fill slot 0 from the underlying reader when nothing has been buffered yet.
658 if (peekCount
== 0) {
659 peeked
[0] = r
.Read();
// Fill slot 1 when only one character has been buffered.
667 if (peekCount
== 1) {
668 peeked
[1] = r
.Read();
// Shift the second buffered character into the first slot.
679 peeked
[0] = peeked
[1];
// A line feed advances the line counter and resets the column.
685 if (c
== '\n') { Line++; Col = 0; }
// Immutable line/column pair identifying a position in the input; OnError reads
// Line and Col when it builds error messages.
692 internal struct Location
{
// Line and column values exactly as supplied to the constructor.
693 public readonly int Line
, Col
;
// Stores the given line and column unchanged.
694 public Location(int line
, int col
) { Line = line; Col = col; }