configure.in, AssemblyInfo.cs: For those unfortunate earthlings without libchm, libwv...
[beagle.git] / Util / SemWeb / Sparql.cs
blob49e7a0f553a8dfa400819fc12160c554caf71b7d
1 using System;
2 using System.Collections;
3 using System.IO;
4 using System.Text;
6 using SemWeb;
7 using SemWeb.IO;
8 using SemWeb.Stores;
10 namespace SemWeb.Query {
12 public class SparqlParser {
// Input source; its Location property is used for error positions.
private MyReader reader;

// Prefix-to-namespace mappings collected from PREFIX clauses.
private NamespaceManager nsmgr = new NamespaceManager();

// URI from the BASE clause; stays null when no BASE clause was read.
private string baseuri;

// Query form (SELECT, ASK, ...) recorded while parsing.
private SparqlQuestion question;

// Names of every variable met in the graph pattern, without duplicates.
private ArrayList variables = new ArrayList();

// Names of the variables listed in the SELECT clause, without duplicates.
private ArrayList selectvariables = new ArrayList();

// Set when the SELECT clause contains DISTINCT.
private bool selectdistinct;

// Set when the SELECT clause is "*".
private bool selectall;

// Triple patterns of the WHERE clause, stored as statements.
private MemoryStore graph = new MemoryStore();
/// <summary>
/// Creates a parser over <paramref name="reader"/> and parses the whole
/// query immediately; parse errors are raised from within this constructor.
/// </summary>
/// <param name="reader">Source of the SPARQL query text.</param>
public SparqlParser(TextReader reader) {
	MyReader source = new MyReader(reader);
	this.reader = source;

	Parse();
}
/// <summary>The URI read from the BASE clause, or null if none was read.</summary>
public string BaseUri {
	get { return baseuri; }
}

/// <summary>The query form that was parsed.</summary>
public SparqlQuestion Question {
	get { return question; }
}

/// <summary>True when the SELECT clause contained DISTINCT.</summary>
public bool Distinct {
	get { return selectdistinct; }
}

/// <summary>True when the SELECT clause was "*".</summary>
public bool All {
	get { return selectall; }
}

/// <summary>The store holding the triple patterns of the query.</summary>
public Store Graph {
	get { return graph; }
}

/// <summary>Read-only view of the variable names used in the graph pattern.</summary>
public IList Variables {
	get { return ArrayList.ReadOnly(variables); }
}

/// <summary>Read-only view of the variable names listed in the SELECT clause.</summary>
public IList Select {
	get { return ArrayList.ReadOnly(selectvariables); }
}
/// <summary>The query forms the parser can record in <c>Question</c>.</summary>
public enum SparqlQuestion {
	/// <summary>A SELECT query.</summary>
	Select = 0,
	/// <summary>A CONSTRUCT query.</summary>
	Construct = 1,
	/// <summary>A DESCRIBE query.</summary>
	Describe = 2,
	/// <summary>An ASK query.</summary>
	Ask = 3
}
/// <summary>
/// Builds a <c>QueryEngine</c> from the parsed query: every variable of the
/// graph pattern is selected and every parsed statement is added as a filter.
/// </summary>
/// <returns>A new, populated query engine.</returns>
public QueryEngine CreateQuery() {
	QueryEngine engine = new QueryEngine();

	// Select each variable that occurred in the pattern.
	for (int i = 0; i < variables.Count; i++) {
		string name = (string)variables[i];
		engine.Select(new Entity(name));
	}

	// Each triple pattern constrains the result.
	foreach (Statement pattern in graph) {
		engine.AddFilter(pattern);
	}

	return engine;
}
/// <summary>
/// Skips white space and '#' comments. A comment runs up to and including
/// the first line-feed or carriage-return character, or to the end of input.
/// </summary>
private void ReadWhitespace() {
	for (;;) {
		while (char.IsWhiteSpace((char)reader.Peek()))
			reader.Read();

		// Anything other than a comment start ends the skipping.
		if (reader.Peek() != '#')
			return;

		// Swallow the comment, terminator included, then look for more white space.
		int ch;
		do {
			ch = reader.Read();
		} while (ch != -1 && ch != '\n' && ch != '\r');
	}
}
/// <summary>
/// Skips white space and comments, then reads a run of letters.
/// </summary>
/// <returns>The letters read; an empty string if the next character is not a letter.</returns>
private string ReadToken() {
	ReadWhitespace();

	StringBuilder token = new StringBuilder();
	for (int next = reader.Peek(); next != -1 && char.IsLetter((char)next); next = reader.Peek())
		token.Append((char)reader.Read());

	return token.ToString();
}
/// <summary>
/// Skips white space and comments, then reads a run of name characters:
/// letters, digits, '?', '$', '-' and '_'.
/// </summary>
/// <returns>The characters read; an empty string if none qualified.</returns>
private string ReadName() {
	ReadWhitespace();

	StringBuilder name = new StringBuilder();
	for (;;) {
		int next = reader.Peek();
		if (next == -1)
			break;

		char ch = (char)next;
		bool allowed = char.IsLetterOrDigit(ch)
			|| ch == '?' || ch == '$' || ch == '-' || ch == '_';
		if (!allowed)
			break; // white space also ends the name: it is never an allowed character

		name.Append((char)reader.Read());
	}

	return name.ToString();
}
/// <summary>
/// Reads a URI written between '&lt;' and '&gt;' and returns the text between
/// the brackets. Errors are reported at the position where the URI started.
/// </summary>
private string ReadQuotedUri() {
	ReadWhitespace();
	Location start = reader.Location;

	if (reader.Read() != '<')
		OnError("Expecting a quoted URI starting with a '<'", start);

	StringBuilder uri = new StringBuilder();
	for (int ch = reader.Read(); ch != '>'; ch = reader.Read()) {
		if (ch == -1)
			OnError("End of file while reading a URI", start);
		if (char.IsWhiteSpace((char)ch))
			OnError("White space cannot appear in a URI", start);

		uri.Append((char)ch);
	}

	return uri.ToString();
}
/// <summary>
/// Reads the prefix part of a QName, up to and including the ':'.
/// </summary>
/// <returns>The prefix with its trailing colon (":" alone for an empty prefix).</returns>
private string ReadQNamePrefix() {
	ReadWhitespace();
	Location start = reader.Location;

	StringBuilder prefix = new StringBuilder();
	int ch;
	do {
		ch = reader.Read();

		if (ch == -1)
			OnError("End of file while reading a QName prefix", start);
		if (char.IsWhiteSpace((char)ch))
			OnError("Expecting a colon, " + prefix, start);

		// The colon itself is kept as part of the returned prefix.
		prefix.Append((char)ch);
	} while (ch != ':');

	return prefix.ToString();
}
/// <summary>
/// Reads either a quoted URI (&lt;...&gt;) or a QName. A QName with an empty
/// prefix is appended to the BASE URI; any other QName is resolved through
/// the namespace manager.
/// </summary>
private string ReadUri() {
	ReadWhitespace();
	if (reader.Peek() == '<')
		return ReadQuotedUri();

	Location start = reader.Location;
	string prefix = ReadQNamePrefix();
	string local = ReadName();

	if (prefix != ":")
		return nsmgr.Resolve(prefix + local);

	// Empty prefix: the name is relative to the BASE URI, which must exist.
	if (baseuri == null)
		OnError("No BASE URI was specified", start);
	return baseuri + local;
}
/// <summary>
/// Reads a variable name (starting with '?' or '$') or, failing that, a URI.
/// </summary>
private string ReadVarOrUri() {
	ReadWhitespace();

	int next = reader.Peek();
	bool isVariable = next == '?' || next == '$';
	return isVariable ? ReadName() : ReadUri();
}
/// <summary>
/// Reads a numeric token and converts it to a number.
/// </summary>
/// <remarks>
/// The token is made of letters, digits and '.', plus a '+' or '-' directly
/// after an exponent marker. It is scanned here rather than with ReadName(),
/// because ReadName() stops at '.', which made decimal values unreadable.
/// Parsing uses the invariant culture so that '.' is always the decimal point.
/// </remarks>
/// <returns>
/// An <c>int</c> for "0x"-prefixed hexadecimal text, a <c>decimal</c> when the
/// text contains '.', 'e' or 'E', otherwise a <c>long</c>.
/// </returns>
private object ReadNumber() {
	ReadWhitespace();
	Location loc = reader.Location;

	StringBuilder b = new StringBuilder();
	while (true) {
		int c = reader.Peek();
		if (c == -1) break;

		char ch = (char)c;
		bool hex = b.Length >= 2 && b[0] == '0' && (b[1] == 'x' || b[1] == 'X');
		bool afterExponent = !hex && b.Length > 0
			&& (b[b.Length - 1] == 'e' || b[b.Length - 1] == 'E');

		if (!char.IsLetterOrDigit(ch) && ch != '.'
			&& !((ch == '+' || ch == '-') && afterExponent))
			break;

		b.Append((char)reader.Read());
	}

	string num = b.ToString();
	System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
	try {
		if (num.StartsWith("0x") || num.StartsWith("0X"))
			return int.Parse(num.Substring(2), System.Globalization.NumberStyles.AllowHexSpecifier, invariant);
		if (num.IndexOf('.') >= 0 || num.IndexOf('e') >= 0 || num.IndexOf('E') >= 0)
			// NumberStyles.Float is required for the exponent form to be accepted.
			return decimal.Parse(num, System.Globalization.NumberStyles.Float, invariant);
		return long.Parse(num, System.Globalization.NumberStyles.Integer, invariant);
	} catch (FormatException) {
		OnError("Invalid number: " + num, loc);
	} catch (OverflowException) {
		OnError("Invalid number: " + num, loc);
	}

	// Only reached if OnError returns, which it does not in practice.
	return null;
}
/// <summary>
/// Reads a quoted text literal. The first character read is taken as the
/// quote character and the literal ends at its next unescaped occurrence.
/// </summary>
/// <remarks>
/// A backslash escapes the following character: \n, \r and \t map to the
/// corresponding control characters, any other escaped character stands for
/// itself (so \" and \\ yield a quote and a backslash).
/// </remarks>
/// <returns>The literal text without the surrounding quotes.</returns>
private string ReadLiteralText() {
	char quotechar = (char)reader.Read();

	StringBuilder b = new StringBuilder();

	bool escaped = false;
	while (true) {
		Location loc = reader.Location;
		int c = reader.Read();
		if (c == -1)
			OnError("End of file while reading a text literal", loc);

		if (escaped) {
			switch ((char)c) {
				case 'n': b.Append('\n'); break;
				case 'r': b.Append('\r'); break;
				case 't': b.Append('\t'); break;
				// The cast matters: Append(int) would append the decimal
				// character code instead of the character.
				default: b.Append((char)c); break;
			}
			escaped = false;
		} else if (c == '\\') {
			escaped = true;
		} else if (c == quotechar) {
			break;
		} else {
			b.Append((char)c);
		}
	}

	return b.ToString();
}
/// <summary>
/// Reads the object position of a triple pattern.
/// </summary>
/// <returns>
/// A <c>Literal</c> for a number or a quoted string (optionally followed by
/// "@lang" and/or "^^datatype"); otherwise the string returned by
/// ReadVarOrUri().
/// </returns>
private object ReadLiteral() {
	ReadWhitespace();
	int firstchar = reader.Peek();
	if (firstchar == -1) OnError("End of file expecting a literal", reader.Location);

	if (char.IsDigit((char)firstchar) || firstchar == '.') {
		// Format with the invariant culture so the literal text does not
		// depend on the machine's decimal separator.
		object number = ReadNumber();
		return new Literal(Convert.ToString(number, System.Globalization.CultureInfo.InvariantCulture), null, null);
	}

	if (firstchar == '\'' || firstchar == '\"') {
		string text = ReadLiteralText();
		string lang = null, datatype = null;
		if (reader.Peek() == '@') {
			reader.Read();
			lang = ReadName();
		}
		if (reader.Peek() == '^') {
			// The datatype marker is two carets; check the second one
			// instead of silently discarding whatever follows the first.
			Location loc = reader.Location;
			reader.Read();
			if (reader.Read() != '^')
				OnError("Expecting '^^' before the datatype of a literal", loc);
			datatype = ReadUri();
		}
		return new Literal(text, lang, datatype);
	}

	return ReadVarOrUri();
}
/// <summary>
/// Wraps <paramref name="name"/> in an <c>Entity</c>. Names starting with
/// '?' or '$' are variables and are recorded once in the variable list.
/// </summary>
private Entity GetEntity(string name) {
	char sigil = name[0];
	bool isVariable = sigil == '?' || sigil == '$';

	if (isVariable && !variables.Contains(name))
		variables.Add(name);

	return new Entity(name);
}
/// <summary>
/// Reads a sequence of graph patterns. Triple patterns are written as
/// "(subject predicate object)" and are added to the graph; groups may be
/// nested in braces. UNION, GRAPH, OPTIONAL and AND are recognised but
/// rejected as unsupported.
/// </summary>
/// <returns>
/// '}' when a closing brace is the next character (it is left unread for the
/// caller), or -1 at end of input or at any character that cannot start a
/// pattern.
/// </returns>
private int ReadPatternGroup() {
	while (true) {
		ReadWhitespace();
		Location loc = reader.Location;

		int next = reader.Peek();
		if (next == -1)
			return -1;

		if (next == 'U') {
			string keyword = ReadToken();
			if (keyword != "UNION")
				OnError("Expecting UNION", loc);

			OnError("UNION is not supported", loc);

		} else if (next == 'G') {
			string keyword = ReadToken();
			if (keyword != "GRAPH")
				OnError("Expecting GRAPH", loc);

			OnError("GRAPH is not supported", loc);

		} else if (next == 'O') {
			string keyword = ReadToken();
			if (keyword != "OPTIONAL")
				OnError("Expecting OPTIONAL", loc);

			OnError("OPTIONAL is not supported", loc);

		} else if (next == 'A') {
			string keyword = ReadToken();
			if (keyword != "AND")
				OnError("Expecting AND", loc);

			loc = reader.Location;
			OnError("Expressions are not supported", loc);

		} else if (next == '(') {
			// Triple Pattern
			reader.Read(); // open paren
			string subj = ReadVarOrUri();
			string pred = ReadVarOrUri();
			object obj = ReadLiteral();

			ReadWhitespace();
			loc = reader.Location;
			int close = reader.Read();
			if (close != ')')
				OnError("Expecting close parenthesis: " + (char)close, loc);

			// ReadLiteral() yields a string for variables/URIs and a
			// Literal otherwise; only the former becomes an Entity.
			graph.Add( new Statement (
				GetEntity(subj),
				GetEntity(pred),
				obj is string ? (Resource)GetEntity((string)obj) : (Resource)obj
				) );

		} else if (next == '{') {
			// Consume the brace before recursing: the nested call peeks the
			// input again and would otherwise see the same '{' forever.
			reader.Read();

			int end = ReadPatternGroup();
			if (end != '}')
				OnError("Expecting close brace", reader.Location);
			reader.Read(); // close brace

		} else if (next == '}') {
			return next;

		} else {
			return -1;
		}
	}
}
// Parser position within the query. The numeric order is significant:
// Parse() compares states with <= and >.
enum ReadState {
	// Nothing has been read yet; the only point where BASE is accepted.
	StartOfProlog = 0,
	// A BASE or PREFIX clause has been read.
	Prolog = 1,
	// The query form has been read; WHERE is accepted from here on.
	Limits = 2
}
/// <summary>
/// Parses the whole query, clause by clause, filling in the base URI,
/// namespace prefixes, query form, select list and graph pattern.
/// Parsing stops at the first position where no keyword can be read.
/// </summary>
/// <remarks>
/// Keywords not listed in the switch are skipped without an error.
/// </remarks>
private void Parse() {
	ReadState state = ReadState.StartOfProlog;

	while (true) {
		Location loc = reader.Location;
		string clause = ReadToken();

		switch (clause) {
			case "":
				// No further keyword: fine once a query form has been seen.
				if (state <= ReadState.Prolog)
					OnError("No query was given", loc);
				return;

			case "BASE":
				if (state != ReadState.StartOfProlog)
					OnError("BASE must be the first clause", loc);
				baseuri = ReadQuotedUri();
				state = ReadState.Prolog;
				break;

			case "PREFIX":
				if (state > ReadState.Prolog)
					OnError("PREFIX must occur in the prolog", loc);
				string prefix = ReadQNamePrefix();
				string uri = ReadQuotedUri();
				nsmgr.AddNamespace(uri, prefix.Substring(0, prefix.Length-1)); // strip trailing ':'
				state = ReadState.Prolog;
				break;

			case "SELECT":
				if (state > ReadState.Prolog)
					OnError("SELECT cannot occur here", loc);
				state = ReadState.Limits;

				question = SparqlQuestion.Select;
				ReadSelectList();
				break;

			case "DESCRIBE":
				if (state > ReadState.Prolog)
					OnError("DESCRIBE cannot occur here", loc);
				state = ReadState.Limits;
				OnError("DESCRIBE is not supported", loc);

				question = SparqlQuestion.Describe;
				break;

			case "CONSTRUCT":
				if (state > ReadState.Prolog)
					OnError("CONSTRUCT cannot occur here", loc);
				state = ReadState.Limits;
				OnError("CONSTRUCT is not supported", loc);

				question = SparqlQuestion.Construct;
				break;

			case "ASK":
				if (state > ReadState.Prolog)
					OnError("ASK cannot occur here", loc);
				state = ReadState.Limits;

				question = SparqlQuestion.Ask;
				break;

			case "WITH":
			case "FROM":
				// Name the keyword that was actually written.
				if (state != ReadState.Limits)
					OnError(clause + " cannot occur here", loc);
				OnError(clause + " is not supported", loc);
				break;

			case "WHERE":
				if (state != ReadState.Limits)
					OnError("WHERE cannot occur here", loc);
				ReadPatternGroup();
				break;
		}
	}
}

// Reads the select list following SELECT: an optional DISTINCT and then
// either "*" or variable names, optionally separated by commas.
private void ReadSelectList() {
	while (true) {
		ReadWhitespace();
		Location loc = reader.Location;
		int next = reader.Peek();

		if (next == ',') { reader.Read(); continue; }

		if (next == '*') {
			// '*' is not a name character, so ReadName() would return ""
			// here; consume it directly instead.
			reader.Read();
			if (selectvariables.Count > 0)
				OnError("Cannot select * and also name other variables", loc);
			selectall = true;
			return;
		}

		if (next != 'D' && next != '?' && next != '$')
			return;

		// Never empty: the first character is one ReadName() accepts.
		string name = ReadName();
		if (name == "DISTINCT") {
			selectdistinct = true;
		} else if (name[0] == '?' || name[0] == '$') {
			if (selectall)
				OnError("Cannot select * and also name other variables", loc);
			if (!selectvariables.Contains(name))
				selectvariables.Add(name);
		} else {
			OnError("Invalid variable: " + name, loc);
		}
	}
}
/// <summary>
/// Reports a parse error by throwing a <c>ParserException</c> whose message
/// is <paramref name="message"/> followed by the line and column of
/// <paramref name="position"/>. This method never returns normally.
/// </summary>
private void OnError(string message, Location position) {
	string where = ", line " + position.Line + " col " + position.Col;
	throw new ParserException(message + where);
}