Yet another. Init the gobject type system.
[beagle.git] / Util / SemWeb / SQLStore.cs
blob e30d93bf8bbe8d778446e1e24c845b4af8332050
1 using System;
2 using System.Collections;
3 using System.Data;
4 using System.IO;
5 using System.Text;
7 using SemWeb.Util;
9 namespace SemWeb.Stores {
10 // TODO: It's not safe to have two concurrent accesses to the same database
11 // because the creation of new entities will use the same IDs.
13 public abstract class SQLStore : Store {
// Name prefix shared by the "<table>_statements", "<table>_literals" and
// "<table>_entities" tables used throughout this class.
14 string table;
// True until Init() has run CreateTable()/CreateIndexes() once.
16 bool firstUse = true;
// Non-null only while Import() is running; maps entity URI -> id.
17 IDictionary lockedIdCache = null;
// Last id handed out by NextId(); -1 means it has not been computed yet.
18 int cachedNextId = -1;
// Maps Literal -> id for literals already looked up or created.
20 Hashtable literalCache = new Hashtable();
// Running sum of the value lengths stored in literalCache; GetLiteralId()
// flushes the cache when this grows too large.
21 int literalCacheSize = 0;
// When true, Select() writes each generated SQL query to stderr.
23 bool Debug = false;
// Scratch buffer reused for building single SQL commands. It is shared, so a
// caller must finish with it before calling anything else that resets it.
25 StringBuilder cmdBuffer = new StringBuilder();
27 // Buffer statements to process together.
28 ArrayList addStatementBuffer = null;
// "INSERT INTO <table>_... VALUES " prefixes, built once in the constructor.
30 string INSERT_INTO_LITERALS_VALUES,
31 INSERT_INTO_STATEMENTS_VALUES,
32 INSERT_INTO_ENTITIES_VALUES;
// Wraps the numeric database id of a resource so it can be attached to a
// Resource object and used as a hash key. Two keys are equal exactly when
// they carry the same id.
private class ResourceKey {
	public int ResId;

	public ResourceKey(int id) {
		ResId = id;
	}

	// The id itself is the hash code.
	public override int GetHashCode() {
		return ResId;
	}

	// Equal only to another ResourceKey holding the same id.
	public override bool Equals(object other) {
		ResourceKey that = other as ResourceKey;
		if (that == null)
			return false;
		return that.ResId == ResId;
	}
}
// The four id columns of the statements table; scanned by GetAllEntities()
// and rewritten one by one by Replace(Entity, Entity).
43 private static readonly string[] fourcols = new string[] { "subject", "predicate", "object", "meta" };
// Only the predicate column; scanned by GetAllPredicates().
44 private static readonly string[] predcol = new string[] { "predicate" };
// Creates a store whose three tables are named "<table>_statements",
// "<table>_literals" and "<table>_entities", and precomputes the INSERT
// prefixes used when adding rows.
protected SQLStore(string table) {
	this.table = table;

	string insertInto = "INSERT INTO " + table;
	INSERT_INTO_LITERALS_VALUES = insertInto + "_literals VALUES ";
	INSERT_INTO_STATEMENTS_VALUES = insertInto + "_statements VALUES ";
	INSERT_INTO_ENTITIES_VALUES = insertInto + "_entities VALUES ";
}
// The table-name prefix that was passed to the constructor.
54 protected string TableName { get { return table; } }
// Whether a single INSERT may carry several comma-separated value tuples;
// consulted by RunAddBuffer() when batching inserts.
56 protected abstract bool SupportsInsertCombined { get; }
// Whether "USE INDEX(...)" hints may be appended to SELECT queries;
// consulted by Select().
57 protected abstract bool SupportsUseIndex { get; }
// Whether FindEntities() may answer with one multi-join query. When false,
// FindEntities() defers to the base class implementation.
58 protected virtual bool SupportsFastJoin { get { return true; } }
// Returns the SQL fragment appended by WhereLiteral() when the literal's
// language or datatype is null (i.e. a NULL test on the given column).
60 protected abstract string CreateNullTest(string column);
// Runs the one-time setup (CreateTable, then CreateIndexes) the first time
// it is called; every later call is a no-op.
private void Init() {
	if (firstUse) {
		// Flip the flag first so the setup is attempted only once.
		firstUse = false;
		CreateTable();
		CreateIndexes();
	}
}
// Number of rows in the statements table. Pending buffered statements are
// flushed first so they are included in the count.
public override int StatementCount {
	get {
		Init();
		RunAddBuffer();
		string countQuery = "select count(subject) from " + table + "_statements";
		return RunScalarInt(countQuery, 0);
	}
}
// Returns an id that is not yet used by any statement, literal or entity.
// While an import is running (lockedIdCache is set) and an id has already
// been computed, the cached counter is simply incremented. Otherwise the
// buffered statements are flushed and the largest id in use is looked up.
private int NextId() {
	bool importing = lockedIdCache != null;
	if (importing && cachedNextId != -1) {
		cachedNextId = cachedNextId + 1;
		return cachedNextId;
	}

	RunAddBuffer();

	string statements = table + "_statements";
	string[] maxQueries = new string[] {
		"select max(subject) from " + statements,
		"select max(predicate) from " + statements,
		"select max(object) from " + statements + " where objecttype=0",
		"select max(meta) from " + statements,
		"select max(id) from " + table + "_literals",
		"select max(id) from " + table + "_entities"
	};

	// The 0 id is not used.
	// The 1 id is reserved for Statement.DefaultMeta.
	int candidate = 2;
	foreach (string query in maxQueries)
		CheckMax(query, ref candidate);

	cachedNextId = candidate;
	return candidate;
}
// Runs a "select max(...)" query and, if its result is at least nextid,
// raises nextid to one past that result.
private void CheckMax(string command, ref int nextid) {
	int highest = RunScalarInt(command, 0);
	if (highest < nextid)
		return;
	nextid = highest + 1;
}
// Removes every statement, literal and entity from the store, discarding
// any statements that are still waiting in the add buffer.
public override void Clear() {
	Init();
	if (addStatementBuffer != null) addStatementBuffer.Clear();
	RunCommand("DELETE FROM " + table + "_statements;");
	RunCommand("DELETE FROM " + table + "_literals;");
	RunCommand("DELETE FROM " + table + "_entities;");

	// The in-memory caches map literals/URIs to ids of rows that were just
	// deleted. Drop them so later lookups do not hand out ids that no longer
	// exist in the tables.
	literalCache.Clear();
	literalCacheSize = 0;
	if (lockedIdCache != null) lockedIdCache.Clear();
}
// Returns the id of the given literal, or 0 if it is unknown and create is
// false.
//   cacheIsComplete - when true the database lookup is skipped, because the
//                     caller has already loaded matching literals into
//                     literalCache.
//   buffer / insertCombined - passed through to AddLiteral() when a new
//                     literal has to be created.
private int GetLiteralId(Literal literal, bool create, bool cacheIsComplete, StringBuilder buffer, bool insertCombined) {
	// 1. In-memory cache.
	if (literalCache.Count > 0) {
		object cached = literalCache[literal];
		if (cached != null) return (int)cached;
	}

	// 2. Database lookup (uses the shared command buffer).
	if (!cacheIsComplete) {
		StringBuilder query = cmdBuffer;
		query.Length = 0;
		query.Append("SELECT id FROM ");
		query.Append(table);
		query.Append("_literals WHERE ");
		WhereLiteral(query, literal);
		query.Append(" LIMIT 1;");

		object found = RunScalar(query.ToString());
		if (found != null) return AsInt(found);
	}

	if (!create)
		return 0;

	// 3. Create it. Only short values are remembered in the cache.
	int newId = AddLiteral(literal.Value, literal.Language, literal.DataType, buffer, insertCombined);
	if (literal.Value.Length < 75) {
		literalCache[literal] = newId;
		literalCacheSize += literal.Value.Length;
	}

	// Throw the whole cache away once it has grown past its size budget.
	if (literalCacheSize > 10000000 + 32*literalCache.Count) {
		literalCacheSize = 0;
		literalCache.Clear();
	}

	return newId;
}
// Appends a condition matching the literal's value, language and datatype
// to b. A null language or datatype is matched with the store-specific
// test from CreateNullTest().
private void WhereLiteral(StringBuilder b, Literal literal) {
	b.Append("value = ");
	EscapedAppend(b, literal.Value);

	b.Append(" AND ");
	if (literal.Language == null) {
		b.Append(CreateNullTest("language"));
	} else {
		b.Append("language = ");
		EscapedAppend(b, literal.Language);
	}

	b.Append(" AND ");
	if (literal.DataType == null) {
		b.Append(CreateNullTest("datatype"));
	} else {
		b.Append("datatype = ");
		EscapedAppend(b, literal.DataType);
	}
}
// Allocates a new id and emits the row "(id, value, language, datatype)"
// for the literals table.
//   buffer          - when non-null the SQL is appended there and nothing is
//                     executed; when null the row is inserted immediately.
//   insertCombined  - when true only the value tuple is written (preceded by
//                     a comma if the target already has content); when false
//                     a complete "INSERT INTO ... VALUES (...);" is written.
// Returns the new id.
private int AddLiteral(string value, string language, string datatype, StringBuilder buffer, bool insertCombined) {
	int id = NextId();

	bool runNow = (buffer == null);
	StringBuilder sql;
	if (runNow) {
		sql = cmdBuffer;
		sql.Length = 0;
	} else {
		sql = buffer;
	}

	if (insertCombined) {
		if (sql.Length > 0)
			sql.Append(",");
	} else {
		sql.Append(INSERT_INTO_LITERALS_VALUES);
	}

	sql.Append("(");
	sql.Append(id);
	sql.Append(",");
	EscapedAppend(sql, value);
	sql.Append(",");
	if (language == null)
		sql.Append("NULL");
	else
		EscapedAppend(sql, language);
	sql.Append(",");
	if (datatype == null)
		sql.Append("NULL");
	else
		EscapedAppend(sql, datatype);
	sql.Append(insertCombined ? ")" : ");");

	if (runNow)
		RunCommand(sql.ToString());

	return id;
}
// Returns the id of the entity with the given URI, or 0 if it is unknown
// and create is false. During an import the lookup is served from
// lockedIdCache; otherwise the entities table is queried. When a new entity
// is created its row is either inserted immediately or appended to
// entityInsertBuffer (see AddLiteral for the meaning of insertCombined).
private int GetEntityId(string uri, bool create, StringBuilder entityInsertBuffer, bool insertCombined) {
	if (lockedIdCache == null) {
		StringBuilder lookup = cmdBuffer;
		lookup.Length = 0;
		lookup.Append("SELECT id FROM ");
		lookup.Append(table);
		lookup.Append("_entities WHERE value =");
		EscapedAppend(lookup, uri);
		lookup.Append(" LIMIT 1;");
		int existing = RunScalarInt(lookup.ToString(), 0);
		if (existing != 0 || !create) return existing;
	} else {
		object known = lockedIdCache[uri];
		if (known != null) return (int)known;
		if (!create) return 0;
	}

	// From here on: the entity does not exist yet and must be created.

	if (uri.Length > 255)
		throw new NotSupportedException("URIs must be a maximum of 255 characters for this store due to indexing constraints (before MySQL 4.1.2).");

	int id = NextId();

	bool runNow = (entityInsertBuffer == null);
	StringBuilder sql;
	if (runNow) {
		sql = cmdBuffer;
		sql.Length = 0;
	} else {
		sql = entityInsertBuffer;
	}

	if (insertCombined) {
		if (sql.Length > 0)
			sql.Append(",");
	} else {
		sql.Append(INSERT_INTO_ENTITIES_VALUES);
	}
	sql.Append("(");
	sql.Append(id);
	sql.Append(",");
	EscapedAppend(sql, uri);
	sql.Append(insertCombined ? ")" : ");");

	if (runNow)
		RunCommand(sql.ToString());

	// Remember the new id for the rest of the import.
	if (lockedIdCache != null)
		lockedIdCache[uri] = id;

	return id;
}
// Unbuffered convenience form of GetResourceIdBuffer(): looks the resource
// up in the database and, if create is true, inserts it immediately.
private int GetResourceId(Resource resource, bool create) {
	int id = GetResourceIdBuffer(resource, create, false, null, null, false);
	return id;
}
// Resolves a resource to its database id.
//   null                   -> 0
//   Literal                -> GetLiteralId()
//   Statement.DefaultMeta  -> 1
//   resource with a key    -> the id stored in its ResourceKey
//   resource with a URI    -> GetEntityId()
//   anonymous resource     -> a fresh id, but only during an import
// Returns 0 when the resource is unknown and create is false. A non-zero id
// found for a non-literal resource is attached to it as a ResourceKey.
private int GetResourceIdBuffer(Resource resource, bool create, bool literalCacheComplete, StringBuilder literalInsertBuffer, StringBuilder entityInsertBuffer, bool insertCombined) {
	if (resource == null) return 0;

	if (resource is Literal)
		return GetLiteralId((Literal)resource, create, literalCacheComplete, literalInsertBuffer, insertCombined);

	if (object.ReferenceEquals(resource, Statement.DefaultMeta))
		return 1;

	ResourceKey existingKey = (ResourceKey)GetResourceKey(resource);
	if (existingKey != null) return existingKey.ResId;

	int id;
	if (resource.Uri == null) {
		// This anonymous node didn't come from the database since it has no
		// resource key, so without create there is nothing to return.
		if (!create) return 0;

		// Outside an import there is no way yet to reserve an id so that
		// other anonymous resources get different ones.
		if (lockedIdCache == null)
			throw new NotImplementedException("Anonymous nodes cannot be added to this store outside of an Import operation.");

		// During an import the counter can simply be incremented.
		id = NextId();
	} else {
		id = GetEntityId(resource.Uri, create, entityInsertBuffer, insertCombined);
	}

	if (id != 0)
		SetResourceKey(resource, new ResourceKey(id));
	return id;
}
// Value stored in the objecttype column: 1 for a literal, 0 for anything else.
private int ObjectType(Resource r) {
	return (r is Literal) ? 1 : 0;
}
// Builds the Entity for a database id. Id 0 yields null and id 1 yields
// Statement.DefaultMeta. When a cache is supplied, an entity already
// created for the same id is reused and newly created ones are added to it.
private Entity MakeEntity(int resourceId, string uri, Hashtable cache) {
	switch (resourceId) {
		case 0: return null;
		case 1: return Statement.DefaultMeta;
	}

	ResourceKey key = new ResourceKey(resourceId);
	bool useCache = (cache != null);

	if (useCache && cache.ContainsKey(key))
		return (Entity)cache[key];

	Entity created = new Entity(uri);
	SetResourceKey(created, key);

	if (useCache)
		cache[key] = created;

	return created;
}
// Adds one statement. While an add buffer is active the statement is only
// queued (and the queue is flushed once it holds 400 statements); otherwise
// its resources are resolved or created and the row is inserted at once.
// Throws ArgumentNullException if any part of the statement is null.
public override void Add(Statement statement) {
	if (statement.AnyNull) throw new ArgumentNullException();

	if (addStatementBuffer != null) {
		addStatementBuffer.Add(statement);
		if (addStatementBuffer.Count >= 400)
			RunAddBuffer();
		return;
	}

	Init();

	// Resolve every id before touching the shared command buffer, because
	// the lookups use that buffer themselves.
	int subj = GetResourceId(statement.Subject, true);
	int pred = GetResourceId(statement.Predicate, true);
	int objtype = ObjectType(statement.Object);
	int obj = GetResourceId(statement.Object, true);
	int meta = GetResourceId(statement.Meta, true);

	StringBuilder insert = cmdBuffer;
	insert.Length = 0;
	insert.Append(INSERT_INTO_STATEMENTS_VALUES).Append("(")
		.Append(subj).Append(", ")
		.Append(pred).Append(", ")
		.Append(objtype).Append(", ")
		.Append(obj).Append(", ")
		.Append(meta).Append("); ");

	RunCommand(insert.ToString());
}
// Flushes the queued statements to the database in three steps:
//   1. load the ids of all queued literals that are not cached yet,
//   2. resolve every statement's resources, collecting the INSERTs for new
//      literals and entities,
//   3. run the literal, entity and statement INSERTs, in that order.
// Does nothing when there is no buffer or the buffer is empty.
private void RunAddBuffer() {
	if (addStatementBuffer == null || addStatementBuffer.Count == 0) return;

	bool insertCombined = SupportsInsertCombined;

	Init();

	// Detach the buffer while working: NextId() calls back into this method
	// and must find nothing to do.
	ArrayList pending = addStatementBuffer;
	addStatementBuffer = null;

	// Step 1: prefetch the ids of all literals that aren't in the cache.
	StringBuilder prefetch = new StringBuilder();
	prefetch.Append("SELECT id, value, language, datatype FROM ");
	prefetch.Append(table);
	prefetch.Append("_literals WHERE 0 ");
	bool anyUncached = false;
	foreach (Statement queued in pending) {
		Literal candidate = queued.Object as Literal;
		if (candidate == null || literalCache.ContainsKey(candidate))
			continue;

		anyUncached = true;
		prefetch.Append(" or (");
		WhereLiteral(prefetch, candidate);
		prefetch.Append(")");
	}
	if (anyUncached) {
		prefetch.Append(";");
		IDataReader rows = RunReader(prefetch.ToString());
		try {
			while (rows.Read()) {
				int literalId = AsInt(rows[0]);
				string val = AsString(rows[1]);
				string lang = AsString(rows[2]);
				string dt = AsString(rows[3]);

				literalCache[new Literal(val, lang, dt)] = literalId;
				literalCacheSize += val.Length;
			}
		} finally {
			rows.Close();
		}
	}

	// Step 2: build the INSERT commands.
	StringBuilder entityInsertions = new StringBuilder();
	StringBuilder literalInsertions = new StringBuilder();
	StringBuilder statementInsertions = new StringBuilder();

	if (insertCombined)
		statementInsertions.Append(INSERT_INTO_STATEMENTS_VALUES);

	int last = pending.Count - 1;
	for (int i = 0; i <= last; i++) {
		Statement current = (Statement)pending[i];

		int subj = GetResourceIdBuffer(current.Subject, true, true, literalInsertions, entityInsertions, insertCombined);
		int pred = GetResourceIdBuffer(current.Predicate, true, true, literalInsertions, entityInsertions, insertCombined);
		int objtype = ObjectType(current.Object);
		int obj = GetResourceIdBuffer(current.Object, true, true, literalInsertions, entityInsertions, insertCombined);
		int meta = GetResourceIdBuffer(current.Meta, true, true, literalInsertions, entityInsertions, insertCombined);

		if (!insertCombined)
			statementInsertions.Append(INSERT_INTO_STATEMENTS_VALUES);

		statementInsertions.Append("(")
			.Append(subj).Append(", ")
			.Append(pred).Append(", ")
			.Append(objtype).Append(", ")
			.Append(obj).Append(", ")
			.Append(meta);

		// A tuple ends the command when each tuple is its own INSERT or when
		// it is the last tuple of the combined INSERT.
		bool endsCommand = !insertCombined || i == last;
		statementInsertions.Append(endsCommand ? ");" : "),");
	}

	// Step 3: run them. Literals and entities go in before the statements.
	if (literalInsertions.Length > 0) {
		if (insertCombined) {
			literalInsertions.Insert(0, INSERT_INTO_LITERALS_VALUES);
			literalInsertions.Append(';');
		}
		RunCommand(literalInsertions.ToString());
	}

	if (entityInsertions.Length > 0) {
		if (insertCombined) {
			entityInsertions.Insert(0, INSERT_INTO_ENTITIES_VALUES);
			entityInsertions.Append(';');
		}
		RunCommand(entityInsertions.ToString());
	}

	RunCommand(statementInsertions.ToString());

	// Empty the list and put it back as the active buffer.
	pending.Clear();
	addStatementBuffer = pending;
}
// Deletes every statement matching the template (null parts match
// anything). If the template names a resource that is not in the store,
// nothing can match and no command is run.
public override void Remove(Statement template) {
	Init();
	RunAddBuffer();

	System.Text.StringBuilder delete = new System.Text.StringBuilder();
	delete.Append("DELETE FROM ");
	delete.Append(table);
	delete.Append("_statements ");

	bool canMatch = WhereClause(template, delete);
	if (!canMatch) return;

	delete.Append(";");
	RunCommand(delete.ToString());
}
// Returns the entities found in any of the subject, predicate, object or
// meta columns of the statements table.
public override Entity[] GetAllEntities() {
	string[] columns = fourcols;
	return GetAllEntities(columns);
}
// Returns the entities found in the predicate column of the statements table.
public override Entity[] GetAllPredicates() {
	string[] columns = predcol;
	return GetAllEntities(columns);
}
// Collects the distinct entities whose ids appear in the given columns of
// the statements table. Ids 0 and 1 (unused / Statement.DefaultMeta) are
// skipped, and for the "object" column only rows with objecttype=0 are
// considered. Each id is returned once even if it occurs in several columns.
private Entity[] GetAllEntities(string[] cols) {
	// Make sure the tables exist before querying, as every other query
	// entry point of this class does.
	Init();
	RunAddBuffer();

	ArrayList ret = new ArrayList();
	Hashtable seen = new Hashtable();
	foreach (string col in cols) {
		IDataReader reader = RunReader("SELECT " + col + ", value FROM " + table + "_statements LEFT JOIN " + table + "_entities ON " + col + "=id " + (col == "object" ? " WHERE objecttype=0" : "") + " GROUP BY " + col + ";");
		try {
			while (reader.Read()) {
				int id = AsInt(reader[0]);
				if (id <= 1) continue; // don't return DefaultMeta.

				// 'seen' is used as a set; the stored value is irrelevant.
				if (seen.ContainsKey(id)) continue;
				seen[id] = seen;

				string uri = AsString(reader[1]);
				ret.Add(MakeEntity(id, uri, null));
			}
		} finally {
			reader.Close();
		}
	}
	return (Entity[])ret.ToArray(typeof(Entity));
}
// Appends one condition "col matches r" to cmd, preceded by " and " when
// 'and' is true. For a column whose name ends in "object" the condition
// also fixes the matching objecttype column (1 for literals, 0 otherwise),
// using whatever prefix precedes "object" in col. A MultiRes becomes an IN
// list. Returns false if the resource is not in the store, in which case
// nothing can match and cmd must not be used.
private bool WhereItem(string col, Resource r, System.Text.StringBuilder cmd, bool and) {
	if (and) cmd.Append(" and ");

	if (r is MultiRes) {
		// Assumption that ID space of literals and entities are the same,
		// so the same IN list works for object and non-object columns.
		cmd.Append("( ");
		cmd.Append(col);
		cmd.Append(" IN (");
		if (!AppendMultiRes((MultiRes)r, cmd)) return false;
		cmd.Append(" ))");
		return true;
	}

	int id = GetResourceId(r, false);
	if (id == 0) return false;

	if (col.EndsWith("object")) {
		// Whatever precedes "object" (e.g. a table alias) also prefixes the
		// objecttype column; it is empty when col is exactly "object".
		string colprefix = col.Substring(0, col.Length - "object".Length);

		cmd.Append(" (");
		cmd.Append(colprefix);
		cmd.Append(r is Literal ? "objecttype = 1 and " : "objecttype = 0 and ");
		cmd.Append(col);
		cmd.Append(" = ");
		cmd.Append(id);
		cmd.Append(")");
	} else {
		cmd.Append("( ");
		cmd.Append(col);
		cmd.Append(" = ");
		cmd.Append(id);
		cmd.Append(" )");
	}

	return true;
}
// Appends the comma-separated ids of the resources in r to cmd (the body of
// an SQL IN list). Resources that are not in the store are skipped, since
// they cannot match any row and must not prevent the known ones from
// matching. Returns false, meaning "nothing can match", only when none of
// the resources is known.
private bool AppendMultiRes(MultiRes r, StringBuilder cmd) {
	bool appendedAny = false;
	for (int i = 0; i < r.items.Count; i++) {
		int id = GetResourceId((Resource)r.items[i], false);
		if (id == 0) continue;
		if (appendedAny) cmd.Append(",");
		cmd.Append(id);
		appendedAny = true;
	}
	return appendedAny;
}
// Appends the WHERE clause for a statement template; see the four-resource
// overload for details.
private bool WhereClause(Statement template, System.Text.StringBuilder cmd) {
	bool canMatch = WhereClause(template.Subject, template.Predicate, template.Object, template.Meta, cmd);
	return canMatch;
}
// Appends " WHERE ..." with one condition per non-null template part. If
// all four parts are null nothing is appended. Returns false as soon as a
// part refers to a resource that is not in the store (nothing can match);
// cmd is then left half-built and must be discarded.
private bool WhereClause(Resource templateSubject, Resource templatePredicate, Resource templateObject, Resource templateMeta, System.Text.StringBuilder cmd) {
	if (templateSubject == null && templatePredicate == null && templateObject == null && templateMeta == null)
		return true;

	cmd.Append(" WHERE ");

	// True once a condition has been written, i.e. the next one needs " and ".
	bool needAnd = false;

	if (templateSubject != null) {
		if (!WhereItem("subject", templateSubject, cmd, needAnd)) return false;
		needAnd = true;
	}

	if (templatePredicate != null) {
		if (!WhereItem("predicate", templatePredicate, cmd, needAnd)) return false;
		needAnd = true;
	}

	if (templateObject != null) {
		if (!WhereItem("object", templateObject, cmd, needAnd)) return false;
		needAnd = true;
	}

	if (templateMeta != null) {
		if (!WhereItem("meta", templateMeta, cmd, needAnd)) return false;
	}

	return true;
}
// Converts a value read from the database to an int.
// Accepts int, uint, the other built-in integral types (some ADO.NET
// providers return 64-bit values for integer columns) and strings holding
// an integer in invariant-culture format.
// Throws ArgumentNullException for null, ArgumentException for any other
// type (including DBNull), and OverflowException/FormatException from the
// underlying conversion.
private int AsInt(object r) {
	if (r == null) throw new ArgumentNullException("r");
	if (r is int) return (int)r;
	if (r is uint) return (int)(uint)r;
	// Ids are written with culture-independent formatting, so parse them
	// the same way regardless of the current thread culture.
	if (r is string) return int.Parse((string)r, System.Globalization.CultureInfo.InvariantCulture);
	if (r is long || r is ulong || r is short || r is ushort || r is byte || r is sbyte)
		return Convert.ToInt32(r, System.Globalization.CultureInfo.InvariantCulture);
	throw new ArgumentException(r.ToString());
}
// Converts a value read from the database to a string. null and DBNull
// become null, strings are returned unchanged and byte arrays are decoded
// as UTF-8. Any other type raises a FormatException.
private string AsString(object r) {
	if (r == null || r is System.DBNull)
		return null;

	string text = r as string;
	if (text != null)
		return text;

	byte[] raw = r as byte[];
	if (raw != null)
		return System.Text.Encoding.UTF8.GetString(raw);

	throw new FormatException("SQL store returned a literal value as " + r.GetType());
}
// Plain holder for five ints. NOTE(review): presumably the subject,
// predicate, object-type, object and meta ids of one statement row; it is
// not referenced in the part of the file visible here, so confirm before
// relying on that reading.
644 private struct SPOLM {
645 public int S, P, OT, OID, M;
// Appends text to builder, preceded by ", " when comma is true.
private static void AppendComma(StringBuilder builder, string text, bool comma) {
	builder.Append(comma ? ", " + text : text);
}
// Appends the comma-separated column list for the parts selected by
// partialFilter: for each part its id column from the statements table (q)
// followed by the value column of the matching entities alias. The object
// part additionally selects q.objecttype.
private static void SelectFilter(SelectPartialFilter partialFilter, StringBuilder cmd) {
	// True once a column group has been written, i.e. the next needs ", ".
	bool needComma = false;

	if (partialFilter.Subject) {
		cmd.Append("q.subject, suri.value");
		needComma = true;
	}
	if (partialFilter.Predicate) {
		AppendComma(cmd, "q.predicate, puri.value", needComma);
		needComma = true;
	}
	if (partialFilter.Object) {
		AppendComma(cmd, "q.objecttype, q.object, ouri.value", needComma);
		needComma = true;
	}
	if (partialFilter.Meta) {
		AppendComma(cmd, "q.meta, muri.value", needComma);
	}
}
// Selects the statements matching any of the templates.
// When the templates differ in exactly one position (subject, predicate,
// object or meta) and every template has a value there, the whole set is
// answered with a single query using an IN list. In every other case each
// template is selected separately.
// Throws ArgumentNullException if templates or result is null.
public override void Select(Statement[] templates, SelectPartialFilter partialFilter, StatementSink result) {
	if (templates == null) throw new ArgumentNullException("templates");
	if (result == null) throw new ArgumentNullException("result");
	if (templates.Length == 0) return;

	// sv/pv/ov/mv: the values of the first template.
	// sm/pm/om/mm: whether that position differs among the templates.
	// sl/pl/ol/ml: the non-null values seen in each position.
	bool first = true;
	Resource sv = null, pv = null, ov = null, mv = null;
	bool sm = false, pm = false, om = false, mm = false;
	ArrayList sl = new ArrayList(), pl = new ArrayList(), ol = new ArrayList(), ml = new ArrayList();
	foreach (Statement template in templates) {
		if (first) {
			first = false;
			sv = template.Subject;
			pv = template.Predicate;
			ov = template.Object;
			mv = template.Meta;
		} else {
			if (sv != template.Subject) sm = true;
			if (pv != template.Predicate) pm = true;
			if (ov != template.Object) om = true;
			if (mv != template.Meta) mm = true;
		}
		if (template.Subject != null) sl.Add(template.Subject);
		if (template.Predicate != null) pl.Add(template.Predicate);
		if (template.Object != null) ol.Add(template.Object);
		if (template.Meta != null) ml.Add(template.Meta);
	}

	// An IN list can only stand in for the templates if none of them has a
	// wildcard (null) in the varying position; a wildcard is not in the list
	// and would silently be dropped. Such sets use the per-template path.
	int count = templates.Length;

	if (!sm && !pm && !om && !mm) {
		// All templates are the same.
		Select(templates[0], partialFilter, result);
	} else if (sm && !pm && !om && !mm && sl.Count == count) {
		Select(new MultiRes(sl), pv, ov, mv, partialFilter, result);
	} else if (!sm && pm && !om && !mm && pl.Count == count) {
		Select(sv, new MultiRes(pl), ov, mv, partialFilter, result);
	} else if (!sm && !pm && om && !mm && ol.Count == count) {
		Select(sv, pv, new MultiRes(ol), mv, partialFilter, result);
	} else if (!sm && !pm && !om && mm && ml.Count == count) {
		Select(sv, pv, ov, new MultiRes(ml), partialFilter, result);
	} else {
		foreach (Statement template in templates)
			Select(template, partialFilter, result);
	}
}
// Pseudo-resource standing for "any of these resources". It is only used
// inside this class to turn several templates into one IN-list query and
// has no URI of its own.
private class MultiRes : Resource {
	// The resources this object stands for.
	public ArrayList items;

	public MultiRes(ArrayList a) {
		items = a;
	}

	public override string Uri {
		get { return null; }
	}
}
// Selects the statements matching a single template (null parts match
// anything). Throws ArgumentNullException if result is null.
public override void Select(Statement template, SelectPartialFilter partialFilter, StatementSink result) {
	if (result == null)
		throw new ArgumentNullException();

	Resource subject = template.Subject;
	Resource predicate = template.Predicate;
	Resource obj = template.Object;
	Resource meta = template.Meta;
	Select(subject, predicate, obj, meta, partialFilter, result);
}
// Core query routine: builds and runs one SELECT over the statements table
// and feeds each resulting statement to result until it returns false.
//   template*      - null matches anything; a MultiRes matches any of its
//                    members; anything else must match exactly.
//   partialFilter  - which parts the caller wants filled in. Parts that are
//                    fixed by the template are not fetched but copied from
//                    the template; if SelectFirst is set at most one row is
//                    requested.
// Returns without calling result if a template part is not in the store.
// Throws ArgumentNullException if result is null.
private void Select(Resource templateSubject, Resource templatePredicate, Resource templateObject, Resource templateMeta, SelectPartialFilter partialFilter, StatementSink result) {
	if (result == null) throw new ArgumentNullException();

	Init();
	RunAddBuffer();

	bool limitOne = partialFilter.SelectFirst;

	// Don't select on columns that we already know from the template.
	// Columns matched through a MultiRes are always selected, because the
	// template alone does not tell which member matched.
	partialFilter = new SelectPartialFilter(
		(partialFilter.Subject && templateSubject == null) || templateSubject is MultiRes,
		(partialFilter.Predicate && templatePredicate == null) || templatePredicate is MultiRes,
		(partialFilter.Object && templateObject == null) || templateObject is MultiRes,
		(partialFilter.Meta && templateMeta == null) || templateMeta is MultiRes
		);

	if (partialFilter.SelectNone)
		partialFilter = SelectPartialFilter.All;

	// SQLite has a problem with LEFT JOIN: When a condition is made on the
	// first table in the ON clause (q.objecttype=0/1), when it fails,
	// it excludes the row from the first table, whereas it should only
	// exclude the results of the join, but include the row. Thus, the space
	// of IDs between literals and entities must be shared! For that reason
	// the joins below are on the id alone, without an objecttype condition.

	System.Text.StringBuilder cmd = new System.Text.StringBuilder("SELECT ");
	SelectFilter(partialFilter, cmd);
	if (partialFilter.Object)
		cmd.Append(", lit.value, lit.language, lit.datatype");
	cmd.Append(" FROM ");
	cmd.Append(table);
	cmd.Append("_statements AS q");
	if (SupportsUseIndex) {
		// When selecting on multiple resources at once, assume that it's faster
		// to select for each resource, rather than based on another index (say,
		// the predicate that the templates share).
		if (templateSubject is MultiRes) cmd.Append(" USE INDEX(subject_index)");
		if (templatePredicate is MultiRes) cmd.Append(" USE INDEX(predicate_index)");
		if (templateObject is MultiRes) cmd.Append(" USE INDEX(object_index)");
		if (templateMeta is MultiRes) cmd.Append(" USE INDEX(meta_index)");
	}

	if (partialFilter.Object) {
		cmd.Append(" LEFT JOIN ");
		cmd.Append(table);
		cmd.Append("_literals AS lit ON q.object=lit.id");
	}
	if (partialFilter.Subject) {
		cmd.Append(" LEFT JOIN ");
		cmd.Append(table);
		cmd.Append("_entities AS suri ON q.subject = suri.id");
	}
	if (partialFilter.Predicate) {
		cmd.Append(" LEFT JOIN ");
		cmd.Append(table);
		cmd.Append("_entities AS puri ON q.predicate = puri.id");
	}
	if (partialFilter.Object) {
		cmd.Append(" LEFT JOIN ");
		cmd.Append(table);
		cmd.Append("_entities AS ouri ON q.object = ouri.id");
	}
	if (partialFilter.Meta) {
		cmd.Append(" LEFT JOIN ");
		cmd.Append(table);
		cmd.Append("_entities AS muri ON q.meta = muri.id");
	}
	cmd.Append(' ');
	if (!WhereClause(templateSubject, templatePredicate, templateObject, templateMeta, cmd)) return;

	// The LIMIT clause belongs to the SELECT, so it has to come before the
	// terminating semicolon.
	if (limitOne)
		cmd.Append(" LIMIT 1");
	cmd.Append(";");

	if (Debug)
		Console.Error.WriteLine(cmd.ToString());

	// Entities created for this result set, shared between rows by id.
	Hashtable entMap = new Hashtable();

	IDataReader reader = RunReader(cmd.ToString());
	try {
		while (reader.Read()) {
			// Columns arrive in the order written by SelectFilter(),
			// followed by the three literal columns if the object is selected.
			int col = 0;
			int sid = -1, pid = -1, ot = -1, oid = -1, mid = -1;
			string suri = null, puri = null, ouri = null, muri = null;

			if (partialFilter.Subject) { sid = AsInt(reader[col++]); suri = AsString(reader[col++]); }
			if (partialFilter.Predicate) { pid = AsInt(reader[col++]); puri = AsString(reader[col++]); }
			if (partialFilter.Object) { ot = AsInt(reader[col++]); oid = AsInt(reader[col++]); ouri = AsString(reader[col++]); }
			if (partialFilter.Meta) { mid = AsInt(reader[col++]); muri = AsString(reader[col++]); }

			string lv = null, ll = null, ld = null;
			if (ot == 1 && partialFilter.Object) {
				lv = AsString(reader[col++]);
				ll = AsString(reader[col++]);
				ld = AsString(reader[col++]);
			}

			bool ret = result.Add(new Statement(
				!partialFilter.Subject ? (Entity)templateSubject : MakeEntity(sid, suri, entMap),
				!partialFilter.Predicate ? (Entity)templatePredicate : MakeEntity(pid, puri, entMap),
				!partialFilter.Object ? templateObject :
					(ot == 0 ? (Resource)MakeEntity(oid, ouri, entMap)
					         : (Resource)new Literal(lv, ll, ld)),
				(!partialFilter.Meta || mid == 0) ? (Entity)templateMeta : MakeEntity(mid, muri, entMap)
				));
			if (!ret) break;
		}
	} finally {
		reader.Close();
	}
}
// Returns str escaped for use in an SQL command (optionally wrapped in
// double quotes), or the text NULL when str is null.
private string Escape(string str, bool quotes) {
	if (str == null)
		return "NULL";

	StringBuilder escaped = new StringBuilder();
	EscapedAppend(escaped, str, quotes);
	return escaped.ToString();
}
// Appends str to b as a quoted, escaped SQL string.
protected virtual void EscapedAppend(StringBuilder b, string str) {
	bool withQuotes = true;
	EscapedAppend(b, str, withQuotes);
}
// Appends str to b with escaping applied: a newline becomes the two
// characters \n, and backslash, double quote, percent and asterisk are each
// preceded by a backslash. When quotes is true the result is wrapped in
// double quotes.
protected virtual void EscapedAppend(StringBuilder b, string str, bool quotes) {
	if (quotes) b.Append('"');

	foreach (char c in str) {
		if (c == '\n') {
			b.Append("\\n");
			continue;
		}

		bool needsBackslash = (c == '\\' || c == '\"' || c == '%' || c == '*');
		if (needsBackslash)
			b.Append('\\');
		b.Append(c);
	}

	if (quotes) b.Append('"');
}
// Escapes the whole content of b in place, using the same rules as
// EscapedAppend() without the surrounding quotes. Backslashes are doubled
// first so that the backslashes added by the later steps are left alone.
internal static void Escape(StringBuilder b) {
	b.Replace("\\", "\\\\")
	 .Replace("\"", "\\\"")
	 .Replace("\n", "\\n")
	 .Replace("%", "\\%")
	 .Replace("*", "\\*");
}
// Bulk-loads all statements from source.
// For the duration of the import the store switches to buffered mode:
// entity ids are served from an in-memory URI map (lockedIdCache), new ids
// come from a counter (see NextId) and added statements are queued in
// addStatementBuffer and written in batches by RunAddBuffer().
// Throws ArgumentNullException if source is null and
// InvalidOperationException if an import is already running.
879 public override void Import(StatementSource source) {
880 if (source == null) throw new ArgumentNullException();
881 if (lockedIdCache != null) throw new InvalidOperationException("Store is already importing.");
883 Init();
884 RunAddBuffer();
// Force NextId() to recompute the highest id on its first call, then
// switch on the import-mode caches.
886 cachedNextId = -1;
887 lockedIdCache = new UriMap();
888 addStatementBuffer = new ArrayList();
// Preload the URI -> id map with every entity already in the store.
890 IDataReader reader = RunReader("SELECT id, value from " + table + "_entities;");
891 try {
892 while (reader.Read())
893 lockedIdCache[AsString(reader[1])] = AsInt(reader[0]);
894 } finally {
895 reader.Close();
898 BeginTransaction();
900 try {
// The base implementation is expected to feed the statements back through
// Add(), which queues them because addStatementBuffer is set.
901 base.Import(source);
902 } finally {
// Write out whatever is still queued, then leave import mode.
903 RunAddBuffer();
904 EndTransaction();
906 lockedIdCache = null;
907 addStatementBuffer = null;
909 // Remove duplicate literals
// NOTE(review): brace-only and other very short lines are missing from this
// copy of the file, so it cannot be told from here whether the loop below
// is live code or sits inside a /* ... */ block. Confirm against the real
// file before changing it. If it is live: each pass reads up to 10000 pairs
// of literals with equal value, language and datatype, deletes the one with
// the higher id and re-points statements to the lower id, until no pair is
// left. "<=>" is presumably MySQL's NULL-safe equality operator. The reader
// inside the loop is closed without a try/finally, and the
// Console.Error.WriteLine("X") looks like leftover debug output.
911 while (true) {
912 bool foundDupLiteral = false;
913 StringBuilder litdupremove = new StringBuilder("DELETE FROM " + table + "_literals WHERE id IN (");
914 StringBuilder litdupreplace = new StringBuilder();
915 Console.Error.WriteLine("X");
916 reader = RunReader("select a.id, b.id from " + table + "_literals as a inner join " + table + "_literals as b on a.value=b.value and a.language<=>b.language and a.datatype <=> b.datatype and a.id<b.id LIMIT 10000");
917 while (reader.Read()) {
918 int lit1 = AsInt(reader[0]);
919 int lit2 = AsInt(reader[1]);
921 if (foundDupLiteral) litdupremove.Append(",");
922 litdupremove.Append(lit2);
924 litdupreplace.Append("UPDATE " + table + "_statements SET object = " + lit1 + " WHERE objecttype=1 AND object=" + lit2 + "; ");
926 foundDupLiteral = true;
928 reader.Close();
929 if (!foundDupLiteral) break;
930 litdupremove.Append(");");
931 RunCommand(litdupremove.ToString());
932 RunCommand(litdupreplace.ToString());
// The literal cache is discarded at the end of the import.
936 literalCache.Clear();
937 literalCacheSize = 0;
// Replaces every occurrence of entity a in the statements table (as
// subject, predicate, object or meta) with entity b, creating b if needed.
// Does nothing further if a is not in the store.
public override void Replace(Entity a, Entity b) {
	Init();
	RunAddBuffer();
	int id = GetResourceId(b, true);

	foreach (string col in fourcols) {
		StringBuilder cmd = new StringBuilder();
		cmd.Append("UPDATE ");
		cmd.Append(table);
		cmd.Append("_statements SET ");
		cmd.Append(col);
		cmd.Append("=");
		cmd.Append(id);
		// WhereItem() only writes the condition itself, so the WHERE
		// keyword has to be supplied here.
		cmd.Append(" WHERE ");
		if (!WhereItem(col, a, cmd, false)) return;
		cmd.Append(";");
		RunCommand(cmd.ToString());
	}
}
// Replaces the statement 'find' with 'replacement' by updating the matching
// row(s) in place. The resources of the replacement are created if needed.
// Does nothing if both statements are equal or if 'find' refers to a
// resource that is not in the store.
// Throws ArgumentNullException if either statement has a null part.
public override void Replace(Statement find, Statement replacement) {
	if (find.AnyNull) throw new ArgumentNullException("find");
	if (replacement.AnyNull) throw new ArgumentNullException("replacement");
	if (find == replacement) return;

	Init();
	RunAddBuffer();

	int subj = GetResourceId(replacement.Subject, true);
	int pred = GetResourceId(replacement.Predicate, true);
	int objtype = ObjectType(replacement.Object);
	int obj = GetResourceId(replacement.Object, true);
	int meta = GetResourceId(replacement.Meta, true);

	// Use a private builder rather than the shared cmdBuffer: WhereClause()
	// below resolves the ids of 'find', and those lookups reset cmdBuffer,
	// which would wipe out the UPDATE built so far.
	StringBuilder cmd = new StringBuilder();

	cmd.Append("UPDATE ");
	cmd.Append(table);
	cmd.Append("_statements SET subject=");
	cmd.Append(subj);
	cmd.Append(", predicate=");
	cmd.Append(pred);
	cmd.Append(", objecttype=");
	cmd.Append(objtype);
	cmd.Append(", object=");
	cmd.Append(obj);
	cmd.Append(", meta=");
	cmd.Append(meta);
	cmd.Append(" ");

	if (!WhereClause(find, cmd))
		return;

	// Terminate the command like the other commands built in this class.
	cmd.Append(";");

	RunCommand(cmd.ToString());
}
// Finds the entities that satisfy all filters at once. Each filter is a
// statement template with at least one null part; the first null part
// (subject, predicate, object, then meta) is the position the wanted entity
// must occupy. A literal object whose datatype is
// "SEMWEB::LITERAL::CONTAINS" is handled by appendLiteralMatch() instead of
// an exact comparison.
// The answer is computed with a single query that joins the statements
// table once per filter; stores that report SupportsFastJoin == false use
// the base class implementation instead.
// Returns an empty array when there are no filters or a filter names a
// resource that is not in the store. Throws ArgumentException if a filter
// has no null part.
public override Entity[] FindEntities(Statement[] filters) {
	if (filters.Length == 0) return new Entity[0];

	if (!SupportsFastJoin)
		return base.FindEntities(filters);

	Init();
	// Make queued statements visible to the query, as the other query
	// methods of this class do.
	RunAddBuffer();

	string f1pos = is_spom(filters[0]);
	if (f1pos == null) throw new ArgumentException("Null must appear in every statement.");

	// The first filter is the table "s"; filter i (i >= 1) is joined as "f<i>".
	StringBuilder cmd = new StringBuilder();
	cmd.Append("SELECT s.");
	cmd.Append(f1pos);
	cmd.Append(", uri.value FROM ");
	cmd.Append(table);
	cmd.Append("_statements AS s LEFT JOIN ");
	cmd.Append(table);
	cmd.Append("_entities AS uri ON uri.id=s.");
	cmd.Append(f1pos);

	if (isliteralmatch(filters[0].Object))
		appendLiteralMatch(cmd, "l0", "s", ((Literal)filters[0].Object).Value);

	for (int i = 1; i < filters.Length; i++) {
		cmd.Append(" INNER JOIN ");
		cmd.Append(table);
		cmd.Append("_statements AS f");
		cmd.Append(i);
		cmd.Append(" ON s.");
		cmd.Append(f1pos);
		cmd.Append("=f");
		cmd.Append(i);
		cmd.Append(".");
		string fipos = is_spom(filters[i]);
		if (fipos == null) throw new ArgumentException("Null must appear in every statement.");
		cmd.Append(fipos);

		if (filters[i].Subject != null)
			if (!WhereItem("f" + i + ".subject", filters[i].Subject, cmd, true)) return new Entity[0];
		if (filters[i].Predicate != null)
			if (!WhereItem("f" + i + ".predicate", filters[i].Predicate, cmd, true)) return new Entity[0];
		if (filters[i].Object != null && !isliteralmatch(filters[i].Object))
			if (!WhereItem("f" + i + ".object", filters[i].Object, cmd, true)) return new Entity[0];
		if (filters[i].Meta != null)
			if (!WhereItem("f" + i + ".meta", filters[i].Meta, cmd, true)) return new Entity[0];

		// The leading space matters: when no condition was written above,
		// the previous token is the column name itself.
		if (filters[i].Object == null)
			cmd.Append(" AND f" + i + ".objecttype=0 ");

		if (isliteralmatch(filters[i].Object)) {
			cmd.Append(" AND f" + i + ".objecttype=1 ");
			appendLiteralMatch(cmd, "l" + i, "f" + i, ((Literal)filters[i].Object).Value);
		}
	}

	// "WHERE 1" lets every following condition start with "and".
	cmd.Append(" WHERE 1 ");

	if (filters[0].Subject != null)
		if (!WhereItem("s.subject", filters[0].Subject, cmd, true)) return new Entity[0];
	if (filters[0].Predicate != null)
		if (!WhereItem("s.predicate", filters[0].Predicate, cmd, true)) return new Entity[0];
	if (filters[0].Object != null && !isliteralmatch(filters[0].Object))
		if (!WhereItem("s.object", filters[0].Object, cmd, true)) return new Entity[0];
	if (isliteralmatch(filters[0].Object))
		cmd.Append(" AND s.objecttype=1 ");
	if (filters[0].Meta != null)
		if (!WhereItem("s.meta", filters[0].Meta, cmd, true)) return new Entity[0];

	if (filters[0].Object == null)
		cmd.Append(" AND s.objecttype=0");

	cmd.Append(";");

	IDataReader reader = RunReader(cmd.ToString());
	ArrayList entities = new ArrayList();
	// 'seen' is used as a set of ids already returned.
	Hashtable seen = new Hashtable();
	try {
		while (reader.Read()) {
			int id = AsInt(reader[0]);
			string uri = AsString(reader[1]);
			if (seen.ContainsKey(id)) continue;
			seen[id] = seen;
			entities.Add(MakeEntity(id, uri, null));
		}
	} finally {
		reader.Close();
	}

	return (Entity[])entities.ToArray(typeof(Entity));
}
// Names the statements-table column of the first null position in s,
// checked in the order subject, predicate, object, meta.
// Returns null when all four positions are set.
private string is_spom(Statement s) {
    string column = null;

    if (s.Subject == null)
        column = "subject";
    else if (s.Predicate == null)
        column = "predicate";
    else if (s.Object == null)
        column = "object";
    else if (s.Meta == null)
        column = "meta";

    return column;
}
// True only when r is a Literal whose DataType is the special marker
// "SEMWEB::LITERAL::CONTAINS"; FindEntities turns such a literal into a
// SQL LIKE substring match instead of an exact object comparison.
private bool isliteralmatch(Resource r) {
    // "is" is already false for a null reference, so this covers null too.
    if (!(r is Literal))
        return false;

    Literal literal = (Literal)r;
    return literal.DataType == "SEMWEB::LITERAL::CONTAINS";
}
// Appends to cmd an INNER JOIN on the <table>_literals table.
//   joinalias - alias given to the literals table in this join
//   lefttable - alias of the statements table whose "object" column holds
//               the literal id
//   pattern   - text that the literal value must contain; it is passed
//               through Escape before being placed between % wildcards
private void appendLiteralMatch(StringBuilder cmd, string joinalias, string lefttable, string pattern) {
    // INNER JOIN <table>_literals AS <joinalias>
    cmd.Append(" INNER JOIN ").Append(table).Append("_literals AS ").Append(joinalias);

    // ON <joinalias>.id=<lefttable>.object
    cmd.Append(" ON ").Append(joinalias).Append(".id=").Append(lefttable).Append(".object");

    // AND <joinalias>.value LIKE "%<escaped pattern>%"
    cmd.Append(" AND ").Append(joinalias).Append(".value LIKE \"%");
    cmd.Append(Escape(pattern, false));
    cmd.Append("%\" ");
}
// Backend hooks that concrete stores implement to send SQL text to the
// database.

// Runs a statement for which no result is read; this class uses it for
// the CREATE TABLE and CREATE INDEX commands.
protected abstract void RunCommand(string sql);

// Runs a query and returns one value. Callers in this class accept null,
// an int, a string, a byte[] (decoded as UTF-8), or any object whose
// ToString() parses as an integer.
protected abstract object RunScalar(string sql);

// Runs a query and returns a reader over its rows; the caller closes it.
protected abstract IDataReader RunReader(string sql);
// Runs a scalar query and converts the result to an int.
// Returns def when the query yields null or a value whose text form is
// not a well-formed integer; other parse failures (for example overflow)
// are not caught here.
private int RunScalarInt(string sql, int def) {
    object result = RunScalar(sql);

    if (result == null)
        return def;
    if (result is int)
        return (int)result;

    // Some backends hand numbers back as another type; go through text.
    int parsed;
    try {
        parsed = int.Parse(result.ToString());
    } catch (FormatException) {
        parsed = def;
    }
    return parsed;
}
// Runs a scalar query and returns the result as a string.
// A null result yields null, a string is returned as is, and a byte[] is
// decoded as UTF-8. Any other type raises FormatException.
private string RunScalarString(string sql) {
    object result = RunScalar(sql);
    if (result == null)
        return null;

    string text = result as string;
    if (text != null)
        return text;

    byte[] raw = result as byte[];
    if (raw != null)
        return System.Text.Encoding.UTF8.GetString(raw);

    throw new FormatException("SQL store returned a literal value as " + result);
}
// Runs each command from GetCreateTableCommands for this store's table
// prefix. Creation is best effort: a failing command does not stop the
// remaining ones and is not propagated (NOTE(review): presumably so that
// opening a database whose tables already exist is harmless -- confirm).
// With the Debug flag set, the swallowed exception is written to stderr
// so that genuine failures can be diagnosed.
protected virtual void CreateTable() {
    foreach (string cmd in GetCreateTableCommands(table)) {
        try {
            RunCommand(cmd);
        } catch (Exception e) {
            if (Debug) Console.Error.WriteLine(e);
        }
    }
}
// Runs each command from GetCreateIndexCommands for this store's table
// prefix. Creation is best effort: a failing command does not stop the
// remaining ones and is not propagated (NOTE(review): presumably so that
// indexes which already exist are tolerated -- confirm).
// With the Debug flag set, the swallowed exception is written to stderr
// so that genuine failures can be diagnosed.
protected virtual void CreateIndexes() {
    foreach (string cmd in GetCreateIndexCommands(table)) {
        try {
            RunCommand(cmd);
        } catch (Exception e) {
            if (Debug) Console.Error.WriteLine(e);
        }
    }
}
// Overridable hook; the base implementation does nothing.
// NOTE(review): presumably called before a batch of writes so that a
// backend can open a database transaction -- confirm against callers.
protected virtual void BeginTransaction() {
}
// Overridable hook; the base implementation does nothing.
// NOTE(review): presumably called after a batch of writes so that a
// backend can commit its database transaction -- confirm against callers.
protected virtual void EndTransaction() {
}
// Builds the three CREATE TABLE commands for a store whose tables share
// the given name prefix, in the order <table>_statements,
// <table>_literals, <table>_entities.
internal static string[] GetCreateTableCommands(string table) {
    string[] commands = new string[3];

    // One row per statement; objecttype is compared against 0 and 1 by the
    // query code to tell entity objects from literal objects.
    commands[0] = "CREATE TABLE " + table + "_statements" +
        "(subject int UNSIGNED NOT NULL, predicate int UNSIGNED NOT NULL, objecttype int NOT NULL, object int UNSIGNED NOT NULL, meta int UNSIGNED NOT NULL);";

    // Literal values with optional language and datatype.
    commands[1] = "CREATE TABLE " + table + "_literals" +
        "(id INT NOT NULL, value BLOB NOT NULL, language TEXT, datatype TEXT, PRIMARY KEY(id));";

    // Entity id to value (URI) mapping.
    commands[2] = "CREATE TABLE " + table + "_entities" +
        "(id INT NOT NULL, value BLOB NOT NULL, PRIMARY KEY(id));";

    return commands;
}
// Builds the six CREATE INDEX commands for a store whose tables share the
// given name prefix: four on <table>_statements, one on <table>_literals
// and a unique one on <table>_entities. The index names themselves do not
// include the table prefix.
internal static string[] GetCreateIndexCommands(string table) {
    string statements = table + "_statements";
    string literals = table + "_literals";
    string entities = table + "_entities";

    string[] commands = new string[6];
    commands[0] = "CREATE INDEX subject_index ON " + statements + "(subject);";
    commands[1] = "CREATE INDEX predicate_index ON " + statements + "(predicate);";
    commands[2] = "CREATE INDEX object_index ON " + statements + "(objecttype, object);";
    commands[3] = "CREATE INDEX meta_index ON " + statements + "(meta);";

    // Both value columns are indexed with an explicit length: value(30) and value(255).
    commands[4] = "CREATE INDEX literal_index ON " + literals + "(value(30));";
    commands[5] = "CREATE UNIQUE INDEX entity_index ON " + entities + "(value(255));";
    return commands;
}
1194 namespace SemWeb.IO {
1195 using SemWeb;
1196 using SemWeb.Stores;
1198 // NEEDS TO BE UPDATED
1199 /*class SQLWriter : RdfWriter {
1200 TextWriter writer;
1201 string table;
1203 int resourcecounter = 0;
1204 Hashtable resources = new Hashtable();
1206 NamespaceManager m = new NamespaceManager();
1208 string[,] fastmap = new string[3,2];
1210 public SQLWriter(string spec) : this(GetWriter("-"), spec) { }
1212 public SQLWriter(string file, string tablename) : this(GetWriter(file), tablename) { }
1214 public SQLWriter(TextWriter writer, string tablename) {
1215 this.writer = writer;
1216 this.table = tablename;
1218 foreach (string cmd in SQLStore.GetCreateTableCommands(table))
1219 writer.WriteLine(cmd);
1222 public override NamespaceManager Namespaces { get { return m; } }
1224 public override void WriteStatement(string subj, string pred, string obj) {
1225 writer.WriteLine("INSERT INTO {0}_statements VALUES ({1}, {2}, 0, {3}, 0);", table, ID(subj, 0), ID(pred, 1), ID(obj, 2));
1228 public override void WriteStatement(string subj, string pred, Literal literal) {
1229 writer.WriteLine("INSERT INTO {0}_statements VALUES ({1}, {2}, 1, {3}, 0);", table, ID(subj, 0), ID(pred, 1), ID(literal));
1232 public override string CreateAnonymousEntity() {
1233 int id = ++resourcecounter;
1234 string uri = "_anon:" + id;
1235 return uri;
1238 public override void Close() {
1239 base.Close();
1240 foreach (string cmd in SQLStore.GetCreateIndexCommands(table))
1241 writer.WriteLine(cmd);
1242 writer.Close();
1245 private string ID(Literal literal) {
1246 string id = (string)resources[literal];
1247 if (id == null) {
1248 id = (++resourcecounter).ToString();
1249 resources[literal] = id;
1250 writer.WriteLine("INSERT INTO {0}_literals VALUES ({1}, {2}, {3}, {4});", table, id, Escape(literal.Value), Escape(literal.Language), Escape(literal.DataType));
1252 return id;
1255 private string Escape(string str) {
1256 if (str == null) return "NULL";
1257 return "\"" + EscapeUnquoted(str) + "\"";
1260 StringBuilder EscapeUnquotedBuffer = new StringBuilder();
1261 private string EscapeUnquoted(string str) {
1262 StringBuilder b = EscapeUnquotedBuffer;
1263 b.Length = 0;
1264 b.Append(str);
1265 SQLStore.Escape(b);
1266 return b.ToString();
1269 private string ID(string uri, int x) {
1270 if (uri.StartsWith("_anon:")) return uri.Substring(6);
1272 // Make this faster when a subject, predicate, or object is repeated.
1273 if (fastmap[0,0] != null && uri == fastmap[0, 0]) return fastmap[0, 1];
1274 if (fastmap[1,0] != null && uri == fastmap[1, 0]) return fastmap[1, 1];
1275 if (fastmap[2,0] != null && uri == fastmap[2, 0]) return fastmap[2, 1];
1277 string id;
1279 if (resources.ContainsKey(uri)) {
1280 id = (string)resources[uri];
1281 } else {
1282 id = (++resourcecounter).ToString();
1283 resources[uri] = id;
1285 string literalid = ID(new Literal(uri));
1286 writer.WriteLine("INSERT INTO {0}_statements VALUES ({1}, 0, 1, {2}, 0);", table, id, literalid);
1289 fastmap[x, 0] = uri;
1290 fastmap[x, 1] = id;
1292 return id;