Fixed two bugs in the RTree insertion implementation.
[aesalon.git] / include / storage / RTree.h
bloba1e19b3b9de26408148558640833d41b85120bc9
1 /** Aesalon, a tool to visualize program behaviour in real time.
2 Copyright (C) 2009-2011, Aesalon development team.
4 Aesalon is distributed under the terms of the GNU GPLv3. See
5 the included file LICENSE for more information.
7 @file include/storage/RTree.h
8 */
10 #ifndef AesalonStorage_RTree_H
11 #define AesalonStorage_RTree_H
13 #include <vector>
15 #include "util/MessageSystem.h"
17 namespace Storage {
19 /** An n-dimensional R-tree.
20 Notes:
21 - Key must support a basic assignment specifier and also the element zero, plus all the usual arithmetic operators.
22 Using a non-elementary type is not a good idea.
23 - Value must be either a basic type or a pointer.
24 - Minimum <= Maximum/2. (Logic in code assumes this.)
25 - Maximum <= some reasonable number. Many linear searches through lists of this size take place.
26 A reasonable size is eight.
27 - Dimensions should be less than about six. Any more and you have other problems . . .
29 template<typename Key, typename Value, int Dimensions, int Maximum = 8, int Minimum = Maximum/2>
30 class RTree {
31 public:
32 class Range {
33 private:
34 bool m_valid;
35 Key m_start, m_end;
36 public:
37 Range() : m_valid(false) {}
38 Range(Key start, Key end) : m_valid(true), m_start(start), m_end(end) {}
40 bool valid() const { return m_valid; }
41 Key start() const { return m_start; }
42 Key end() const { return m_end; }
43 Key size() const { return m_end - m_start; }
45 bool overlaps(const Range &other) const {
46 Message(Debug, "Overlap test:");
47 Message(Debug, "\tm_start: " << m_start << "\t m_end: " << m_end);
48 Message(Debug, "\tother.m_start: " << other.m_start << "\tother.m_end: " << other.m_end);
49 if(m_start > other.m_end || other.m_start > m_end) return false;
50 return true;
53 Key enlargementToCover(const Range &other) const {
54 Key sum = 0;
55 if(m_start > other.m_start) sum += (m_start - other.m_start);
56 if(m_end < other.m_end) sum += (other.m_end - m_end);
57 return sum;
61 class Bound {
62 private:
63 Range m_ranges[Dimensions];
64 public:
65 Bound() {}
66 Bound(Range ranges[Dimensions]) {
67 for(int i = 0; i < Dimensions; i ++) m_ranges[i] = ranges[i];
70 void setRange(const Range &range, int dimension) { m_ranges[dimension] = range; }
71 Range &range(int dimension) { return m_ranges[dimension]; }
72 const Range &range(int dimension) const { return m_ranges[dimension]; }
74 bool overlaps(const Bound &other) {
75 for(int i = 0; i < Dimensions; i ++) {
76 if(!m_ranges[i].overlaps(other.m_ranges[i])) return false;
78 return true;
81 Key enlargementToCover(const Bound &other) const {
82 Key volume = 0;
84 for(int j = 0; j < Dimensions; j ++) {
85 Key difference = other.m_ranges[j].enlargementToCover(m_ranges[j]);
86 if(difference == 0) continue;
88 for(int k = 0; k < Dimensions; k ++) {
89 if(k == j) continue;
90 difference *= other.m_ranges[j].size();
92 volume += difference;
95 return volume;
98 void enclose(const Bound &other) {
99 for(int i = 0; i < Dimensions; i ++) {
100 if(m_ranges[i].valid() && other.m_ranges[i].valid()) {
101 m_ranges[i] = Range(
102 std::min(m_ranges[i].start(), other.m_ranges[i].start()),
103 std::max(m_ranges[i].end(), other.m_ranges[i].end()));
105 else if(other.m_ranges[i].valid()) {
106 m_ranges[i] = other.m_ranges[i];
108 // If other.m_ranges[i] is not valid, nothing needs to be done.
113 class SearchProcessor {
114 public:
115 virtual bool process(const Bound &bound, Value value) = 0;
117 protected:
118 class Node;
119 struct Branch {
120 Bound bound;
121 union {
122 Node *node;
123 Value value;
126 class Node {
127 private:
128 bool m_isLeaf;
129 Branch m_branches[Maximum];
130 int m_branchCount;
131 Node *m_parent;
132 public:
133 Node(bool isLeaf) : m_isLeaf(isLeaf), m_branchCount(0), m_parent(NULL) {}
134 bool isLeaf() const { return m_isLeaf; }
135 void setLeaf(bool leaf) { m_isLeaf = leaf; }
137 Branch &branch(int which) { return m_branches[which]; }
138 int branchCount() const { return m_branchCount; }
139 void setBranchCount(int count) { m_branchCount = count; }
140 Node *parent() const { return m_parent; }
141 void setParent(Node *node) { m_parent = node; }
143 Bound bound() const {
144 Bound result;
145 for(int i = 0; i < Dimensions; i ++) {
146 Key minimum, maximum;
147 for(int j = 0; j < branchCount(); j ++) {
148 if(j == 0) {
149 minimum = m_branches[j].bound.range(i).start();
150 maximum = m_branches[j].bound.range(i).end();
151 continue;
153 minimum = std::min(minimum, m_branches[j].bound.range(i).start());
154 maximum = std::max(maximum, m_branches[j].bound.range(i).end());
156 result.range(i) = Range(minimum, maximum);
158 return result;
161 private:
162 Node *m_root;
163 public:
164 RTree();
165 ~RTree();
167 void search(const Bound &bound, SearchProcessor *processor);
168 void insert(const Bound &bound, Value value);
169 private:
170 void search(const Bound &bound, Node *node, SearchProcessor *processor);
172 Node *chooseLeaf(const Bound &bound);
174 Node *adjustTree(Node *n, Node *nn);
176 Node *splitNode(Node *node, Branch &branch);
179 template<typename Key, typename Value, int Dimensions, int Maximum, int Minimum>
180 RTree<Key, Value, Dimensions, Maximum, Minimum>::RTree() {
181 m_root = new Node(true);
184 template<typename Key, typename Value, int Dimensions, int Maximum, int Minimum>
185 RTree<Key, Value, Dimensions, Maximum, Minimum>::~RTree() {
189 template<typename Key, typename Value, int Dimensions, int Maximum, int Minimum>
190 void RTree<Key, Value, Dimensions, Maximum, Minimum>::search(
191 const RTree<Key, Value, Dimensions, Maximum, Minimum>::Bound &bound,
192 RTree<Key, Value, Dimensions, Maximum, Minimum>::SearchProcessor *processor) {
194 Message(Debug, "Beginning search.");
196 search(bound, m_root, processor);
199 template<typename Key, typename Value, int Dimensions, int Maximum, int Minimum>
200 void RTree<Key, Value, Dimensions, Maximum, Minimum>::insert(
201 const RTree<Key, Value, Dimensions, Maximum, Minimum>::Bound &bound,
202 Value value) {
204 Message(Debug, "Beginning insertion of value " << value);
206 Message(Debug, "Calling chooseLeaf() . . .");
207 Node *leaf = chooseLeaf(bound), *ll = NULL;
208 int count = leaf->branchCount();
209 if(count < Maximum) {
210 Message(Debug, "Leaf node count is less than the maximum, inserting as usual.");
211 leaf->branch(count).bound = bound;
212 leaf->branch(count).value = value;
213 leaf->setBranchCount(count+1);
214 Message(Debug, "Leaf node new branch count: " << leaf->branchCount());
216 else {
217 Message(Debug, "Leaf node branch count is already at maximum, splitting node . . .");
218 Branch b;
219 b.bound = bound;
220 b.value = value;
221 ll = splitNode(leaf, b);
222 Message(Debug, "Node split, ll is " << ll);
225 Message(Debug, "Calling adjustTree() . . .");
226 Node *result = adjustTree(leaf, ll);
227 if(result != NULL) {
228 Message(Debug, "**** Creating new root node!");
229 Node *newRoot = new Node(false);
230 newRoot->setBranchCount(2);
231 newRoot->branch(0).bound = m_root->bound();
232 newRoot->branch(0).node = m_root;
233 newRoot->branch(1).bound = result->bound();
234 newRoot->branch(1).node = result;
235 m_root->setParent(newRoot);
236 result->setParent(newRoot);
237 m_root = newRoot;
240 Message(Debug, "Finished.");
243 template<typename Key, typename Value, int Dimensions, int Maximum, int Minimum>
244 void RTree<Key, Value, Dimensions, Maximum, Minimum>::search(
245 const RTree<Key, Value, Dimensions, Maximum, Minimum>::Bound &bound,
246 RTree<Key, Value, Dimensions, Maximum, Minimum>::Node *node,
247 RTree<Key, Value, Dimensions, Maximum, Minimum>::SearchProcessor *processor) {
249 if(node == NULL) return;
251 if(node->isLeaf()) {
252 Message(Debug, "At leaf node! Branch count: " << node->branchCount());
253 for(int i = 0; i < node->branchCount(); i ++) {
254 Branch &branch = node->branch(i);
256 Message(Debug, "Considering leaf branch #" << i);
257 if(branch.bound.overlaps(bound)) {
258 Message(Debug, "Leaf branch overlaps bound, processing . . .");
259 processor->process(branch.bound, branch.value);
261 else Message(Debug, "Leaf branch does not overlap bound, ignoring.");
264 else {
265 Message(Debug, "Not in leaf node. Branch count: " << node->branchCount());
266 for(int i = 0; i < node->branchCount(); i ++) {
267 Branch &branch = node->branch(i);
268 Message(Debug, "Considering non-leaf branch #" << i);
269 Message(Debug, "\tbranchCount():" << branch.node->branchCount());
270 if(branch.bound.overlaps(bound)) {
271 Message(Debug, "Non-leaf branch overlaps bound, processing . . .");
272 search(bound, branch.node, processor);
274 else {
275 Message(Debug, "Non-leaf branch does not overlap bound, ignoring.");
281 template<typename Key, typename Value, int Dimensions, int Maximum, int Minimum>
282 typename RTree<Key, Value, Dimensions, Maximum, Minimum>::Node *
283 RTree<Key, Value, Dimensions, Maximum, Minimum>::chooseLeaf(
284 const RTree<Key, Value, Dimensions, Maximum, Minimum>::Bound &bound) {
286 Node *node = m_root;
288 while(!node->isLeaf()) {
289 Message(Debug, "Node is not leaf node, continuing downwards . . .");
290 Node *smallestNode = NULL;
291 Key smallestVolume = 0;
292 for(int i = 0; i < node->branchCount(); i ++) {
293 Key volume = bound.enlargementToCover(node->branch(i).bound);
295 if(volume < smallestVolume || smallestNode == NULL) {
296 smallestNode = node->branch(i).node;
297 smallestVolume = volume;
299 else if(volume == smallestVolume) {
300 /* TODO: implement tiebreaker. */
301 Message(Debug, "RTree: smallestVolume tie-breaker not implemented!");
305 node = smallestNode;
308 Message(Debug, "Found leaf node.");
309 Message(Debug, "Leaf node branch count: " << node->branchCount());
311 return node;
314 template<typename Key, typename Value, int Dimensions, int Maximum, int Minimum>
315 typename RTree<Key, Value, Dimensions, Maximum, Minimum>::Node *
316 RTree<Key, Value, Dimensions, Maximum, Minimum>::adjustTree(
317 RTree<Key, Value, Dimensions, Maximum, Minimum>::Node *n,
318 RTree<Key, Value, Dimensions, Maximum, Minimum>::Node *nn) {
320 Message(Debug, "Starting tree adjustment, nn is " << nn);
322 while(n != m_root) {
323 Message(Debug, "*** Adjusting tree node . . .");
324 Node *p = n->parent();
326 int i = 0;
327 for(; i < p->branchCount(); i ++) {
328 if(p->branch(i).node == n) {
329 break;
333 Bound &bound = p->branch(i).bound;
335 for(; i < n->branchCount(); i ++) {
336 bound.enclose(p->branch(i).bound);
339 if(nn != NULL) {
340 // Add nn to p.
341 int count = p->branchCount();
342 if(count < Maximum) {
343 for(i = 0; i < nn->branchCount(); i ++) {
344 p->branch(count).bound.enclose(nn->branch(i).bound);
346 p->branch(count).node = nn;
347 nn = NULL;
349 else {
350 // Invoke splitNode. TBI.
351 Message(Fatal, "Splitting nodes in adjustTree NYI.");
352 //nn = splitNode(n);
356 n = p;
359 return nn;
362 template<typename Key, typename Value, int Dimensions, int Maximum, int Minimum>
363 typename RTree<Key, Value, Dimensions, Maximum, Minimum>::Node *
364 RTree<Key, Value, Dimensions, Maximum, Minimum>::splitNode(
365 RTree<Key, Value, Dimensions, Maximum, Minimum>::Node *node,
366 RTree<Key, Value, Dimensions, Maximum, Minimum>::Branch &branch) {
368 // Linear-time algorithm specified in original R-tree publication.
370 Node *nn = new Node(node->isLeaf());
371 nn->setParent(node->parent());
373 Branch list[Maximum+1];
374 int listSize = Maximum+1;
376 for(int i = 0; i < Maximum; i ++) {
377 list[i] = node->branch(i);
379 list[Maximum] = branch;
381 node->setBranchCount(0);
382 nn->setBranchCount(0);
384 int highestStart[Dimensions];
385 int lowestEnd[Dimensions];
386 Key lowest[Dimensions];
387 Key highest[Dimensions];
389 for(int j = 0; j < Maximum+1; j ++) {
390 for(int i = 0; i < Dimensions; i ++) {
391 const Range &iRange = list[j].bound.range(i);
392 if(j == 0) {
393 lowest[i] = iRange.start();
394 highest[i] = iRange.end();
395 highestStart[i] = 0;
396 lowestEnd[i] = 0;
397 continue;
399 lowest[i] = std::min(lowest[i], iRange.start());
400 highest[i] = std::max(highest[i], iRange.end());
402 Range &itemRange = list[j].bound.range(i);
403 if(list[highestStart[i]].bound.range(i).start() > itemRange.start()) {
404 highestStart[i] = j;
406 if(list[lowestEnd[i]].bound.range(i).end() < itemRange.end()) {
407 lowestEnd[i] = j;
412 Key maxSeparation = 0;
413 int maxIndex = -1;
415 /* Create normalized separations. */
416 for(int i = 0; i < Dimensions; i ++) {
417 Key separation =
418 (list[lowestEnd[i]].bound.range(i).end() - list[highestStart[i]].bound.range(i).start())
419 / (highest[i]-lowest[i]);
421 if(maxIndex == -1 || separation > maxSeparation) maxIndex = i, maxSeparation = separation;
424 node->branch(0) = list[highestStart[maxIndex]];
425 node->setBranchCount(1);
426 nn->branch(0) = list[lowestEnd[maxIndex]];
427 nn->setBranchCount(1);
429 Bound nodeBound = list[highestStart[maxIndex]].bound;
430 Bound nnBound = list[lowestEnd[maxIndex]].bound;
432 /* Do the removal in the correct order . . . */
433 if(highestStart[maxIndex] > lowestEnd[maxIndex]) {
434 list[highestStart[maxIndex]] = list[listSize-1];
435 list[lowestEnd[maxIndex]] = list[listSize-2];
436 listSize -= 2;
438 else if(lowestEnd[maxIndex] > highestStart[maxIndex]) {
439 list[lowestEnd[maxIndex]] = list[listSize-1];
440 list[highestStart[maxIndex]] = list[listSize-2];
441 listSize -= 2;
443 /* They are one and the same . . . this should not happen! */
444 else {
445 Message(Fatal, "R-tree: degeneracy case, all entries are the same. Support NYI.");
448 while(true) {
449 if(listSize == 0) break;
450 else if((node->branchCount() + listSize) == Minimum) {
451 Message(Debug, "Node+listSize == Minimum, filling . . .");
452 Message(Debug, "listSize: " << listSize);
453 Message(Debug, "Before: " << node->branchCount());
454 for(int i = 0; i < listSize; i ++) {
455 node->branch(node->branchCount()) = list[i];
456 node->setBranchCount(node->branchCount()+1);
458 Message(Debug, "After: " << node->branchCount());
459 break;
461 else if((nn->branchCount() + listSize) == Minimum) {
462 Message(Debug, "nn+listSize == Minimum, filling . . .");
463 for(int i = 0; i < listSize; i ++) {
464 nn->branch(node->branchCount()) = list[i];
465 nn->setBranchCount(nn->branchCount()+1);
467 break;
469 /* PickNext algorithm for linear-time is to simply pick any item. Choose the last. */
470 Key volume1 = nodeBound.enlargementToCover(list[listSize-1].bound);
471 Key volume2 = nnBound.enlargementToCover(list[listSize-1].bound);
473 if(volume1 < volume2) {
474 Message(Debug, "Adding to node . . .");
475 node->branch(node->branchCount()) = list[listSize-1];
476 node->setBranchCount(node->branchCount()+1);
477 nodeBound.enclose(list[listSize-1].bound);
479 else if(volume2 < volume1) {
480 Message(Debug, "Adding to nn . . .");
481 nn->branch(node->branchCount()) = list[listSize-1];
482 nn->setBranchCount(nn->branchCount()+1);
483 nnBound.enclose(list[listSize-1].bound);
485 else {
486 Message(Warning, "RTree: splitNode tie-breaker NYI. Defaulting to node.");
487 node->branch(node->branchCount()) = list[listSize-1];
488 node->setBranchCount(node->branchCount()+1);
489 nodeBound.enclose(list[listSize-1].bound);
491 listSize --;
494 Message(Debug, "\tBefore splitting, there were " << Maximum+1 << " elements.");
495 Message(Debug, "\tAfter splitting, there are " << node->branchCount() << " + " << nn->branchCount()
496 << " elements.");
498 return nn;
501 } // namespace Storage
503 #endif