BPicture: Fix archive constructor.
[haiku.git] / src / add-ons / kernel / file_systems / ramfs / Query.cpp
blob19fbcc0a70d9aa5b7974c8e1dbed41f3638b43f8
1 /* Query - query parsing and evaluation
3 * The pattern matching is roughly based on code originally written
4 * by J. Kercheval, and on code written by Kenneth Almquist, though
5 * it shares no code.
7 * Copyright 2001-2006, Axel Dörfler, axeld@pinc-software.de.
8 * This file may be used under the terms of the MIT License.
9 */
11 // Adjusted by Ingo Weinhold <bonefish@cs.tu-berlin.de> for usage in RAM FS.
14 #include "Query.h"
15 #include "Debug.h"
16 #include "Directory.h"
17 #include "Entry.h"
18 #include "Misc.h"
19 #include "Node.h"
20 #include "Volume.h"
21 #include "Index.h"
23 #include <SupportDefs.h>
24 #include <TypeConstants.h>
25 #include <AppDefs.h>
26 #include <fs_query.h>
28 #include <malloc.h>
29 #include <stdio.h>
30 #include <string.h>
33 // IndexWrapper
35 // constructor
36 IndexWrapper::IndexWrapper(Volume *volume)
37 : fVolume(volume),
38 fIndex(NULL)
42 // SetTo
43 status_t
44 IndexWrapper::SetTo(const char *name)
46 fIndex = NULL;
47 if (fVolume)
48 fIndex = fVolume->FindIndex(name);
49 return (fIndex ? B_OK : B_ENTRY_NOT_FOUND);
52 // Unset
53 void
54 IndexWrapper::Unset()
56 fIndex = NULL;
59 // Type
60 uint32
61 IndexWrapper::Type() const
63 return (fIndex ? fIndex->GetType() : 0);
66 // GetSize
67 off_t
68 IndexWrapper::GetSize() const
70 // Compute a fake "index size" based on the number of entries
71 // (1024 + 16 * entry count), so we don't need to adjust the code using it.
72 return 1024LL + (fIndex ? fIndex->CountEntries() : 0) * 16LL;
75 // KeySize
76 int32
77 IndexWrapper::KeySize() const
79 return (fIndex ? fIndex->GetKeyLength() : 0);
83 // IndexIterator
85 // constructor
86 IndexIterator::IndexIterator(IndexWrapper *indexWrapper)
87 : fIndexWrapper(indexWrapper),
88 fIterator(),
89 fInitialized(false)
93 // Find
94 status_t
95 IndexIterator::Find(const uint8 *const key, size_t keyLength)
97 status_t error = B_ENTRY_NOT_FOUND;
98 if (fIndexWrapper && fIndexWrapper->fIndex) {
99 // TODO: We actually don't want an exact Find() here, but rather a
100 // FindClose().
101 fInitialized = fIndexWrapper->fIndex->Find(key, keyLength, &fIterator);
102 if (fInitialized)
103 error = B_OK;
105 return error;
108 // Rewind
109 status_t
110 IndexIterator::Rewind()
112 status_t error = B_ENTRY_NOT_FOUND;
113 if (fIndexWrapper && fIndexWrapper->fIndex) {
114 fInitialized = fIndexWrapper->fIndex->GetIterator(&fIterator);
115 if (fInitialized)
116 error = B_OK;
118 return error;
121 // GetNextEntry
122 status_t
123 IndexIterator::GetNextEntry(uint8 *buffer, uint16 *_keyLength,
124 size_t /*bufferSize*/, Entry **_entry)
126 status_t error = B_ENTRY_NOT_FOUND;
127 if (fIndexWrapper && fIndexWrapper->fIndex) {
128 // init iterator, if not done yet
129 if (!fInitialized) {
130 fIndexWrapper->fIndex->GetIterator(&fIterator);
131 fInitialized = true;
134 // get key
135 size_t keyLength;
136 if (Entry *entry = fIterator.GetCurrent(buffer, &keyLength)) {
137 *_keyLength = keyLength;
138 *_entry = entry;
139 error = B_OK;
142 // get next entry
143 fIterator.GetNext();
145 return error;
149 // compare_integral
150 template<typename Key>
151 static inline
153 compare_integral(const Key &a, const Key &b)
155 if (a < b)
156 return -1;
157 else if (a > b)
158 return 1;
159 return 0;
162 // compare_keys
163 static
165 compare_keys(const uint8 *key1, size_t length1, const uint8 *key2,
166 size_t length2, uint32 type)
168 switch (type) {
169 case B_INT32_TYPE:
170 return compare_integral(*(int32*)key1, *(int32*)key2);
171 case B_UINT32_TYPE:
172 return compare_integral(*(uint32*)key1, *(uint32*)key2);
173 case B_INT64_TYPE:
174 return compare_integral(*(int64*)key1, *(int64*)key2);
175 case B_UINT64_TYPE:
176 return compare_integral(*(uint64*)key1, *(uint64*)key2);
177 case B_FLOAT_TYPE:
178 return compare_integral(*(float*)key1, *(float*)key2);
179 case B_DOUBLE_TYPE:
180 return compare_integral(*(double*)key1, *(double*)key2);
181 case B_STRING_TYPE:
183 int result = strncmp((const char*)key1, (const char*)key2,
184 min(length1, length2));
185 if (result == 0) {
186 result = compare_integral(strnlen((const char*)key1, length1),
187 strnlen((const char*)key2, length2));
189 return result;
192 return -1;
195 // compareKeys
196 static inline
198 compareKeys(uint32 type, const uint8 *key1, size_t length1, const uint8 *key2,
199 size_t length2)
201 return compare_keys(key1, length1, key2, length2, type);
208 // The parser has a very static design, but it will do what is required.
210 // ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator
211 // precedence, that is =,!=,>,<,>=,<= .. && .. ||.
212 // Apparently, the "!" (not) can only be used with brackets.
214 // If you think that there are too few NULL pointer checks in some places
215 // of the code, just read the beginning of the query constructor.
216 // The API is not fully available, just the Query and the Expression class
217 // are.
220 enum ops {
221 OP_NONE,
223 OP_AND,
224 OP_OR,
226 OP_EQUATION,
228 OP_EQUAL,
229 OP_UNEQUAL,
230 OP_GREATER_THAN,
231 OP_LESS_THAN,
232 OP_GREATER_THAN_OR_EQUAL,
233 OP_LESS_THAN_OR_EQUAL,
236 enum match {
237 NO_MATCH = 0,
238 MATCH_OK = 1,
240 MATCH_BAD_PATTERN = -2,
241 MATCH_INVALID_CHARACTER
244 // return values from isValidPattern()
245 enum {
246 PATTERN_INVALID_ESCAPE = -3,
247 PATTERN_INVALID_RANGE,
248 PATTERN_INVALID_SET
251 union value {
252 int64 Int64;
253 uint64 Uint64;
254 int32 Int32;
255 uint32 Uint32;
256 float Float;
257 double Double;
258 char CString[kMaxIndexKeyLength];
261 // B_MIME_STRING_TYPE is defined in storage/Mime.h, but we
262 // don't need the whole file here; the type can't change anyway
263 #ifndef _MIME_H
264 # define B_MIME_STRING_TYPE 'MIMS'
265 #endif
267 class Term {
268 public:
269 Term(int8 op) : fOp(op), fParent(NULL) {}
270 virtual ~Term() {}
272 int8 Op() const { return fOp; }
274 void SetParent(Term *parent) { fParent = parent; }
275 Term *Parent() const { return fParent; }
277 virtual status_t Match(Entry *entry, Node* node,
278 const char *attribute = NULL, int32 type = 0,
279 const uint8 *key = NULL, size_t size = 0) = 0;
280 virtual void Complement() = 0;
282 virtual void CalculateScore(IndexWrapper &index) = 0;
283 virtual int32 Score() const = 0;
285 virtual status_t InitCheck() = 0;
287 virtual bool NeedsEntry() = 0;
289 #ifdef DEBUG
290 virtual void PrintToStream() = 0;
291 #endif
293 protected:
294 int8 fOp;
295 Term *fParent;
298 // Although an Equation object is quite independent from the volume on which
299 // the query is run, there are some dependencies that are produced while
300 // querying:
301 // The type/size of the value, the score, and if it has an index or not.
302 // So you could run more than one query on the same volume, but it might return
303 // wrong values when it runs concurrently on another volume.
304 // That's not an issue right now, because we run single-threaded and don't use
305 // queries more than once.
307 class Equation : public Term {
308 public:
309 Equation(char **expr);
310 virtual ~Equation();
312 virtual status_t InitCheck();
314 status_t ParseQuotedString(char **_start, char **_end);
315 char *CopyString(char *start, char *end);
317 virtual status_t Match(Entry *entry, Node* node,
318 const char *attribute = NULL, int32 type = 0,
319 const uint8 *key = NULL, size_t size = 0);
320 virtual void Complement();
322 status_t PrepareQuery(Volume *volume, IndexWrapper &index, IndexIterator **iterator,
323 bool queryNonIndexed);
324 status_t GetNextMatching(Volume *volume, IndexIterator *iterator,
325 struct dirent *dirent, size_t bufferSize);
327 virtual void CalculateScore(IndexWrapper &index);
328 virtual int32 Score() const { return fScore; }
330 virtual bool NeedsEntry();
332 #ifdef DEBUG
333 virtual void PrintToStream();
334 #endif
336 private:
337 Equation(const Equation &);
338 Equation &operator=(const Equation &);
339 // no implementation
341 status_t ConvertValue(type_code type);
342 bool CompareTo(const uint8 *value, uint16 size);
343 uint8 *Value() const { return (uint8 *)&fValue; }
344 status_t MatchEmptyString();
346 char *fAttribute;
347 char *fString;
348 union value fValue;
349 type_code fType;
350 size_t fSize;
351 bool fIsPattern;
353 int32 fScore;
354 bool fHasIndex;
357 class Operator : public Term {
358 public:
359 Operator(Term *,int8,Term *);
360 virtual ~Operator();
362 Term *Left() const { return fLeft; }
363 Term *Right() const { return fRight; }
365 virtual status_t Match(Entry *entry, Node* node,
366 const char *attribute = NULL, int32 type = 0,
367 const uint8 *key = NULL, size_t size = 0);
368 virtual void Complement();
370 virtual void CalculateScore(IndexWrapper &index);
371 virtual int32 Score() const;
373 virtual status_t InitCheck();
375 virtual bool NeedsEntry();
377 //Term *Copy() const;
378 #ifdef DEBUG
379 virtual void PrintToStream();
380 #endif
382 private:
383 Operator(const Operator &);
384 Operator &operator=(const Operator &);
385 // no implementation
387 Term *fLeft,*fRight;
391 //---------------------------------
394 void
395 skipWhitespace(char **expr, int32 skip = 0)
397 char *string = (*expr) + skip;
398 while (*string == ' ' || *string == '\t') string++;
399 *expr = string;
403 void
404 skipWhitespaceReverse(char **expr,char *stop)
406 char *string = *expr;
407 while (string > stop && (*string == ' ' || *string == '\t')) string--;
408 *expr = string;
412 // #pragma mark -
415 uint32
416 utf8ToUnicode(char **string)
418 uint8 *bytes = (uint8 *)*string;
419 int32 length;
420 uint8 mask = 0x1f;
422 switch (bytes[0] & 0xf0) {
423 case 0xc0:
424 case 0xd0: length = 2; break;
425 case 0xe0: length = 3; break;
426 case 0xf0:
427 mask = 0x0f;
428 length = 4;
429 break;
430 default:
431 // valid 1-byte character
432 // and invalid characters
433 (*string)++;
434 return bytes[0];
436 uint32 c = bytes[0] & mask;
437 int32 i = 1;
438 for (;i < length && (bytes[i] & 0x80) > 0;i++)
439 c = (c << 6) | (bytes[i] & 0x3f);
441 if (i < length) {
442 // invalid character
443 (*string)++;
444 return (uint32)bytes[0];
446 *string += length;
447 return c;
451 int32
452 getFirstPatternSymbol(char *string)
454 char c;
456 for (int32 index = 0;(c = *string++);index++) {
457 if (c == '*' || c == '?' || c == '[')
458 return index;
460 return -1;
464 bool
465 isPattern(char *string)
467 return getFirstPatternSymbol(string) >= 0 ? true : false;
471 status_t
472 isValidPattern(char *pattern)
474 while (*pattern) {
475 switch (*pattern++) {
476 case '\\':
477 // the escape character must not be at the end of the pattern
478 if (!*pattern++)
479 return PATTERN_INVALID_ESCAPE;
480 break;
482 case '[':
483 if (pattern[0] == ']' || !pattern[0])
484 return PATTERN_INVALID_SET;
486 while (*pattern != ']') {
487 if (*pattern == '\\' && !*++pattern)
488 return PATTERN_INVALID_ESCAPE;
490 if (!*pattern)
491 return PATTERN_INVALID_SET;
493 if (pattern[0] == '-' && pattern[1] == '-')
494 return PATTERN_INVALID_RANGE;
496 pattern++;
498 break;
501 return B_OK;
505 /** Matches the string against the given wildcard pattern.
506 * Returns either MATCH_OK, or NO_MATCH when everything went fine,
507 * or values < 0 (see enum at the top of Query.cpp) if an error
508 * occurs
511 status_t
512 matchString(char *pattern, char *string)
514 while (*pattern) {
515 // end of string == valid end of pattern?
516 if (!string[0]) {
517 while (pattern[0] == '*')
518 pattern++;
519 return !pattern[0] ? MATCH_OK : NO_MATCH;
522 switch (*pattern++) {
523 case '?':
525 // match exactly one UTF-8 character; we are
526 // not interested in the result
527 utf8ToUnicode(&string);
528 break;
531 case '*':
533 // compact pattern
534 while (true) {
535 if (pattern[0] == '?') {
536 if (!*++string)
537 return NO_MATCH;
538 } else if (pattern[0] != '*')
539 break;
541 pattern++;
544 // if the pattern is done, we have matched the string
545 if (!pattern[0])
546 return MATCH_OK;
548 while(true) {
549 // we have removed all occurences of '*' and '?'
550 if (pattern[0] == string[0]
551 || pattern[0] == '['
552 || pattern[0] == '\\') {
553 status_t status = matchString(pattern,string);
554 if (status < B_OK || status == MATCH_OK)
555 return status;
558 // we could be nice here and just jump to the next
559 // UTF-8 character - but we wouldn't gain that much
560 // and it'd be slower (since we're checking for
561 // equality before entering the recursion)
562 if (!*++string)
563 return NO_MATCH;
565 break;
568 case '[':
570 bool invert = false;
571 if (pattern[0] == '^' || pattern[0] == '!') {
572 invert = true;
573 pattern++;
576 if (!pattern[0] || pattern[0] == ']')
577 return MATCH_BAD_PATTERN;
579 uint32 c = utf8ToUnicode(&string);
580 bool matched = false;
582 while (pattern[0] != ']') {
583 if (!pattern[0])
584 return MATCH_BAD_PATTERN;
586 if (pattern[0] == '\\')
587 pattern++;
589 uint32 first = utf8ToUnicode(&pattern);
591 // Does this character match, or is this a range?
592 if (first == c) {
593 matched = true;
594 break;
595 } else if (pattern[0] == '-' && pattern[1] != ']' && pattern[1]) {
596 pattern++;
598 if (pattern[0] == '\\') {
599 pattern++;
600 if (!pattern[0])
601 return MATCH_BAD_PATTERN;
603 uint32 last = utf8ToUnicode(&pattern);
605 if (c >= first && c <= last) {
606 matched = true;
607 break;
612 if (invert)
613 matched = !matched;
615 if (matched) {
616 while (pattern[0] != ']') {
617 if (!pattern[0])
618 return MATCH_BAD_PATTERN;
619 pattern++;
621 pattern++;
622 break;
624 return NO_MATCH;
627 case '\\':
628 if (!pattern[0])
629 return MATCH_BAD_PATTERN;
630 // supposed to fall through
631 default:
632 if (pattern[-1] != string[0])
633 return NO_MATCH;
634 string++;
635 break;
639 if (string[0])
640 return NO_MATCH;
642 return MATCH_OK;
646 // #pragma mark -
649 Equation::Equation(char **expr)
650 : Term(OP_EQUATION),
651 fAttribute(NULL),
652 fString(NULL),
653 fType(0),
654 fIsPattern(false)
656 char *string = *expr;
657 char *start = string;
658 char *end = NULL;
660 // Since the equation is the integral part of any query, we're just parsing
661 // the whole thing here.
662 // The whitespace at the start is already removed in Expression::ParseEquation()
664 if (*start == '"' || *start == '\'') {
665 // string is quoted (start has to be on the beginning of a string)
666 if (ParseQuotedString(&start, &end) < B_OK)
667 return;
669 // set string to a valid start of the equation symbol
670 string = end + 2;
671 skipWhitespace(&string);
672 if (*string != '=' && *string != '<' && *string != '>' && *string != '!') {
673 *expr = string;
674 return;
676 } else {
677 // search the (in)equation for the actual equation symbol (and for other operators
678 // in case the equation is malformed)
679 while (*string && *string != '=' && *string != '<' && *string != '>' && *string != '!'
680 && *string != '&' && *string != '|')
681 string++;
683 // get the attribute string (and trim whitespace), in case
684 // the string was not quoted
685 end = string - 1;
686 skipWhitespaceReverse(&end, start);
689 // attribute string is empty (which is not allowed)
690 if (start > end)
691 return;
693 // at this point, "start" points to the beginning of the string, "end" points
694 // to the last character of the string, and "string" points to the first
695 // character of the equation symbol
697 // test for the right symbol (as this doesn't need any memory)
698 switch (*string) {
699 case '=':
700 fOp = OP_EQUAL;
701 break;
702 case '>':
703 fOp = *(string + 1) == '=' ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN;
704 break;
705 case '<':
706 fOp = *(string + 1) == '=' ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN;
707 break;
708 case '!':
709 if (*(string + 1) != '=')
710 return;
711 fOp = OP_UNEQUAL;
712 break;
714 // any invalid characters will be rejected
715 default:
716 *expr = string;
717 return;
719 // lets change "start" to point to the first character after the symbol
720 if (*(string + 1) == '=')
721 string++;
722 string++;
723 skipWhitespace(&string);
725 // allocate & copy the attribute string
727 fAttribute = CopyString(start, end);
728 if (fAttribute == NULL)
729 return;
731 start = string;
732 if (*start == '"' || *start == '\'') {
733 // string is quoted (start has to be on the beginning of a string)
734 if (ParseQuotedString(&start, &end) < B_OK)
735 return;
737 string = end + 2;
738 skipWhitespace(&string);
739 } else {
740 while (*string && *string != '&' && *string != '|' && *string != ')')
741 string++;
743 end = string - 1;
744 skipWhitespaceReverse(&end, start);
747 // at this point, "start" will point to the first character of the value,
748 // "end" will point to its last character, and "start" to the first non-
749 // whitespace character after the value string
751 fString = CopyString(start, end);
752 if (fString == NULL)
753 return;
755 // patterns are only allowed for these operations (and strings)
756 if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) {
757 fIsPattern = isPattern(fString);
758 if (fIsPattern && isValidPattern(fString) < B_OK) {
759 // we only want to have valid patterns; setting fString
760 // to NULL will cause InitCheck() to fail
761 free(fString);
762 fString = NULL;
766 *expr = string;
770 Equation::~Equation()
772 if (fAttribute != NULL)
773 free(fAttribute);
774 if (fString != NULL)
775 free(fString);
779 status_t
780 Equation::InitCheck()
782 if (fAttribute == NULL
783 || fString == NULL
784 || fOp == OP_NONE)
785 return B_BAD_VALUE;
787 return B_OK;
791 status_t
792 Equation::ParseQuotedString(char **_start, char **_end)
794 char *start = *_start;
795 char quote = *start++;
796 char *end = start;
798 for (;*end && *end != quote;end++) {
799 if (*end == '\\')
800 end++;
802 if (*end == '\0')
803 return B_BAD_VALUE;
805 *_start = start;
806 *_end = end - 1;
808 return B_OK;
812 char *
813 Equation::CopyString(char *start, char *end)
815 // end points to the last character of the string - and the length
816 // also has to include the null-termination
817 int32 length = end + 2 - start;
818 // just to make sure; since that's the max. attribute name length and
819 // the max. string in an index, it make sense to have it that way
820 if (length > (int32)kMaxIndexKeyLength || length <= 0)
821 return NULL;
823 char *copy = (char *)malloc(length);
824 if (copy == NULL)
825 return NULL;
827 memcpy(copy,start,length - 1);
828 copy[length - 1] = '\0';
830 return copy;
834 status_t
835 Equation::ConvertValue(type_code type)
837 // Has the type already been converted?
838 if (type == fType)
839 return B_OK;
841 char *string = fString;
843 switch (type) {
844 case B_MIME_STRING_TYPE:
845 type = B_STRING_TYPE;
846 // supposed to fall through
847 case B_STRING_TYPE:
848 strncpy(fValue.CString, string, kMaxIndexKeyLength);
849 fValue.CString[kMaxIndexKeyLength - 1] = '\0';
850 fSize = strlen(fValue.CString);
851 break;
852 case B_INT32_TYPE:
853 fValue.Int32 = strtol(string, &string, 0);
854 fSize = sizeof(int32);
855 break;
856 case B_UINT32_TYPE:
857 fValue.Int32 = strtoul(string, &string, 0);
858 fSize = sizeof(uint32);
859 break;
860 case B_INT64_TYPE:
861 fValue.Int64 = strtoll(string, &string, 0);
862 fSize = sizeof(int64);
863 break;
864 case B_UINT64_TYPE:
865 fValue.Uint64 = strtoull(string, &string, 0);
866 fSize = sizeof(uint64);
867 break;
868 case B_FLOAT_TYPE:
869 fValue.Float = strtod(string, &string);
870 fSize = sizeof(float);
871 break;
872 case B_DOUBLE_TYPE:
873 fValue.Double = strtod(string, &string);
874 fSize = sizeof(double);
875 break;
876 default:
877 FATAL(("query value conversion to 0x%lx requested!\n", type));
878 // should we fail here or just do a safety int32 conversion?
879 return B_ERROR;
882 fType = type;
884 // patterns are only allowed for string types
885 if (fType != B_STRING_TYPE && fIsPattern)
886 fIsPattern = false;
888 return B_OK;
892 /** Returns true when the key matches the equation. You have to
893 * call ConvertValue() before this one.
896 bool
897 Equation::CompareTo(const uint8 *value, uint16 size)
899 int32 compare;
901 // fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or OP_UNEQUAL
902 if (fIsPattern) {
903 // we have already validated the pattern, so we don't check for failing
904 // here - if something is broken, and matchString() returns an error,
905 // we just don't match
906 compare = matchString(fValue.CString, (char *)value) == MATCH_OK ? 0 : 1;
907 } else
908 compare = compareKeys(fType, value, size, Value(), fSize);
910 switch (fOp) {
911 case OP_EQUAL:
912 return compare == 0;
913 case OP_UNEQUAL:
914 return compare != 0;
915 case OP_LESS_THAN:
916 return compare < 0;
917 case OP_LESS_THAN_OR_EQUAL:
918 return compare <= 0;
919 case OP_GREATER_THAN:
920 return compare > 0;
921 case OP_GREATER_THAN_OR_EQUAL:
922 return compare >= 0;
924 FATAL(("Unknown/Unsupported operation: %d\n", fOp));
925 return false;
929 void
930 Equation::Complement()
932 D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) {
933 FATAL(("op out of range!"));
934 return;
937 int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL,
938 OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN};
939 fOp = complementOp[fOp - OP_EQUAL];
943 status_t
944 Equation::MatchEmptyString()
946 // there is no matching attribute, we will just bail out if we
947 // already know that our value is not of a string type.
948 // If not, it will be converted to a string - and then be compared with "".
949 // That's why we have to call ConvertValue() here - but it will be
950 // a cheap call for the next time
951 // Should we do this only for OP_UNEQUAL?
952 if (fType != 0 && fType != B_STRING_TYPE)
953 return NO_MATCH;
955 status_t status = ConvertValue(B_STRING_TYPE);
956 if (status == B_OK)
957 status = CompareTo((const uint8 *)"", fSize) ? MATCH_OK : NO_MATCH;
959 return status;
963 /** Matches the inode's attribute value with the equation.
964 * Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went wrong
967 status_t
968 Equation::Match(Entry *entry, Node* node, const char *attributeName, int32 type,
969 const uint8 *key, size_t size)
971 // get a pointer to the attribute in question
972 union value value;
973 const uint8 *buffer;
975 // first, check if we are matching for a live query and use that value
976 if (attributeName != NULL && !strcmp(fAttribute, attributeName)) {
977 if (key == NULL) {
978 if (type == B_STRING_TYPE) {
979 // special case: a NULL "name" means the entry has been removed
980 // or not yet been added -- we refuse to match, whatever the
981 // pattern
982 if (!strcmp(fAttribute, "name"))
983 return NO_MATCH;
985 return MatchEmptyString();
988 return NO_MATCH;
990 buffer = const_cast<uint8 *>(key);
991 } else if (!strcmp(fAttribute, "name")) {
992 // if not, check for "fake" attributes, "name", "size", "last_modified",
993 if (!entry)
994 return B_ERROR;
995 buffer = (uint8 *)entry->GetName();
996 if (buffer == NULL)
997 return B_ERROR;
999 type = B_STRING_TYPE;
1000 size = strlen((const char *)buffer);
1001 } else if (!strcmp(fAttribute,"size")) {
1002 value.Int64 = node->GetSize();
1003 buffer = (uint8 *)&value;
1004 type = B_INT64_TYPE;
1005 } else if (!strcmp(fAttribute,"last_modified")) {
1006 value.Int32 = node->GetMTime();
1007 buffer = (uint8 *)&value;
1008 type = B_INT32_TYPE;
1009 } else {
1010 // then for attributes
1011 Attribute *attribute = NULL;
1013 if (node->FindAttribute(fAttribute, &attribute) == B_OK) {
1014 attribute->GetKey(&buffer, &size);
1015 type = attribute->GetType();
1016 } else
1017 return MatchEmptyString();
1019 // prepare own value for use, if it is possible to convert it
1020 status_t status = ConvertValue(type);
1021 if (status == B_OK)
1022 status = CompareTo(buffer, size) ? MATCH_OK : NO_MATCH;
1024 RETURN_ERROR(status);
1028 void
1029 Equation::CalculateScore(IndexWrapper &index)
1031 // As always, these values could be tuned and refined.
1032 // And the code could also need some real world testing :-)
1034 // do we have to operate on a "foreign" index?
1035 if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) {
1036 fScore = 0;
1037 return;
1040 // if we have a pattern, how much does it help our search?
1041 if (fIsPattern)
1042 fScore = getFirstPatternSymbol(fString) << 3;
1043 else {
1044 // Score by operator
1045 if (fOp == OP_EQUAL)
1046 // higher than pattern="255 chars+*"
1047 fScore = 2048;
1048 else
1049 // the pattern search is regarded cheaper when you have at
1050 // least one character to set your index to
1051 fScore = 5;
1054 // take index size into account (1024 is the current node size
1055 // in our B+trees)
1056 // 2048 * 2048 == 4194304 is the maximum score (for an empty
1057 // tree, since the header + 1 node are already 2048 bytes)
1058 fScore = fScore * ((2048 * 1024LL) / index.GetSize());
1062 status_t
1063 Equation::PrepareQuery(Volume */*volume*/, IndexWrapper &index, IndexIterator **iterator, bool queryNonIndexed)
1065 status_t status = index.SetTo(fAttribute);
1067 // if we should query attributes without an index, we can just proceed here
1068 if (status < B_OK && !queryNonIndexed)
1069 return B_ENTRY_NOT_FOUND;
1071 type_code type;
1073 // special case for OP_UNEQUAL - it will always operate through the whole index
1074 // but we need the call to the original index to get the correct type
1075 if (status < B_OK || fOp == OP_UNEQUAL) {
1076 // Try to get an index that holds all files (name)
1077 // Also sets the default type for all attributes without index
1078 // to string.
1079 type = status < B_OK ? B_STRING_TYPE : index.Type();
1081 if (index.SetTo("name") < B_OK)
1082 return B_ENTRY_NOT_FOUND;
1084 fHasIndex = false;
1085 } else {
1086 fHasIndex = true;
1087 type = index.Type();
1090 if (ConvertValue(type) < B_OK)
1091 return B_BAD_VALUE;
1093 *iterator = new IndexIterator(&index);
1094 if (*iterator == NULL)
1095 return B_NO_MEMORY;
1097 if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL
1098 || fIsPattern)
1099 && fHasIndex) {
1100 // set iterator to the exact position
1102 int32 keySize = index.KeySize();
1104 // at this point, fIsPattern is only true if it's a string type, and fOp
1105 // is either OP_EQUAL or OP_UNEQUAL
1106 if (fIsPattern) {
1107 // let's see if we can use the beginning of the key for positioning
1108 // the iterator and adjust the key size; if not, just leave the
1109 // iterator at the start and return success
1110 keySize = getFirstPatternSymbol(fString);
1111 if (keySize <= 0)
1112 return B_OK;
1115 if (keySize == 0) {
1116 // B_STRING_TYPE doesn't have a fixed length, so it was set
1117 // to 0 before - we compute the correct value here
1118 if (fType == B_STRING_TYPE) {
1119 keySize = strlen(fValue.CString);
1121 // The empty string is a special case - we normally don't check
1122 // for the trailing null byte, in the case for the empty string
1123 // we do it explicitly, because there can't be keys in the B+tree
1124 // with a length of zero
1125 if (keySize == 0)
1126 keySize = 1;
1127 } else
1128 RETURN_ERROR(B_ENTRY_NOT_FOUND);
1131 status = (*iterator)->Find(Value(), keySize);
1132 if (fOp == OP_EQUAL && !fIsPattern)
1133 return status;
1134 else if (status == B_ENTRY_NOT_FOUND
1135 && (fIsPattern || fOp == OP_GREATER_THAN
1136 || fOp == OP_GREATER_THAN_OR_EQUAL)) {
1137 return (*iterator)->Rewind();
1140 RETURN_ERROR(status);
1143 return B_OK;
1147 status_t
1148 Equation::GetNextMatching(Volume *volume, IndexIterator *iterator,
1149 struct dirent *dirent, size_t bufferSize)
1151 while (true) {
1152 union value indexValue;
1153 uint16 keyLength;
1154 Entry *entry = NULL;
1156 status_t status = iterator->GetNextEntry((uint8*)&indexValue, &keyLength,
1157 (uint16)sizeof(indexValue), &entry);
1158 if (status < B_OK)
1159 return status;
1161 // only compare against the index entry when this is the correct
1162 // index for the equation
1163 if (fHasIndex && !CompareTo((uint8 *)&indexValue, keyLength)) {
1164 // They aren't equal? let the operation decide what to do
1165 // Since we always start at the beginning of the index (or the correct
1166 // position), only some needs to be stopped if the entry doesn't fit.
1167 if (fOp == OP_LESS_THAN
1168 || fOp == OP_LESS_THAN_OR_EQUAL
1169 || (fOp == OP_EQUAL && !fIsPattern))
1170 return B_ENTRY_NOT_FOUND;
1172 continue;
1175 // ToDo: check user permissions here - but which one?!
1176 // we could filter out all those where we don't have
1177 // read access... (we should check for every parent
1178 // directory if the X_OK is allowed)
1179 // Although it's quite expensive to open all parents,
1180 // it's likely that the application that runs the
1181 // query will do something similar (and we don't have
1182 // to do it for root, either).
1184 // go up in the tree until a &&-operator is found, and check if the
1185 // inode matches with the rest of the expression - we don't have to
1186 // check ||-operators for that
1187 Term *term = this;
1188 status = MATCH_OK;
1190 if (!fHasIndex)
1191 status = Match(entry, entry->GetNode());
1193 while (term != NULL && status == MATCH_OK) {
1194 Operator *parent = (Operator *)term->Parent();
1195 if (parent == NULL)
1196 break;
1198 if (parent->Op() == OP_AND) {
1199 // choose the other child of the parent
1200 Term *other = parent->Right();
1201 if (other == term)
1202 other = parent->Left();
1204 if (other == NULL) {
1205 FATAL(("&&-operator has only one child... (parent = %p)\n", parent));
1206 break;
1208 status = other->Match(entry, entry->GetNode());
1209 if (status < 0) {
1210 REPORT_ERROR(status);
1211 status = NO_MATCH;
1214 term = (Term *)parent;
1217 if (status == MATCH_OK) {
1218 size_t nameLen = strlen(entry->GetName());
1220 // check, whether the entry fits into the buffer,
1221 // and fill it in
1222 size_t length = (dirent->d_name + nameLen + 1) - (char*)dirent;
1223 if (length > bufferSize)
1224 RETURN_ERROR(B_BUFFER_OVERFLOW);
1226 dirent->d_dev = volume->GetID();
1227 dirent->d_ino = entry->GetNode()->GetID();
1228 dirent->d_pdev = volume->GetID();
1229 dirent->d_pino = entry->GetParent()->GetID();
1231 memcpy(dirent->d_name, entry->GetName(), nameLen);
1232 dirent->d_name[nameLen] = '\0';
1233 dirent->d_reclen = length;
1236 if (status == MATCH_OK)
1237 return B_OK;
1239 RETURN_ERROR(B_ERROR);
1243 bool
1244 Equation::NeedsEntry()
1246 return strcmp(fAttribute, "name") == 0;
1250 // #pragma mark -
1253 Operator::Operator(Term *left, int8 op, Term *right)
1254 : Term(op),
1255 fLeft(left),
1256 fRight(right)
1258 if (left)
1259 left->SetParent(this);
1260 if (right)
1261 right->SetParent(this);
1265 Operator::~Operator()
1267 delete fLeft;
1268 delete fRight;
1272 status_t
1273 Operator::Match(Entry *entry, Node* node, const char *attribute,
1274 int32 type, const uint8 *key, size_t size)
1276 if (fOp == OP_AND) {
1277 status_t status = fLeft->Match(entry, node, attribute, type, key, size);
1278 if (status != MATCH_OK)
1279 return status;
1281 return fRight->Match(entry, node, attribute, type, key, size);
1282 } else {
1283 // choose the term with the better score for OP_OR
1284 if (fRight->Score() > fLeft->Score()) {
1285 status_t status = fRight->Match(entry, node, attribute, type, key,
1286 size);
1287 if (status != NO_MATCH)
1288 return status;
1290 return fLeft->Match(entry, node, attribute, type, key, size);
1295 void
1296 Operator::Complement()
1298 if (fOp == OP_AND)
1299 fOp = OP_OR;
1300 else
1301 fOp = OP_AND;
1303 fLeft->Complement();
1304 fRight->Complement();
1308 void
1309 Operator::CalculateScore(IndexWrapper &index)
1311 fLeft->CalculateScore(index);
1312 fRight->CalculateScore(index);
1316 int32
1317 Operator::Score() const
1319 if (fOp == OP_AND) {
1320 // return the one with the better score
1321 if (fRight->Score() > fLeft->Score())
1322 return fRight->Score();
1324 return fLeft->Score();
1327 // for OP_OR, be honest, and return the one with the worse score
1328 if (fRight->Score() < fLeft->Score())
1329 return fRight->Score();
1331 return fLeft->Score();
1335 status_t
1336 Operator::InitCheck()
1338 if ((fOp != OP_AND && fOp != OP_OR)
1339 || fLeft == NULL || fLeft->InitCheck() < B_OK
1340 || fRight == NULL || fRight->InitCheck() < B_OK)
1341 return B_ERROR;
1343 return B_OK;
1347 bool
1348 Operator::NeedsEntry()
1350 return ((fLeft && fLeft->NeedsEntry()) || (fRight && fRight->NeedsEntry()));
1354 #if 0
1355 Term *
1356 Operator::Copy() const
1358 if (fEquation != NULL) {
1359 Equation *equation = new Equation(*fEquation);
1360 if (equation == NULL)
1361 return NULL;
1363 Term *term = new Term(equation);
1364 if (term == NULL)
1365 delete equation;
1367 return term;
1370 Term *left = NULL, *right = NULL;
1372 if (fLeft != NULL && (left = fLeft->Copy()) == NULL)
1373 return NULL;
1374 if (fRight != NULL && (right = fRight->Copy()) == NULL) {
1375 delete left;
1376 return NULL;
1379 Term *term = new Term(left,fOp,right);
1380 if (term == NULL) {
1381 delete left;
1382 delete right;
1383 return NULL;
1385 return term;
1387 #endif
1390 // #pragma mark -
1392 #ifdef DEBUG
1393 void
1394 Operator::PrintToStream()
1396 D(__out("( "));
1397 if (fLeft != NULL)
1398 fLeft->PrintToStream();
1400 const char* op;
1401 switch (fOp) {
1402 case OP_OR: op = "OR"; break;
1403 case OP_AND: op = "AND"; break;
1404 default: op = "?"; break;
1406 D(__out(" %s ", op));
1408 if (fRight != NULL)
1409 fRight->PrintToStream();
1411 D(__out(" )"));
1415 void
1416 Equation::PrintToStream()
1418 const char* op;
1419 switch (fOp) {
1420 case OP_EQUAL: op = "=="; break;
1421 case OP_UNEQUAL: op = "!="; break;
1422 case OP_GREATER_THAN: op = ">"; break;
1423 case OP_GREATER_THAN_OR_EQUAL: op = ">="; break;
1424 case OP_LESS_THAN: op = "<"; break;
1425 case OP_LESS_THAN_OR_EQUAL: op = "<="; break;
1426 default: op = "???"; break;
1428 D(__out("[\"%s\" %s \"%s\"]", fAttribute, op, fString));
1432 #endif /* DEBUG */
1434 // #pragma mark -
1437 Expression::Expression(char *expr)
1439 if (expr == NULL)
1440 return;
1442 fTerm = ParseOr(&expr);
1443 if (fTerm != NULL && fTerm->InitCheck() < B_OK) {
1444 FATAL(("Corrupt tree in expression!\n"));
1445 delete fTerm;
1446 fTerm = NULL;
1448 D(if (fTerm != NULL) {
1449 fTerm->PrintToStream();
1450 D(__out("\n"));
1451 if (*expr != '\0')
1452 PRINT(("Unexpected end of string: \"%s\"!\n", expr));
1454 fPosition = expr;
1458 Expression::~Expression()
1460 delete fTerm;
1464 Term *
1465 Expression::ParseEquation(char **expr)
1467 skipWhitespace(expr);
1469 bool nott = false; // note: not is a C++ keyword
1470 if (**expr == '!') {
1471 skipWhitespace(expr, 1);
1472 if (**expr != '(')
1473 return NULL;
1475 nott = true;
1478 if (**expr == ')') {
1479 // shouldn't be handled here
1480 return NULL;
1481 } else if (**expr == '(') {
1482 skipWhitespace(expr, 1);
1484 Term *term = ParseOr(expr);
1486 skipWhitespace(expr);
1488 if (**expr != ')') {
1489 delete term;
1490 return NULL;
1493 // If the term is negated, we just complement the tree, to get
1494 // rid of the not, a.k.a. DeMorgan's Law.
1495 if (nott)
1496 term->Complement();
1498 skipWhitespace(expr, 1);
1500 return term;
1503 Equation *equation = new Equation(expr);
1504 if (equation == NULL || equation->InitCheck() < B_OK) {
1505 delete equation;
1506 return NULL;
1508 return equation;
1512 Term *
1513 Expression::ParseAnd(char **expr)
1515 Term *left = ParseEquation(expr);
1516 if (left == NULL)
1517 return NULL;
1519 while (IsOperator(expr,'&')) {
1520 Term *right = ParseAnd(expr);
1521 Term *newParent = NULL;
1523 if (right == NULL || (newParent = new Operator(left, OP_AND, right)) == NULL) {
1524 delete left;
1525 delete right;
1527 return NULL;
1529 left = newParent;
1532 return left;
1536 Term *
1537 Expression::ParseOr(char **expr)
1539 Term *left = ParseAnd(expr);
1540 if (left == NULL)
1541 return NULL;
1543 while (IsOperator(expr,'|')) {
1544 Term *right = ParseAnd(expr);
1545 Term *newParent = NULL;
1547 if (right == NULL || (newParent = new Operator(left, OP_OR, right)) == NULL) {
1548 delete left;
1549 delete right;
1551 return NULL;
1553 left = newParent;
1556 return left;
1560 bool
1561 Expression::IsOperator(char **expr, char op)
1563 char *string = *expr;
1565 if (*string == op && *(string + 1) == op) {
1566 *expr += 2;
1567 return true;
1569 return false;
1573 status_t
1574 Expression::InitCheck()
1576 if (fTerm == NULL)
1577 return B_BAD_VALUE;
1579 return B_OK;
1583 // #pragma mark -
1586 Query::Query(Volume *volume, Expression *expression, uint32 flags)
1588 fVolume(volume),
1589 fExpression(expression),
1590 fCurrent(NULL),
1591 fIterator(NULL),
1592 fIndex(volume),
1593 fFlags(flags),
1594 fPort(-1),
1595 fNeedsEntry(false)
1597 // if the expression has a valid root pointer, the whole tree has
1598 // already passed the sanity check, so that we don't have to check
1599 // every pointer
1600 if (volume == NULL || expression == NULL || expression->Root() == NULL)
1601 return;
1603 // create index on the stack and delete it afterwards
1604 fExpression->Root()->CalculateScore(fIndex);
1605 fIndex.Unset();
1607 fNeedsEntry = fExpression->Root()->NeedsEntry();
1609 Rewind();
1611 if (fFlags & B_LIVE_QUERY)
1612 volume->AddQuery(this);
1616 Query::~Query()
1618 if (fFlags & B_LIVE_QUERY)
1619 fVolume->RemoveQuery(this);
1623 status_t
1624 Query::Rewind()
1626 // free previous stuff
1628 fStack.MakeEmpty();
1630 delete fIterator;
1631 fIterator = NULL;
1632 fCurrent = NULL;
1634 // put the whole expression on the stack
1636 Stack<Term *> stack;
1637 stack.Push(fExpression->Root());
1639 Term *term;
1640 while (stack.Pop(&term)) {
1641 if (term->Op() < OP_EQUATION) {
1642 Operator *op = (Operator *)term;
1644 if (op->Op() == OP_OR) {
1645 stack.Push(op->Left());
1646 stack.Push(op->Right());
1647 } else {
1648 // For OP_AND, we can use the scoring system to decide which path to add
1649 if (op->Right()->Score() > op->Left()->Score())
1650 stack.Push(op->Right());
1651 else
1652 stack.Push(op->Left());
1654 } else if (term->Op() == OP_EQUATION || fStack.Push((Equation *)term) < B_OK)
1655 FATAL(("Unknown term on stack or stack error"));
1658 return B_OK;
1662 status_t
1663 Query::GetNextEntry(struct dirent *dirent, size_t size)
1665 // If we don't have an equation to use yet/anymore, get a new one
1666 // from the stack
1667 while (true) {
1668 if (fIterator == NULL) {
1669 if (!fStack.Pop(&fCurrent)
1670 || fCurrent == NULL
1671 || fCurrent->PrepareQuery(fVolume, fIndex, &fIterator,
1672 fFlags & B_QUERY_NON_INDEXED) < B_OK)
1673 return B_ENTRY_NOT_FOUND;
1675 if (fCurrent == NULL)
1676 RETURN_ERROR(B_ERROR);
1678 status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent, size);
1679 if (status < B_OK) {
1680 delete fIterator;
1681 fIterator = NULL;
1682 fCurrent = NULL;
1683 } else {
1684 // only return if we have another entry
1685 return B_OK;
1691 void
1692 Query::SetLiveMode(port_id port, int32 token)
1694 fPort = port;
1695 fToken = token;
1697 if ((fFlags & B_LIVE_QUERY) == 0) {
1698 // you can decide at any point to set the live query mode,
1699 // only live queries have to be updated by attribute changes
1700 fFlags |= B_LIVE_QUERY;
1701 fVolume->AddQuery(this);
1706 static void
1707 send_entry_notification(port_id port, int32 token, Volume* volume, Entry* entry,
1708 bool created)
1710 if (created) {
1711 notify_query_entry_created(port, token, volume->GetID(),
1712 entry->GetParent()->GetID(), entry->GetName(),
1713 entry->GetNode()->GetID());
1714 } else {
1715 notify_query_entry_removed(port, token, volume->GetID(),
1716 entry->GetParent()->GetID(), entry->GetName(),
1717 entry->GetNode()->GetID());
1722 void
1723 Query::LiveUpdate(Entry *entry, Node* node, const char *attribute, int32 type,
1724 const uint8 *oldKey, size_t oldLength, const uint8 *newKey,
1725 size_t newLength)
1727 PRINT(("%p->Query::LiveUpdate(%p, %p, \"%s\", 0x%lx, %p, %lu, %p, %lu)\n",
1728 this, entry, node, attribute, type, oldKey, oldLength, newKey, newLength));
1729 if (fPort < 0 || fExpression == NULL || node == NULL || attribute == NULL)
1730 return;
1732 // ToDo: check if the attribute is part of the query at all...
1734 // If no entry has been supplied, but the we need one for the evaluation
1735 // (i.e. the "name" attribute is used), we invoke ourselves for all entries
1736 // referring to the given node.
1737 if (!entry && fNeedsEntry) {
1738 entry = node->GetFirstReferrer();
1739 while (entry) {
1740 LiveUpdate(entry, node, attribute, type, oldKey, oldLength, newKey,
1741 newLength);
1742 entry = node->GetNextReferrer(entry);
1744 return;
1747 status_t oldStatus = fExpression->Root()->Match(entry, node, attribute,
1748 type, oldKey, oldLength);
1749 status_t newStatus = fExpression->Root()->Match(entry, node, attribute,
1750 type, newKey, newLength);
1751 PRINT((" oldStatus: 0x%lx, newStatus: 0x%lx\n", oldStatus, newStatus));
1753 bool created;
1754 if (oldStatus == MATCH_OK && newStatus == MATCH_OK) {
1755 // only send out a notification if the name was changed
1756 if (oldKey == NULL || strcmp(attribute,"name"))
1757 return;
1759 if (entry) {
1760 // entry should actually always be given, when the changed
1761 // attribute is the entry name
1762 PRINT(("notification: old: removed\n"));
1763 notify_query_entry_removed(fPort, fToken, fVolume->GetID(),
1764 entry->GetParent()->GetID(), (const char *)oldKey,
1765 entry->GetNode()->GetID());
1767 created = true;
1768 } else if (oldStatus != MATCH_OK && newStatus != MATCH_OK) {
1769 // nothing has changed
1770 return;
1771 } else if (oldStatus == MATCH_OK && newStatus != MATCH_OK)
1772 created = false;
1773 else
1774 created = true;
1776 // We send a notification for the given entry, if any, or otherwise for
1777 // all entries referring to the node;
1778 if (entry) {
1779 PRINT(("notification: new: %s\n", (created ? "created" : "removed")));
1780 send_entry_notification(fPort, fToken, fVolume, entry, created);
1781 } else {
1782 entry = node->GetFirstReferrer();
1783 while (entry) {
1784 send_entry_notification(fPort, fToken, fVolume, entry, created);
1785 entry = node->GetNextReferrer(entry);