ctdb-server: Remove duplicate logic
[samba4-gss.git] / source3 / rpc_server / mdssvc / es_parser.y
blob023f8790a148f3e0dfbfd09d2395ae54cbeb56b4
1 /*
2 Unix SMB/CIFS implementation.
3 Main metadata server / Spotlight routines / Elasticsearch backend
5 Copyright (C) Ralph Boehme 2019
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "includes.h"
23 #include "rpc_server/mdssvc/mdssvc.h"
24 #include "rpc_server/mdssvc/mdssvc_es.h"
25 #include "rpc_server/mdssvc/es_parser.tab.h"
26 #include "rpc_server/mdssvc/es_mapping.h"
27 #include "lib/util/smb_strtox.h"
28 #include <jansson.h>
31 * allow building with -O3 -Wp,-D_FORTIFY_SOURCE=2
33 * /tmp/samba-testbase/.../mdssvc/es_parser.y: In function
34 * ‘mdsyylparse’:
35 * es_parser.tab.c:1124:6: error: assuming pointer wraparound
36 * does not occur when comparing P +- C1 with P +- C2
37 * [-Werror=strict-overflow]
39 * The generated code in es_parser.tab.c looks like this:
41 * if (yyss + yystacksize - 1 <= yyssp)
43 #pragma GCC diagnostic ignored "-Wstrict-overflow"
45 #define YYMALLOC SMB_MALLOC
46 #define YYREALLOC SMB_REALLOC
48 struct yy_buffer_state;
49 typedef struct yy_buffer_state *YY_BUFFER_STATE;
50 int mdsyyllex(void);
51 void mdsyylerror(char const *);
52 void *mdsyylterminate(void);
53 YY_BUFFER_STATE mdsyyl_scan_string(const char *str);
54 void mdsyyl_delete_buffer(YY_BUFFER_STATE buffer);
56 /* forward declarations */
57 static char *isodate_to_sldate(const char *s);
58 static char *map_expr(const struct es_attr_map *attr,
59 char op,
60 const char *val1,
61 const char *val2);
/* global vars, eg needed by the lexer */
struct es_parser_state {
	/* talloc context for all intermediate strings built while parsing */
	TALLOC_CTX *frame;
	/* "attribute_mappings" object from the JSON mappings */
	json_t *kmd_map;
	/* "mime_mappings" object from the JSON mappings */
	json_t *mime_map;
	/* if set, an unmappable attribute does not abort the parse */
	bool ignore_unknown_attribute;
	/* if set, an unmappable type value does not raise type_error */
	bool ignore_unknown_type;
	/* set by map_type() on a failed type mapping; aborts the parse */
	bool type_error;
	/* scanner buffer created from the query string */
	YY_BUFFER_STATE s;
	/* mapped query produced by the last reduced "line" rule */
	const char *result;
} *global_es_parser_state;
76 %code provides {
77 #include <stdbool.h>
78 #include <jansson.h>
79 #include "rpc_server/mdssvc/mdssvc.h"
81 /* 2001-01-01T00:00:00Z - Unix Epoch = SP_RAW_TIME_OFFSET */
82 #define SP_RAW_TIME_OFFSET 978307200
84 int mdsyylwrap(void);
85 bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx,
86 json_t *mappings,
87 const char *path_scope,
88 const char *query_string,
89 char **_es_query);
92 %union {
93 bool bval;
94 const char *sval;
95 struct es_attr_map *attr_map;
98 %define api.prefix {mdsyyl}
99 %expect 1
100 %define parse.error verbose
102 %type <sval> match expr line function value isodate
103 %type <attr_map> attribute
105 %token <sval> WORD PHRASE
106 %token <bval> BOOLEAN
107 %token FUNC_INRANGE
108 %token DATE_ISO
109 %token OBRACE CBRACE EQUAL UNEQUAL GT LT COMMA QUOTE
110 %left OR
111 %left AND
/* A query is a possibly empty sequence of lines. */
input:
%empty
| input line
;
/*
 * A complete expression. The parse is aborted when the expression mapped
 * to nothing or when a type mapping error was recorded while reducing
 * it; otherwise the mapped string is stored as the parser result.
 */
line:
expr {
	if ($1 == NULL || global_es_parser_state->type_error) {
		YYABORT;
	}
	global_es_parser_state->result = $1;
}
;
/*
 * Expressions. A NULL semantic value stands for a sub-expression that
 * could not be mapped; it is dropped from AND/OR combinations instead of
 * failing the whole parse.
 */
expr:
OBRACE expr CBRACE {
	if ($2 != NULL) {
		$$ = talloc_asprintf(talloc_tos(), "(%s)", $2);
		if ($$ == NULL) {
			YYABORT;
		}
	} else {
		$$ = NULL;
	}
}
| expr AND expr {
	if ($1 != NULL && $3 != NULL) {
		$$ = talloc_asprintf(talloc_tos(), "(%s) AND (%s)", $1, $3);
		if ($$ == NULL) {
			YYABORT;
		}
	} else {
		/* keep whichever side mapped, NULL if neither did */
		$$ = ($1 != NULL) ? $1 : $3;
	}
}
| expr OR expr {
	if ($1 != NULL && $3 != NULL) {
		$$ = talloc_asprintf(talloc_tos(), "%s OR %s", $1, $3);
		if ($$ == NULL) {
			YYABORT;
		}
	} else {
		/* keep whichever side mapped, NULL if neither did */
		$$ = ($1 != NULL) ? $1 : $3;
	}
}
| match {
	$$ = $1;
}
| BOOLEAN {
	/*
	 * We can't properly handle these in expressions, fortunately this
	 * is probably only ever used by OS X as sole element in an
	 * expression ie "False" (when Finder window selected our share
	 * but no search string entered yet). Packet traces showed that OS
	 * X Spotlight server then returns a failure (ie -1) which is what
	 * we do here too by calling YYABORT.
	 */
	YYABORT;
}
;
/*
 * A single comparison. An attribute that could not be mapped (NULL)
 * yields a NULL match; otherwise the comparison is handed to map_expr()
 * with a one-character operator code.
 */
match:
attribute EQUAL value {
	$$ = ($1 != NULL) ? map_expr($1, '=', $3, NULL) : NULL;
}
| attribute UNEQUAL value {
	$$ = ($1 != NULL) ? map_expr($1, '!', $3, NULL) : NULL;
}
| attribute LT value {
	$$ = ($1 != NULL) ? map_expr($1, '<', $3, NULL) : NULL;
}
| attribute GT value {
	$$ = ($1 != NULL) ? map_expr($1, '>', $3, NULL) : NULL;
}
| function {
	$$ = $1;
}
| match WORD {
	/* a WORD trailing a match is accepted and ignored */
	$$ = $1;
}
;
/*
 * Range function: the attribute plus two WORD bounds are mapped with
 * the '~' operator code. An unmapped attribute yields NULL.
 */
function:
FUNC_INRANGE OBRACE attribute COMMA WORD COMMA WORD CBRACE {
	$$ = ($3 != NULL) ? map_expr($3, '~', $5, $7) : NULL;
}
;
/*
 * Look up the attribute name in the attribute mappings. A failed lookup
 * aborts the parse unless unknown attributes are configured to be
 * ignored, in which case the NULL value is passed up.
 */
attribute:
WORD {
	struct es_parser_state *state = global_es_parser_state;

	$$ = es_map_sl_attr(state->frame, state->kmd_map, $1);
	if ($$ == NULL && !state->ignore_unknown_attribute) {
		YYABORT;
	}
}
;
/* A comparison value: either a phrase or an already converted ISO date. */
value:
PHRASE  { $$ = $1; }
| isodate { $$ = $1; }
;
/*
 * ISO date literal: converted to the Spotlight seconds representation
 * by isodate_to_sldate(); a conversion failure aborts the parse.
 */
isodate:
DATE_ISO OBRACE WORD CBRACE {
	$$ = isodate_to_sldate($3);
	if ($$ == NULL) {
		YYABORT;
	}
}
;
253 * Spotlight has two date formats:
254 * - seconds since 2001-01-01 00:00:00Z
255 * - as string "$time.iso(%Y-%m-%dT%H:%M:%SZ)"
256 * This function converts the latter to the former as string, so the parser
257 * can work on a uniform format.
259 static char *isodate_to_sldate(const char *isodate)
261 struct es_parser_state *s = global_es_parser_state;
262 struct tm tm = {};
263 const char *p = NULL;
264 char *tstr = NULL;
265 time_t t;
267 p = strptime(isodate, "%Y-%m-%dT%H:%M:%SZ", &tm);
268 if (p == NULL) {
269 DBG_ERR("strptime [%s] failed\n", isodate);
270 return NULL;
273 t = timegm(&tm);
274 t -= SP_RAW_TIME_OFFSET;
276 tstr = talloc_asprintf(s->frame, "%jd", (intmax_t)t);
277 if (tstr == NULL) {
278 return NULL;
281 return tstr;
284 static char *map_type(const struct es_attr_map *attr,
285 char op,
286 const char *val)
288 struct es_parser_state *s = global_es_parser_state;
289 const char *mime_type_list = NULL;
290 char *esc_mime_type_list = NULL;
291 const char *not = NULL;
292 const char *end = NULL;
293 char *es = NULL;
295 mime_type_list = es_map_sl_type(s->mime_map, val);
296 if (mime_type_list == NULL) {
297 DBG_DEBUG("Mapping type [%s] failed\n", val);
298 if (!s->ignore_unknown_type) {
299 s->type_error = true;
301 return NULL;
304 esc_mime_type_list = es_escape_str(s->frame,
305 mime_type_list,
306 "* ");
307 if (esc_mime_type_list == NULL) {
308 return NULL;
311 switch (op) {
312 case '=':
313 not = "";
314 end = "";
315 break;
316 case '!':
317 not = "(NOT ";
318 end = ")";
319 break;
320 default:
321 DBG_ERR("Mapping type [%s] unexpected op [%c]\n", val, op);
322 return NULL;
324 es = talloc_asprintf(s->frame,
325 "%s%s:(%s)%s",
326 not,
327 attr->name,
328 esc_mime_type_list,
329 end);
330 if (es == NULL) {
331 return NULL;
334 return es;
337 static char *map_num(const struct es_attr_map *attr,
338 char op,
339 const char *val1,
340 const char *val2)
342 struct es_parser_state *s = global_es_parser_state;
343 char *es = NULL;
345 switch (op) {
346 case '>':
347 es = talloc_asprintf(s->frame,
348 "%s:{%s TO *}",
349 attr->name,
350 val1);
351 break;
352 case '<':
353 es = talloc_asprintf(s->frame,
354 "%s:{* TO %s}",
355 attr->name,
356 val1);
357 break;
358 case '~':
359 es = talloc_asprintf(s->frame,
360 "%s:[%s TO %s]",
361 attr->name,
362 val1,
363 val2);
364 break;
365 case '=':
366 es = talloc_asprintf(s->frame,
367 "%s:%s",
368 attr->name,
369 val1);
370 break;
371 case '!':
372 es = talloc_asprintf(s->frame,
373 "(NOT %s:%s)",
374 attr->name,
375 val1);
376 break;
377 default:
378 DBG_ERR("Mapping num unexpected op [%c]\n", op);
379 return NULL;
381 if (es == NULL) {
382 return NULL;
385 return es;
388 static char *map_fts(const struct es_attr_map *attr,
389 char op,
390 const char *val)
392 struct es_parser_state *s = global_es_parser_state;
393 const char *not = NULL;
394 const char *end = NULL;
395 char *esval = NULL;
396 char *es = NULL;
398 esval = es_escape_str(s->frame, val, "*\\\"");
399 if (esval == NULL) {
400 yyerror("es_escape_str failed");
401 return NULL;
404 switch (op) {
405 case '=':
406 not = "";
407 end = "";
408 break;
409 case '!':
410 not = "(NOT ";
411 end = ")";
412 break;
413 default:
414 DBG_ERR("Mapping fts [%s] unexpected op [%c]\n", val, op);
415 return NULL;
417 es = talloc_asprintf(s->frame,
418 "%s%s%s",
419 not,
420 esval,
421 end);
422 if (es == NULL) {
423 return NULL;
425 return es;
428 static char *map_str(const struct es_attr_map *attr,
429 char op,
430 const char *val)
432 struct es_parser_state *s = global_es_parser_state;
433 char *esval = NULL;
434 char *es = NULL;
435 const char *not = NULL;
436 const char *end = NULL;
438 esval = es_escape_str(s->frame, val, "*\\\"");
439 if (esval == NULL) {
440 yyerror("es_escape_str failed");
441 return NULL;
444 switch (op) {
445 case '=':
446 not = "";
447 end = "";
448 break;
449 case '!':
450 not = "(NOT ";
451 end = ")";
452 break;
453 default:
454 DBG_ERR("Mapping string [%s] unexpected op [%c]\n", val, op);
455 return NULL;
458 es = talloc_asprintf(s->frame,
459 "%s%s:%s%s",
460 not,
461 attr->name,
462 esval,
463 end);
464 if (es == NULL) {
465 return NULL;
467 return es;
471 * Convert Spotlight date seconds since 2001-01-01 00:00:00Z
472 * to a date string in the format %Y-%m-%dT%H:%M:%SZ.
474 static char *map_sldate_to_esdate(TALLOC_CTX *mem_ctx,
475 const char *sldate)
477 struct tm *tm = NULL;
478 char *esdate = NULL;
479 char buf[21];
480 size_t len;
481 time_t t;
482 int error;
484 t = (time_t)smb_strtoull(sldate, NULL, 10, &error, SMB_STR_STANDARD);
485 if (error != 0) {
486 DBG_ERR("smb_strtoull [%s] failed\n", sldate);
487 return NULL;
489 t += SP_RAW_TIME_OFFSET;
491 tm = gmtime(&t);
492 if (tm == NULL) {
493 DBG_ERR("localtime [%s] failed\n", sldate);
494 return NULL;
497 len = strftime(buf, sizeof(buf),
498 "%Y-%m-%dT%H:%M:%SZ", tm);
499 if (len != 20) {
500 DBG_ERR("strftime [%s] failed\n", sldate);
501 return NULL;
504 esdate = es_escape_str(mem_ctx, buf, NULL);
505 if (esdate == NULL) {
506 yyerror("es_escape_str failed");
507 return NULL;
509 return esdate;
512 static char *map_date(const struct es_attr_map *attr,
513 char op,
514 const char *sldate1,
515 const char *sldate2)
517 struct es_parser_state *s = global_es_parser_state;
518 char *esdate1 = NULL;
519 char *esdate2 = NULL;
520 char *es = NULL;
522 if (op == '~' && sldate2 == NULL) {
523 DBG_ERR("Date range query, but second date is NULL\n");
524 return NULL;
527 esdate1 = map_sldate_to_esdate(s->frame, sldate1);
528 if (esdate1 == NULL) {
529 DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate1);
530 return NULL;
532 if (sldate2 != NULL) {
533 esdate2 = map_sldate_to_esdate(s->frame, sldate2);
534 if (esdate2 == NULL) {
535 DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate2);
536 return NULL;
540 switch (op) {
541 case '>':
542 es = talloc_asprintf(s->frame,
543 "%s:{%s TO *}",
544 attr->name,
545 esdate1);
546 break;
547 case '<':
548 es = talloc_asprintf(s->frame,
549 "%s:{* TO %s}",
550 attr->name,
551 esdate1);
552 break;
553 case '~':
554 es = talloc_asprintf(s->frame,
555 "%s:[%s TO %s]",
556 attr->name,
557 esdate1,
558 esdate2);
559 break;
560 case '=':
561 es = talloc_asprintf(s->frame,
562 "%s:%s",
563 attr->name,
564 esdate1);
565 break;
566 case '!':
567 es = talloc_asprintf(s->frame,
568 "(NOT %s:%s)",
569 attr->name,
570 esdate1);
571 break;
573 if (es == NULL) {
574 return NULL;
576 return es;
579 static char *map_expr(const struct es_attr_map *attr,
580 char op,
581 const char *val1,
582 const char *val2)
584 char *es = NULL;
586 switch (attr->type) {
587 case ssmt_type:
588 es = map_type(attr, op, val1);
589 break;
590 case ssmt_num:
591 es = map_num(attr, op, val1, val2);
592 break;
593 case ssmt_fts:
594 es = map_fts(attr, op, val1);
595 break;
596 case ssmt_str:
597 es = map_str(attr, op, val1);
598 break;
599 case ssmt_date:
600 es = map_date(attr, op, val1, val2);
601 break;
602 default:
603 break;
605 if (es == NULL) {
606 DBG_DEBUG("Mapping [%s %c %s (%s)] failed\n",
607 attr->name, op, val1, val2 ? val2 : "");
608 return NULL;
611 return es;
/* Parser error callback: log the parser's message at error level. */
void mdsyylerror(const char *str)
{
	DBG_ERR("Parser failed: %s\n", str);
}
/*
 * Scanner end-of-input hook. Always returns 1; presumably the usual
 * yywrap() convention meaning that no further input follows.
 */
int mdsyylwrap(void)
{
	return 1;
}
625 * Map a Spotlight RAW query string to a ES query string
627 bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx,
628 json_t *mappings,
629 const char *path_scope,
630 const char *query_string,
631 char **_es_query)
633 struct es_parser_state s = {
634 .frame = talloc_stackframe(),
636 int result;
637 char *es_query = NULL;
639 s.kmd_map = json_object_get(mappings, "attribute_mappings");
640 if (s.kmd_map == NULL) {
641 DBG_ERR("Failed to load attribute_mappings from JSON\n");
642 return false;
644 s.mime_map = json_object_get(mappings, "mime_mappings");
645 if (s.mime_map == NULL) {
646 DBG_ERR("Failed to load mime_mappings from JSON\n");
647 return false;
650 s.s = mdsyyl_scan_string(query_string);
651 if (s.s == NULL) {
652 DBG_WARNING("Failed to parse [%s]\n", query_string);
653 TALLOC_FREE(s.frame);
654 return false;
657 s.ignore_unknown_attribute = lp_parm_bool(GLOBAL_SECTION_SNUM,
658 "elasticsearch",
659 "ignore unknown attribute",
660 false);
661 s.ignore_unknown_type = lp_parm_bool(GLOBAL_SECTION_SNUM,
662 "elasticsearch",
663 "ignore unknown type",
664 false);
666 global_es_parser_state = &s;
667 result = mdsyylparse();
668 global_es_parser_state = NULL;
669 mdsyyl_delete_buffer(s.s);
671 if (result != 0) {
672 TALLOC_FREE(s.frame);
673 return false;
676 es_query = talloc_asprintf(mem_ctx,
677 "(%s) AND path.real.fulltext:\\\"%s\\\"",
678 s.result, path_scope);
679 TALLOC_FREE(s.frame);
680 if (es_query == NULL) {
681 return false;
684 *_es_query = es_query;
685 return true;