ctdb-server: Remove duplicate logic
[samba4-gss.git] / source3 / rpc_server / mdssvc / es_mapping.c
blobe8d181daae1aa6335b38fa2d10752fb9cabad9d7
1 /*
2 Unix SMB/CIFS implementation.
3 Main metadata server / Spotlight routines / Elasticsearch backend
5 Copyright (C) Ralph Boehme 2019
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "includes.h"
22 #include "es_mapping.h"
25 * Escaping of special characters in Lucene query syntax across HTTP and JSON
26 * ==========================================================================
28 * These characters in Lucene queries need escaping [1]:
30 * + - & | ! ( ) { } [ ] ^ " ~ * ? : \ /
32 * Additionally JSON requires escaping of:
34 * " \
36 * Characters already escaped by the mdssvc client:
38 * * " \
40 * The following table contains the resulting escaped strings, beginning with the
41 * search term, the corresponding Spotlight query and the final string that gets
42 * sent to the target Elasticsearch server.
44 * string | mdfind | http
45 * -------+--------+---------
46 * x!x    | x!x    | x\\!x
47 * x&x    | x&x    | x\\&x
48 * x+x    | x+x    | x\\+x
49 * x-x    | x-x    | x\\-x
50 * x.x    | x.x    | x\\.x
51 * x<x    | x<x    | x\\<x
52 * x>x    | x>x    | x\\>x
53 * x=x    | x=x    | x\\=x
54 * x?x    | x?x    | x\\?x
55 * x[x    | x[x    | x\\[x
56 * x]x    | x]x    | x\\]x
57 * x^x    | x^x    | x\\^x
58 * x{x    | x{x    | x\\{x
59 * x}x    | x}x    | x\\}x
60 * x|x    | x|x    | x\\|x
61 * x x    | x x    | x\\ x
62 * x*x    | x\*x   | x\\*x
63 * x\x    | x\\x   | x\\\\x
64 * x"x    | x\"x   | x\\\"x
66 * Special cases:
67 * x y It's not possible to search for terms including spaces, Spotlight
68 * will search for x OR y.
69 * x(x Search for terms including ( and ) does not work with Spotlight.
71 * [1] <http://lucene.apache.org/core/8_2_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters>
74 static char *escape_str(TALLOC_CTX *mem_ctx,
75 const char *in,
76 const char *escape_list,
77 const char *escape_exceptions)
79 char *out = NULL;
80 size_t in_len;
81 size_t new_len;
82 size_t in_pos;
83 size_t out_pos = 0;
85 if (in == NULL) {
86 return NULL;
88 in_len = strlen(in);
90 if (escape_list == NULL) {
91 escape_list = "";
93 if (escape_exceptions == NULL) {
94 escape_exceptions = "";
98 * Allocate enough space for the worst case: every char needs to be
99 * escaped and requires an additional char.
101 new_len = (in_len * 2) + 1;
102 if (new_len <= in_len) {
103 return NULL;
106 out = talloc_zero_array(mem_ctx, char, new_len);
107 if (out == NULL) {
108 return NULL;
111 for (in_pos = 0, out_pos = 0; in_pos < in_len; in_pos++, out_pos++) {
112 if (strchr(escape_list, in[in_pos]) != NULL &&
113 strchr(escape_exceptions, in[in_pos]) == NULL)
115 out[out_pos++] = '\\';
117 out[out_pos] = in[in_pos];
120 return out;
123 char *es_escape_str(TALLOC_CTX *mem_ctx,
124 const char *in,
125 const char *exceptions)
127 const char *lucene_escape_list = "+-&|!(){}[]^\"~*?:\\/ ";
128 const char *json_escape_list = "\\\"";
129 char *lucene_escaped = NULL;
130 char *full_escaped = NULL;
132 lucene_escaped = escape_str(mem_ctx,
134 lucene_escape_list,
135 exceptions);
136 if (lucene_escaped == NULL) {
137 return NULL;
140 full_escaped = escape_str(mem_ctx,
141 lucene_escaped,
142 json_escape_list,
143 NULL);
144 TALLOC_FREE(lucene_escaped);
145 return full_escaped;
148 struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx,
149 json_t *kmd_map,
150 const char *sl_attr)
152 struct es_attr_map *es_map = NULL;
153 const char *typestr = NULL;
154 enum ssm_type type = ssmt_bool;
155 char *es_attr = NULL;
156 size_t i;
157 int cmp;
158 int ret;
160 static struct {
161 const char *typestr;
162 enum ssm_type typeval;
163 } ssmt_type_map[] = {
164 {"bool", ssmt_bool},
165 {"num", ssmt_num},
166 {"str", ssmt_str},
167 {"fts", ssmt_fts},
168 {"date", ssmt_date},
169 {"type", ssmt_type},
172 if (sl_attr == NULL) {
173 return NULL;
176 ret = json_unpack(kmd_map,
177 "{s: {s: s}}",
178 sl_attr,
179 "type",
180 &typestr);
181 if (ret != 0) {
182 DBG_DEBUG("No JSON type mapping for [%s]\n", sl_attr);
183 return NULL;
186 ret = json_unpack(kmd_map,
187 "{s: {s: s}}",
188 sl_attr,
189 "attribute",
190 &es_attr);
191 if (ret != 0) {
192 DBG_ERR("No JSON attribute mapping for [%s]\n", sl_attr);
193 return NULL;
196 for (i = 0; i < ARRAY_SIZE(ssmt_type_map); i++) {
197 cmp = strcmp(typestr, ssmt_type_map[i].typestr);
198 if (cmp == 0) {
199 type = ssmt_type_map[i].typeval;
200 break;
203 if (i == ARRAY_SIZE(ssmt_type_map)) {
204 return NULL;
207 es_map = talloc_zero(mem_ctx, struct es_attr_map);
208 if (es_map == NULL) {
209 return NULL;
211 es_map->type = type;
213 es_map->name = es_escape_str(es_map, es_attr, NULL);
214 if (es_map->name == NULL) {
215 TALLOC_FREE(es_map);
216 return false;
219 return es_map;
222 const char *es_map_sl_type(json_t *mime_map,
223 const char *sl_type)
225 const char *mime_type = NULL;
226 int ret;
228 if (sl_type == NULL) {
229 return NULL;
232 ret = json_unpack(mime_map,
233 "{s: s}",
234 sl_type,
235 &mime_type);
236 if (ret != 0) {
237 return NULL;
240 return mime_type;