2 Unix SMB/CIFS implementation.
3 Main metadata server / Spotlight routines / Elasticsearch backend
5 Copyright (C) Ralph Boehme 2019
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "es_mapping.h"
25 * Escaping of special characters in Lucene query syntax across HTTP and JSON
26 * ==========================================================================
28 * These characters in Lucene queries need escaping [1]:
30 * + - & | ! ( ) { } [ ] ^ " ~ * ? : \ /
32 * Additionally JSON requires escaping of:
36 * Characters already escaped by the mdssvc client:
40 * The following table contains the resulting escaped strings, beginning with the
41 * search term, the corresponding Spotlight query and the final string that gets
42 * sent to the target Elasticsearch server.
44 * string | mdfind | http
45 * -------+--------+------
67 * x y It is not possible to search for terms that include spaces; Spotlight
68 * will search for x OR y instead.
69 * x(x Searching for terms that include ( and ) does not work with Spotlight.
71 * [1] <http://lucene.apache.org/core/8_2_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters>
/*
 * escape_str: copy the string "in" into a newly allocated buffer, putting a
 * backslash in front of selected characters.
 *
 * mem_ctx           - talloc context handed to talloc_zero_array() for the
 *                     output buffer
 * escape_list       - characters that receive a backslash prefix
 * escape_exceptions - characters that are copied unescaped even when they
 *                     appear in escape_list; NULL is replaced by ""
 *
 * NOTE(review): this excerpt does not show the declaration of "in", of
 * in_len/new_len/in_pos/out_pos/out, the bodies of several branches, or the
 * return statement. in_len is presumably strlen(in) - confirm against the
 * complete file.
 */
74 static char *escape_str(TALLOC_CTX
*mem_ctx
,
76 const char *escape_list
,
77 const char *escape_exceptions
)
/* NULL escape_list: what this branch does is not visible in this excerpt */
90 if (escape_list
== NULL
) {
/* A NULL exception list is treated as "no exceptions" */
93 if (escape_exceptions
== NULL
) {
94 escape_exceptions
= "";
98 * Allocate enough space for the worst case: every char needs to be
99 * escaped and requires an additional char.
101 new_len
= (in_len
* 2) + 1;
/*
 * Mathematically new_len is always larger than in_len, so this condition
 * can only hold if the computation above wrapped around (presumably the
 * variables are unsigned size_t - TODO confirm). The branch body is not
 * visible here.
 */
102 if (new_len
<= in_len
) {
/*
 * talloc_zero_array() returns zero-filled memory, so every byte the loop
 * below does not write stays 0.
 */
106 out
= talloc_zero_array(mem_ctx
, char, new_len
);
/*
 * out_pos advances once per input character in the loop header and once
 * more inside the body for every backslash that is inserted.
 */
111 for (in_pos
= 0, out_pos
= 0; in_pos
< in_len
; in_pos
++, out_pos
++) {
112 if (strchr(escape_list
, in
[in_pos
]) != NULL
&&
113 strchr(escape_exceptions
, in
[in_pos
]) == NULL
)
/* Listed and not exempted: emit the backslash before the character */
115 out
[out_pos
++] = '\\';
/* The input character itself is always copied */
117 out
[out_pos
] = in
[in_pos
];
/*
 * es_escape_str: escape a string by running it through escape_str() twice.
 *
 * The result of the first call is kept in lucene_escaped, the result of the
 * second call in full_escaped. lucene_escaped is released with TALLOC_FREE()
 * after the second call.
 *
 * mem_ctx    - talloc context passed to both escape_str() calls
 * exceptions - NOTE(review): presumably forwarded to the first pass as the
 *              list of characters that must not be escaped - confirm
 *
 * NOTE(review): the arguments that follow mem_ctx in both escape_str()
 * calls, any parameter declared between mem_ctx and exceptions, and the
 * return statements are not visible in this excerpt. Presumably pass one
 * applies lucene_escape_list and pass two applies json_escape_list to the
 * output of pass one - confirm against the complete file.
 */
123 char *es_escape_str(TALLOC_CTX
*mem_ctx
,
125 const char *exceptions
)
/* Lucene query syntax special characters; note the list ends with a space */
127 const char *lucene_escape_list
= "+-&|!(){}[]^\"~*?:\\/ ";
/* Backslash and double quote */
128 const char *json_escape_list
= "\\\"";
129 char *lucene_escaped
= NULL
;
130 char *full_escaped
= NULL
;
/* First pass */
132 lucene_escaped
= escape_str(mem_ctx
,
/* First pass failed; the branch body is not visible in this excerpt */
136 if (lucene_escaped
== NULL
) {
/* Second pass */
140 full_escaped
= escape_str(mem_ctx
,
/* The intermediate string is no longer needed once the second pass ran */
144 TALLOC_FREE(lucene_escaped
);
/*
 * es_map_sl_attr: look up the attribute sl_attr in the JSON object kmd_map
 * and return a newly allocated struct es_attr_map for it.
 *
 * Visible steps:
 *  - two json_unpack() calls on kmd_map; a debug-level message is logged
 *    when no type mapping is found and an error-level message when no
 *    attribute mapping is found
 *  - typestr is compared against the typestr member of each ssmt_type_map
 *    entry with strcmp() to select the matching typeval
 *  - es_map is allocated with talloc_zero() on mem_ctx
 *  - es_map->name is set to the escaped form of es_attr, allocated with
 *    es_map as its talloc context
 *
 * NOTE(review): the remaining parameters, the json_unpack() format strings
 * and output arguments, the contents of ssmt_type_map, the bodies of the
 * error branches and the return statements are not visible in this excerpt.
 */
148 struct es_attr_map
*es_map_sl_attr(TALLOC_CTX
*mem_ctx
,
152 struct es_attr_map
*es_map
= NULL
;
153 const char *typestr
= NULL
;
154 enum ssm_type type
= ssmt_bool
;
155 char *es_attr
= NULL
;
/* Lookup table pairing a type string with its enum ssm_type value */
162 enum ssm_type typeval
;
163 } ssmt_type_map
[] = {
172 if (sl_attr
== NULL
) {
/* First lookup in kmd_map; on failure the debug message below is logged */
176 ret
= json_unpack(kmd_map
,
182 DBG_DEBUG("No JSON type mapping for [%s]\n", sl_attr
);
/* Second lookup in kmd_map; on failure the error message below is logged */
186 ret
= json_unpack(kmd_map
,
192 DBG_ERR("No JSON attribute mapping for [%s]\n", sl_attr
);
/* Linear search of the table for an entry whose typestr equals typestr */
196 for (i
= 0; i
< ARRAY_SIZE(ssmt_type_map
); i
++) {
197 cmp
= strcmp(typestr
, ssmt_type_map
[i
].typestr
);
199 type
= ssmt_type_map
[i
].typeval
;
/*
 * NOTE(review): i reaching ARRAY_SIZE presumably means the loop ended
 * without a match; the handling is not visible in this excerpt.
 */
203 if (i
== ARRAY_SIZE(ssmt_type_map
)) {
207 es_map
= talloc_zero(mem_ctx
, struct es_attr_map
);
208 if (es_map
== NULL
) {
/*
 * es_map is passed as the talloc context, so the escaped name is allocated
 * as a child of es_map. NULL is passed as the exceptions argument.
 */
213 es_map
->name
= es_escape_str(es_map
, es_attr
, NULL
);
214 if (es_map
->name
== NULL
) {
222 const char *es_map_sl_type(json_t
*mime_map
,
225 const char *mime_type
= NULL
;
228 if (sl_type
== NULL
) {
232 ret
= json_unpack(mime_map
,