jsonpath scanner: reentrant scanner
[pgsql.git] / src / common / saslprep.c
blob5d84d13c6d2d5946a0635369c911cd17bc96afa0
1 /*-------------------------------------------------------------------------
2 * saslprep.c
3 * SASLprep normalization, for SCRAM authentication
5 * The SASLprep algorithm is used to process a user-supplied password into
6 * canonical form. For more details, see:
8 * [RFC3454] Preparation of Internationalized Strings ("stringprep"),
9 * http://www.ietf.org/rfc/rfc3454.txt
11 * [RFC4013] SASLprep: Stringprep Profile for User Names and Passwords
12 * http://www.ietf.org/rfc/rfc4013.txt
15 * Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
17 * IDENTIFICATION
18 * src/common/saslprep.c
20 *-------------------------------------------------------------------------
22 #ifndef FRONTEND
23 #include "postgres.h"
24 #include "utils/memutils.h"
25 #else
26 #include "postgres_fe.h"
27 #endif
29 #include "common/saslprep.h"
30 #include "common/string.h"
31 #include "common/unicode_norm.h"
32 #include "mb/pg_wchar.h"
/*
 * Memory management wrappers.
 *
 * Backend builds go through palloc/pfree.  Frontend builds use plain
 * malloc/free, and callers report allocation failure as SASLPREP_OOM.
 */
#ifdef FRONTEND
#define STRDUP(str)		strdup(str)
#define ALLOC(nbytes)	malloc(nbytes)
#define FREE(ptr)		free(ptr)
#else
#define STRDUP(str)		pstrdup(str)
#define ALLOC(nbytes)	palloc(nbytes)
#define FREE(ptr)		pfree(ptr)
#endif
48 /* Prototypes for local functions */
49 static int codepoint_range_cmp(const void *a, const void *b);
50 static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
51 static int pg_utf8_string_len(const char *source);
/*
 * Stringprep Mapping Tables.
 *
 * The stringprep specification includes a number of tables of Unicode
 * codepoints, used in different parts of the algorithm.  They are below,
 * as arrays of codepoint ranges.  Each range is a pair of codepoints,
 * for the first and last codepoint included in the range (inclusive!).
 */
/*
 * C.1.2 Non-ASCII space characters
 *
 * These are all mapped to the ASCII space character (U+0020).
 */
67 static const pg_wchar non_ascii_space_ranges[] =
69 0x00A0, 0x00A0,
70 0x1680, 0x1680,
71 0x2000, 0x200B,
72 0x202F, 0x202F,
73 0x205F, 0x205F,
74 0x3000, 0x3000
78 * B.1 Commonly mapped to nothing
80 * If any of these appear in the input, they are removed.
82 static const pg_wchar commonly_mapped_to_nothing_ranges[] =
84 0x00AD, 0x00AD,
85 0x034F, 0x034F,
86 0x1806, 0x1806,
87 0x180B, 0x180D,
88 0x200B, 0x200D,
89 0x2060, 0x2060,
90 0xFE00, 0xFE0F,
91 0xFEFF, 0xFEFF
/*
 * prohibited_output_ranges is a union of all the characters from
 * the following tables:
 *
 * C.1.2 Non-ASCII space characters
 * C.2.1 ASCII control characters
 * C.2.2 Non-ASCII control characters
 * C.3 Private Use characters
 * C.4 Non-character code points
 * C.5 Surrogate code points
 * C.6 Inappropriate for plain text characters
 * C.7 Inappropriate for canonical representation characters
 * C.8 Change display properties or deprecated characters
 * C.9 Tagging characters
 *
 * These are the tables that are listed as "prohibited output"
 * characters in the SASLprep profile.
 *
 * The comment after each code range indicates which source table
 * the code came from.  Note that there is some overlap in the source
 * tables, so one code might originate from multiple source tables.
 * Adjacent ranges have also been merged together, to save space.
 */
117 static const pg_wchar prohibited_output_ranges[] =
119 0x0000, 0x001F, /* C.2.1 */
120 0x007F, 0x00A0, /* C.1.2, C.2.1, C.2.2 */
121 0x0340, 0x0341, /* C.8 */
122 0x06DD, 0x06DD, /* C.2.2 */
123 0x070F, 0x070F, /* C.2.2 */
124 0x1680, 0x1680, /* C.1.2 */
125 0x180E, 0x180E, /* C.2.2 */
126 0x2000, 0x200F, /* C.1.2, C.2.2, C.8 */
127 0x2028, 0x202F, /* C.1.2, C.2.2, C.8 */
128 0x205F, 0x2063, /* C.1.2, C.2.2 */
129 0x206A, 0x206F, /* C.2.2, C.8 */
130 0x2FF0, 0x2FFB, /* C.7 */
131 0x3000, 0x3000, /* C.1.2 */
132 0xD800, 0xF8FF, /* C.3, C.5 */
133 0xFDD0, 0xFDEF, /* C.4 */
134 0xFEFF, 0xFEFF, /* C.2.2 */
135 0xFFF9, 0xFFFF, /* C.2.2, C.4, C.6 */
136 0x1D173, 0x1D17A, /* C.2.2 */
137 0x1FFFE, 0x1FFFF, /* C.4 */
138 0x2FFFE, 0x2FFFF, /* C.4 */
139 0x3FFFE, 0x3FFFF, /* C.4 */
140 0x4FFFE, 0x4FFFF, /* C.4 */
141 0x5FFFE, 0x5FFFF, /* C.4 */
142 0x6FFFE, 0x6FFFF, /* C.4 */
143 0x7FFFE, 0x7FFFF, /* C.4 */
144 0x8FFFE, 0x8FFFF, /* C.4 */
145 0x9FFFE, 0x9FFFF, /* C.4 */
146 0xAFFFE, 0xAFFFF, /* C.4 */
147 0xBFFFE, 0xBFFFF, /* C.4 */
148 0xCFFFE, 0xCFFFF, /* C.4 */
149 0xDFFFE, 0xDFFFF, /* C.4 */
150 0xE0001, 0xE0001, /* C.9 */
151 0xE0020, 0xE007F, /* C.9 */
152 0xEFFFE, 0xEFFFF, /* C.4 */
153 0xF0000, 0xFFFFF, /* C.3, C.4 */
154 0x100000, 0x10FFFF /* C.3, C.4 */
157 /* A.1 Unassigned code points in Unicode 3.2 */
158 static const pg_wchar unassigned_codepoint_ranges[] =
160 0x0221, 0x0221,
161 0x0234, 0x024F,
162 0x02AE, 0x02AF,
163 0x02EF, 0x02FF,
164 0x0350, 0x035F,
165 0x0370, 0x0373,
166 0x0376, 0x0379,
167 0x037B, 0x037D,
168 0x037F, 0x0383,
169 0x038B, 0x038B,
170 0x038D, 0x038D,
171 0x03A2, 0x03A2,
172 0x03CF, 0x03CF,
173 0x03F7, 0x03FF,
174 0x0487, 0x0487,
175 0x04CF, 0x04CF,
176 0x04F6, 0x04F7,
177 0x04FA, 0x04FF,
178 0x0510, 0x0530,
179 0x0557, 0x0558,
180 0x0560, 0x0560,
181 0x0588, 0x0588,
182 0x058B, 0x0590,
183 0x05A2, 0x05A2,
184 0x05BA, 0x05BA,
185 0x05C5, 0x05CF,
186 0x05EB, 0x05EF,
187 0x05F5, 0x060B,
188 0x060D, 0x061A,
189 0x061C, 0x061E,
190 0x0620, 0x0620,
191 0x063B, 0x063F,
192 0x0656, 0x065F,
193 0x06EE, 0x06EF,
194 0x06FF, 0x06FF,
195 0x070E, 0x070E,
196 0x072D, 0x072F,
197 0x074B, 0x077F,
198 0x07B2, 0x0900,
199 0x0904, 0x0904,
200 0x093A, 0x093B,
201 0x094E, 0x094F,
202 0x0955, 0x0957,
203 0x0971, 0x0980,
204 0x0984, 0x0984,
205 0x098D, 0x098E,
206 0x0991, 0x0992,
207 0x09A9, 0x09A9,
208 0x09B1, 0x09B1,
209 0x09B3, 0x09B5,
210 0x09BA, 0x09BB,
211 0x09BD, 0x09BD,
212 0x09C5, 0x09C6,
213 0x09C9, 0x09CA,
214 0x09CE, 0x09D6,
215 0x09D8, 0x09DB,
216 0x09DE, 0x09DE,
217 0x09E4, 0x09E5,
218 0x09FB, 0x0A01,
219 0x0A03, 0x0A04,
220 0x0A0B, 0x0A0E,
221 0x0A11, 0x0A12,
222 0x0A29, 0x0A29,
223 0x0A31, 0x0A31,
224 0x0A34, 0x0A34,
225 0x0A37, 0x0A37,
226 0x0A3A, 0x0A3B,
227 0x0A3D, 0x0A3D,
228 0x0A43, 0x0A46,
229 0x0A49, 0x0A4A,
230 0x0A4E, 0x0A58,
231 0x0A5D, 0x0A5D,
232 0x0A5F, 0x0A65,
233 0x0A75, 0x0A80,
234 0x0A84, 0x0A84,
235 0x0A8C, 0x0A8C,
236 0x0A8E, 0x0A8E,
237 0x0A92, 0x0A92,
238 0x0AA9, 0x0AA9,
239 0x0AB1, 0x0AB1,
240 0x0AB4, 0x0AB4,
241 0x0ABA, 0x0ABB,
242 0x0AC6, 0x0AC6,
243 0x0ACA, 0x0ACA,
244 0x0ACE, 0x0ACF,
245 0x0AD1, 0x0ADF,
246 0x0AE1, 0x0AE5,
247 0x0AF0, 0x0B00,
248 0x0B04, 0x0B04,
249 0x0B0D, 0x0B0E,
250 0x0B11, 0x0B12,
251 0x0B29, 0x0B29,
252 0x0B31, 0x0B31,
253 0x0B34, 0x0B35,
254 0x0B3A, 0x0B3B,
255 0x0B44, 0x0B46,
256 0x0B49, 0x0B4A,
257 0x0B4E, 0x0B55,
258 0x0B58, 0x0B5B,
259 0x0B5E, 0x0B5E,
260 0x0B62, 0x0B65,
261 0x0B71, 0x0B81,
262 0x0B84, 0x0B84,
263 0x0B8B, 0x0B8D,
264 0x0B91, 0x0B91,
265 0x0B96, 0x0B98,
266 0x0B9B, 0x0B9B,
267 0x0B9D, 0x0B9D,
268 0x0BA0, 0x0BA2,
269 0x0BA5, 0x0BA7,
270 0x0BAB, 0x0BAD,
271 0x0BB6, 0x0BB6,
272 0x0BBA, 0x0BBD,
273 0x0BC3, 0x0BC5,
274 0x0BC9, 0x0BC9,
275 0x0BCE, 0x0BD6,
276 0x0BD8, 0x0BE6,
277 0x0BF3, 0x0C00,
278 0x0C04, 0x0C04,
279 0x0C0D, 0x0C0D,
280 0x0C11, 0x0C11,
281 0x0C29, 0x0C29,
282 0x0C34, 0x0C34,
283 0x0C3A, 0x0C3D,
284 0x0C45, 0x0C45,
285 0x0C49, 0x0C49,
286 0x0C4E, 0x0C54,
287 0x0C57, 0x0C5F,
288 0x0C62, 0x0C65,
289 0x0C70, 0x0C81,
290 0x0C84, 0x0C84,
291 0x0C8D, 0x0C8D,
292 0x0C91, 0x0C91,
293 0x0CA9, 0x0CA9,
294 0x0CB4, 0x0CB4,
295 0x0CBA, 0x0CBD,
296 0x0CC5, 0x0CC5,
297 0x0CC9, 0x0CC9,
298 0x0CCE, 0x0CD4,
299 0x0CD7, 0x0CDD,
300 0x0CDF, 0x0CDF,
301 0x0CE2, 0x0CE5,
302 0x0CF0, 0x0D01,
303 0x0D04, 0x0D04,
304 0x0D0D, 0x0D0D,
305 0x0D11, 0x0D11,
306 0x0D29, 0x0D29,
307 0x0D3A, 0x0D3D,
308 0x0D44, 0x0D45,
309 0x0D49, 0x0D49,
310 0x0D4E, 0x0D56,
311 0x0D58, 0x0D5F,
312 0x0D62, 0x0D65,
313 0x0D70, 0x0D81,
314 0x0D84, 0x0D84,
315 0x0D97, 0x0D99,
316 0x0DB2, 0x0DB2,
317 0x0DBC, 0x0DBC,
318 0x0DBE, 0x0DBF,
319 0x0DC7, 0x0DC9,
320 0x0DCB, 0x0DCE,
321 0x0DD5, 0x0DD5,
322 0x0DD7, 0x0DD7,
323 0x0DE0, 0x0DF1,
324 0x0DF5, 0x0E00,
325 0x0E3B, 0x0E3E,
326 0x0E5C, 0x0E80,
327 0x0E83, 0x0E83,
328 0x0E85, 0x0E86,
329 0x0E89, 0x0E89,
330 0x0E8B, 0x0E8C,
331 0x0E8E, 0x0E93,
332 0x0E98, 0x0E98,
333 0x0EA0, 0x0EA0,
334 0x0EA4, 0x0EA4,
335 0x0EA6, 0x0EA6,
336 0x0EA8, 0x0EA9,
337 0x0EAC, 0x0EAC,
338 0x0EBA, 0x0EBA,
339 0x0EBE, 0x0EBF,
340 0x0EC5, 0x0EC5,
341 0x0EC7, 0x0EC7,
342 0x0ECE, 0x0ECF,
343 0x0EDA, 0x0EDB,
344 0x0EDE, 0x0EFF,
345 0x0F48, 0x0F48,
346 0x0F6B, 0x0F70,
347 0x0F8C, 0x0F8F,
348 0x0F98, 0x0F98,
349 0x0FBD, 0x0FBD,
350 0x0FCD, 0x0FCE,
351 0x0FD0, 0x0FFF,
352 0x1022, 0x1022,
353 0x1028, 0x1028,
354 0x102B, 0x102B,
355 0x1033, 0x1035,
356 0x103A, 0x103F,
357 0x105A, 0x109F,
358 0x10C6, 0x10CF,
359 0x10F9, 0x10FA,
360 0x10FC, 0x10FF,
361 0x115A, 0x115E,
362 0x11A3, 0x11A7,
363 0x11FA, 0x11FF,
364 0x1207, 0x1207,
365 0x1247, 0x1247,
366 0x1249, 0x1249,
367 0x124E, 0x124F,
368 0x1257, 0x1257,
369 0x1259, 0x1259,
370 0x125E, 0x125F,
371 0x1287, 0x1287,
372 0x1289, 0x1289,
373 0x128E, 0x128F,
374 0x12AF, 0x12AF,
375 0x12B1, 0x12B1,
376 0x12B6, 0x12B7,
377 0x12BF, 0x12BF,
378 0x12C1, 0x12C1,
379 0x12C6, 0x12C7,
380 0x12CF, 0x12CF,
381 0x12D7, 0x12D7,
382 0x12EF, 0x12EF,
383 0x130F, 0x130F,
384 0x1311, 0x1311,
385 0x1316, 0x1317,
386 0x131F, 0x131F,
387 0x1347, 0x1347,
388 0x135B, 0x1360,
389 0x137D, 0x139F,
390 0x13F5, 0x1400,
391 0x1677, 0x167F,
392 0x169D, 0x169F,
393 0x16F1, 0x16FF,
394 0x170D, 0x170D,
395 0x1715, 0x171F,
396 0x1737, 0x173F,
397 0x1754, 0x175F,
398 0x176D, 0x176D,
399 0x1771, 0x1771,
400 0x1774, 0x177F,
401 0x17DD, 0x17DF,
402 0x17EA, 0x17FF,
403 0x180F, 0x180F,
404 0x181A, 0x181F,
405 0x1878, 0x187F,
406 0x18AA, 0x1DFF,
407 0x1E9C, 0x1E9F,
408 0x1EFA, 0x1EFF,
409 0x1F16, 0x1F17,
410 0x1F1E, 0x1F1F,
411 0x1F46, 0x1F47,
412 0x1F4E, 0x1F4F,
413 0x1F58, 0x1F58,
414 0x1F5A, 0x1F5A,
415 0x1F5C, 0x1F5C,
416 0x1F5E, 0x1F5E,
417 0x1F7E, 0x1F7F,
418 0x1FB5, 0x1FB5,
419 0x1FC5, 0x1FC5,
420 0x1FD4, 0x1FD5,
421 0x1FDC, 0x1FDC,
422 0x1FF0, 0x1FF1,
423 0x1FF5, 0x1FF5,
424 0x1FFF, 0x1FFF,
425 0x2053, 0x2056,
426 0x2058, 0x205E,
427 0x2064, 0x2069,
428 0x2072, 0x2073,
429 0x208F, 0x209F,
430 0x20B2, 0x20CF,
431 0x20EB, 0x20FF,
432 0x213B, 0x213C,
433 0x214C, 0x2152,
434 0x2184, 0x218F,
435 0x23CF, 0x23FF,
436 0x2427, 0x243F,
437 0x244B, 0x245F,
438 0x24FF, 0x24FF,
439 0x2614, 0x2615,
440 0x2618, 0x2618,
441 0x267E, 0x267F,
442 0x268A, 0x2700,
443 0x2705, 0x2705,
444 0x270A, 0x270B,
445 0x2728, 0x2728,
446 0x274C, 0x274C,
447 0x274E, 0x274E,
448 0x2753, 0x2755,
449 0x2757, 0x2757,
450 0x275F, 0x2760,
451 0x2795, 0x2797,
452 0x27B0, 0x27B0,
453 0x27BF, 0x27CF,
454 0x27EC, 0x27EF,
455 0x2B00, 0x2E7F,
456 0x2E9A, 0x2E9A,
457 0x2EF4, 0x2EFF,
458 0x2FD6, 0x2FEF,
459 0x2FFC, 0x2FFF,
460 0x3040, 0x3040,
461 0x3097, 0x3098,
462 0x3100, 0x3104,
463 0x312D, 0x3130,
464 0x318F, 0x318F,
465 0x31B8, 0x31EF,
466 0x321D, 0x321F,
467 0x3244, 0x3250,
468 0x327C, 0x327E,
469 0x32CC, 0x32CF,
470 0x32FF, 0x32FF,
471 0x3377, 0x337A,
472 0x33DE, 0x33DF,
473 0x33FF, 0x33FF,
474 0x4DB6, 0x4DFF,
475 0x9FA6, 0x9FFF,
476 0xA48D, 0xA48F,
477 0xA4C7, 0xABFF,
478 0xD7A4, 0xD7FF,
479 0xFA2E, 0xFA2F,
480 0xFA6B, 0xFAFF,
481 0xFB07, 0xFB12,
482 0xFB18, 0xFB1C,
483 0xFB37, 0xFB37,
484 0xFB3D, 0xFB3D,
485 0xFB3F, 0xFB3F,
486 0xFB42, 0xFB42,
487 0xFB45, 0xFB45,
488 0xFBB2, 0xFBD2,
489 0xFD40, 0xFD4F,
490 0xFD90, 0xFD91,
491 0xFDC8, 0xFDCF,
492 0xFDFD, 0xFDFF,
493 0xFE10, 0xFE1F,
494 0xFE24, 0xFE2F,
495 0xFE47, 0xFE48,
496 0xFE53, 0xFE53,
497 0xFE67, 0xFE67,
498 0xFE6C, 0xFE6F,
499 0xFE75, 0xFE75,
500 0xFEFD, 0xFEFE,
501 0xFF00, 0xFF00,
502 0xFFBF, 0xFFC1,
503 0xFFC8, 0xFFC9,
504 0xFFD0, 0xFFD1,
505 0xFFD8, 0xFFD9,
506 0xFFDD, 0xFFDF,
507 0xFFE7, 0xFFE7,
508 0xFFEF, 0xFFF8,
509 0x10000, 0x102FF,
510 0x1031F, 0x1031F,
511 0x10324, 0x1032F,
512 0x1034B, 0x103FF,
513 0x10426, 0x10427,
514 0x1044E, 0x1CFFF,
515 0x1D0F6, 0x1D0FF,
516 0x1D127, 0x1D129,
517 0x1D1DE, 0x1D3FF,
518 0x1D455, 0x1D455,
519 0x1D49D, 0x1D49D,
520 0x1D4A0, 0x1D4A1,
521 0x1D4A3, 0x1D4A4,
522 0x1D4A7, 0x1D4A8,
523 0x1D4AD, 0x1D4AD,
524 0x1D4BA, 0x1D4BA,
525 0x1D4BC, 0x1D4BC,
526 0x1D4C1, 0x1D4C1,
527 0x1D4C4, 0x1D4C4,
528 0x1D506, 0x1D506,
529 0x1D50B, 0x1D50C,
530 0x1D515, 0x1D515,
531 0x1D51D, 0x1D51D,
532 0x1D53A, 0x1D53A,
533 0x1D53F, 0x1D53F,
534 0x1D545, 0x1D545,
535 0x1D547, 0x1D549,
536 0x1D551, 0x1D551,
537 0x1D6A4, 0x1D6A7,
538 0x1D7CA, 0x1D7CD,
539 0x1D800, 0x1FFFD,
540 0x2A6D7, 0x2F7FF,
541 0x2FA1E, 0x2FFFD,
542 0x30000, 0x3FFFD,
543 0x40000, 0x4FFFD,
544 0x50000, 0x5FFFD,
545 0x60000, 0x6FFFD,
546 0x70000, 0x7FFFD,
547 0x80000, 0x8FFFD,
548 0x90000, 0x9FFFD,
549 0xA0000, 0xAFFFD,
550 0xB0000, 0xBFFFD,
551 0xC0000, 0xCFFFD,
552 0xD0000, 0xDFFFD,
553 0xE0000, 0xE0000,
554 0xE0002, 0xE001F,
555 0xE0080, 0xEFFFD
558 /* D.1 Characters with bidirectional property "R" or "AL" */
559 static const pg_wchar RandALCat_codepoint_ranges[] =
561 0x05BE, 0x05BE,
562 0x05C0, 0x05C0,
563 0x05C3, 0x05C3,
564 0x05D0, 0x05EA,
565 0x05F0, 0x05F4,
566 0x061B, 0x061B,
567 0x061F, 0x061F,
568 0x0621, 0x063A,
569 0x0640, 0x064A,
570 0x066D, 0x066F,
571 0x0671, 0x06D5,
572 0x06DD, 0x06DD,
573 0x06E5, 0x06E6,
574 0x06FA, 0x06FE,
575 0x0700, 0x070D,
576 0x0710, 0x0710,
577 0x0712, 0x072C,
578 0x0780, 0x07A5,
579 0x07B1, 0x07B1,
580 0x200F, 0x200F,
581 0xFB1D, 0xFB1D,
582 0xFB1F, 0xFB28,
583 0xFB2A, 0xFB36,
584 0xFB38, 0xFB3C,
585 0xFB3E, 0xFB3E,
586 0xFB40, 0xFB41,
587 0xFB43, 0xFB44,
588 0xFB46, 0xFBB1,
589 0xFBD3, 0xFD3D,
590 0xFD50, 0xFD8F,
591 0xFD92, 0xFDC7,
592 0xFDF0, 0xFDFC,
593 0xFE70, 0xFE74,
594 0xFE76, 0xFEFC
597 /* D.2 Characters with bidirectional property "L" */
598 static const pg_wchar LCat_codepoint_ranges[] =
600 0x0041, 0x005A,
601 0x0061, 0x007A,
602 0x00AA, 0x00AA,
603 0x00B5, 0x00B5,
604 0x00BA, 0x00BA,
605 0x00C0, 0x00D6,
606 0x00D8, 0x00F6,
607 0x00F8, 0x0220,
608 0x0222, 0x0233,
609 0x0250, 0x02AD,
610 0x02B0, 0x02B8,
611 0x02BB, 0x02C1,
612 0x02D0, 0x02D1,
613 0x02E0, 0x02E4,
614 0x02EE, 0x02EE,
615 0x037A, 0x037A,
616 0x0386, 0x0386,
617 0x0388, 0x038A,
618 0x038C, 0x038C,
619 0x038E, 0x03A1,
620 0x03A3, 0x03CE,
621 0x03D0, 0x03F5,
622 0x0400, 0x0482,
623 0x048A, 0x04CE,
624 0x04D0, 0x04F5,
625 0x04F8, 0x04F9,
626 0x0500, 0x050F,
627 0x0531, 0x0556,
628 0x0559, 0x055F,
629 0x0561, 0x0587,
630 0x0589, 0x0589,
631 0x0903, 0x0903,
632 0x0905, 0x0939,
633 0x093D, 0x0940,
634 0x0949, 0x094C,
635 0x0950, 0x0950,
636 0x0958, 0x0961,
637 0x0964, 0x0970,
638 0x0982, 0x0983,
639 0x0985, 0x098C,
640 0x098F, 0x0990,
641 0x0993, 0x09A8,
642 0x09AA, 0x09B0,
643 0x09B2, 0x09B2,
644 0x09B6, 0x09B9,
645 0x09BE, 0x09C0,
646 0x09C7, 0x09C8,
647 0x09CB, 0x09CC,
648 0x09D7, 0x09D7,
649 0x09DC, 0x09DD,
650 0x09DF, 0x09E1,
651 0x09E6, 0x09F1,
652 0x09F4, 0x09FA,
653 0x0A05, 0x0A0A,
654 0x0A0F, 0x0A10,
655 0x0A13, 0x0A28,
656 0x0A2A, 0x0A30,
657 0x0A32, 0x0A33,
658 0x0A35, 0x0A36,
659 0x0A38, 0x0A39,
660 0x0A3E, 0x0A40,
661 0x0A59, 0x0A5C,
662 0x0A5E, 0x0A5E,
663 0x0A66, 0x0A6F,
664 0x0A72, 0x0A74,
665 0x0A83, 0x0A83,
666 0x0A85, 0x0A8B,
667 0x0A8D, 0x0A8D,
668 0x0A8F, 0x0A91,
669 0x0A93, 0x0AA8,
670 0x0AAA, 0x0AB0,
671 0x0AB2, 0x0AB3,
672 0x0AB5, 0x0AB9,
673 0x0ABD, 0x0AC0,
674 0x0AC9, 0x0AC9,
675 0x0ACB, 0x0ACC,
676 0x0AD0, 0x0AD0,
677 0x0AE0, 0x0AE0,
678 0x0AE6, 0x0AEF,
679 0x0B02, 0x0B03,
680 0x0B05, 0x0B0C,
681 0x0B0F, 0x0B10,
682 0x0B13, 0x0B28,
683 0x0B2A, 0x0B30,
684 0x0B32, 0x0B33,
685 0x0B36, 0x0B39,
686 0x0B3D, 0x0B3E,
687 0x0B40, 0x0B40,
688 0x0B47, 0x0B48,
689 0x0B4B, 0x0B4C,
690 0x0B57, 0x0B57,
691 0x0B5C, 0x0B5D,
692 0x0B5F, 0x0B61,
693 0x0B66, 0x0B70,
694 0x0B83, 0x0B83,
695 0x0B85, 0x0B8A,
696 0x0B8E, 0x0B90,
697 0x0B92, 0x0B95,
698 0x0B99, 0x0B9A,
699 0x0B9C, 0x0B9C,
700 0x0B9E, 0x0B9F,
701 0x0BA3, 0x0BA4,
702 0x0BA8, 0x0BAA,
703 0x0BAE, 0x0BB5,
704 0x0BB7, 0x0BB9,
705 0x0BBE, 0x0BBF,
706 0x0BC1, 0x0BC2,
707 0x0BC6, 0x0BC8,
708 0x0BCA, 0x0BCC,
709 0x0BD7, 0x0BD7,
710 0x0BE7, 0x0BF2,
711 0x0C01, 0x0C03,
712 0x0C05, 0x0C0C,
713 0x0C0E, 0x0C10,
714 0x0C12, 0x0C28,
715 0x0C2A, 0x0C33,
716 0x0C35, 0x0C39,
717 0x0C41, 0x0C44,
718 0x0C60, 0x0C61,
719 0x0C66, 0x0C6F,
720 0x0C82, 0x0C83,
721 0x0C85, 0x0C8C,
722 0x0C8E, 0x0C90,
723 0x0C92, 0x0CA8,
724 0x0CAA, 0x0CB3,
725 0x0CB5, 0x0CB9,
726 0x0CBE, 0x0CBE,
727 0x0CC0, 0x0CC4,
728 0x0CC7, 0x0CC8,
729 0x0CCA, 0x0CCB,
730 0x0CD5, 0x0CD6,
731 0x0CDE, 0x0CDE,
732 0x0CE0, 0x0CE1,
733 0x0CE6, 0x0CEF,
734 0x0D02, 0x0D03,
735 0x0D05, 0x0D0C,
736 0x0D0E, 0x0D10,
737 0x0D12, 0x0D28,
738 0x0D2A, 0x0D39,
739 0x0D3E, 0x0D40,
740 0x0D46, 0x0D48,
741 0x0D4A, 0x0D4C,
742 0x0D57, 0x0D57,
743 0x0D60, 0x0D61,
744 0x0D66, 0x0D6F,
745 0x0D82, 0x0D83,
746 0x0D85, 0x0D96,
747 0x0D9A, 0x0DB1,
748 0x0DB3, 0x0DBB,
749 0x0DBD, 0x0DBD,
750 0x0DC0, 0x0DC6,
751 0x0DCF, 0x0DD1,
752 0x0DD8, 0x0DDF,
753 0x0DF2, 0x0DF4,
754 0x0E01, 0x0E30,
755 0x0E32, 0x0E33,
756 0x0E40, 0x0E46,
757 0x0E4F, 0x0E5B,
758 0x0E81, 0x0E82,
759 0x0E84, 0x0E84,
760 0x0E87, 0x0E88,
761 0x0E8A, 0x0E8A,
762 0x0E8D, 0x0E8D,
763 0x0E94, 0x0E97,
764 0x0E99, 0x0E9F,
765 0x0EA1, 0x0EA3,
766 0x0EA5, 0x0EA5,
767 0x0EA7, 0x0EA7,
768 0x0EAA, 0x0EAB,
769 0x0EAD, 0x0EB0,
770 0x0EB2, 0x0EB3,
771 0x0EBD, 0x0EBD,
772 0x0EC0, 0x0EC4,
773 0x0EC6, 0x0EC6,
774 0x0ED0, 0x0ED9,
775 0x0EDC, 0x0EDD,
776 0x0F00, 0x0F17,
777 0x0F1A, 0x0F34,
778 0x0F36, 0x0F36,
779 0x0F38, 0x0F38,
780 0x0F3E, 0x0F47,
781 0x0F49, 0x0F6A,
782 0x0F7F, 0x0F7F,
783 0x0F85, 0x0F85,
784 0x0F88, 0x0F8B,
785 0x0FBE, 0x0FC5,
786 0x0FC7, 0x0FCC,
787 0x0FCF, 0x0FCF,
788 0x1000, 0x1021,
789 0x1023, 0x1027,
790 0x1029, 0x102A,
791 0x102C, 0x102C,
792 0x1031, 0x1031,
793 0x1038, 0x1038,
794 0x1040, 0x1057,
795 0x10A0, 0x10C5,
796 0x10D0, 0x10F8,
797 0x10FB, 0x10FB,
798 0x1100, 0x1159,
799 0x115F, 0x11A2,
800 0x11A8, 0x11F9,
801 0x1200, 0x1206,
802 0x1208, 0x1246,
803 0x1248, 0x1248,
804 0x124A, 0x124D,
805 0x1250, 0x1256,
806 0x1258, 0x1258,
807 0x125A, 0x125D,
808 0x1260, 0x1286,
809 0x1288, 0x1288,
810 0x128A, 0x128D,
811 0x1290, 0x12AE,
812 0x12B0, 0x12B0,
813 0x12B2, 0x12B5,
814 0x12B8, 0x12BE,
815 0x12C0, 0x12C0,
816 0x12C2, 0x12C5,
817 0x12C8, 0x12CE,
818 0x12D0, 0x12D6,
819 0x12D8, 0x12EE,
820 0x12F0, 0x130E,
821 0x1310, 0x1310,
822 0x1312, 0x1315,
823 0x1318, 0x131E,
824 0x1320, 0x1346,
825 0x1348, 0x135A,
826 0x1361, 0x137C,
827 0x13A0, 0x13F4,
828 0x1401, 0x1676,
829 0x1681, 0x169A,
830 0x16A0, 0x16F0,
831 0x1700, 0x170C,
832 0x170E, 0x1711,
833 0x1720, 0x1731,
834 0x1735, 0x1736,
835 0x1740, 0x1751,
836 0x1760, 0x176C,
837 0x176E, 0x1770,
838 0x1780, 0x17B6,
839 0x17BE, 0x17C5,
840 0x17C7, 0x17C8,
841 0x17D4, 0x17DA,
842 0x17DC, 0x17DC,
843 0x17E0, 0x17E9,
844 0x1810, 0x1819,
845 0x1820, 0x1877,
846 0x1880, 0x18A8,
847 0x1E00, 0x1E9B,
848 0x1EA0, 0x1EF9,
849 0x1F00, 0x1F15,
850 0x1F18, 0x1F1D,
851 0x1F20, 0x1F45,
852 0x1F48, 0x1F4D,
853 0x1F50, 0x1F57,
854 0x1F59, 0x1F59,
855 0x1F5B, 0x1F5B,
856 0x1F5D, 0x1F5D,
857 0x1F5F, 0x1F7D,
858 0x1F80, 0x1FB4,
859 0x1FB6, 0x1FBC,
860 0x1FBE, 0x1FBE,
861 0x1FC2, 0x1FC4,
862 0x1FC6, 0x1FCC,
863 0x1FD0, 0x1FD3,
864 0x1FD6, 0x1FDB,
865 0x1FE0, 0x1FEC,
866 0x1FF2, 0x1FF4,
867 0x1FF6, 0x1FFC,
868 0x200E, 0x200E,
869 0x2071, 0x2071,
870 0x207F, 0x207F,
871 0x2102, 0x2102,
872 0x2107, 0x2107,
873 0x210A, 0x2113,
874 0x2115, 0x2115,
875 0x2119, 0x211D,
876 0x2124, 0x2124,
877 0x2126, 0x2126,
878 0x2128, 0x2128,
879 0x212A, 0x212D,
880 0x212F, 0x2131,
881 0x2133, 0x2139,
882 0x213D, 0x213F,
883 0x2145, 0x2149,
884 0x2160, 0x2183,
885 0x2336, 0x237A,
886 0x2395, 0x2395,
887 0x249C, 0x24E9,
888 0x3005, 0x3007,
889 0x3021, 0x3029,
890 0x3031, 0x3035,
891 0x3038, 0x303C,
892 0x3041, 0x3096,
893 0x309D, 0x309F,
894 0x30A1, 0x30FA,
895 0x30FC, 0x30FF,
896 0x3105, 0x312C,
897 0x3131, 0x318E,
898 0x3190, 0x31B7,
899 0x31F0, 0x321C,
900 0x3220, 0x3243,
901 0x3260, 0x327B,
902 0x327F, 0x32B0,
903 0x32C0, 0x32CB,
904 0x32D0, 0x32FE,
905 0x3300, 0x3376,
906 0x337B, 0x33DD,
907 0x33E0, 0x33FE,
908 0x3400, 0x4DB5,
909 0x4E00, 0x9FA5,
910 0xA000, 0xA48C,
911 0xAC00, 0xD7A3,
912 0xD800, 0xFA2D,
913 0xFA30, 0xFA6A,
914 0xFB00, 0xFB06,
915 0xFB13, 0xFB17,
916 0xFF21, 0xFF3A,
917 0xFF41, 0xFF5A,
918 0xFF66, 0xFFBE,
919 0xFFC2, 0xFFC7,
920 0xFFCA, 0xFFCF,
921 0xFFD2, 0xFFD7,
922 0xFFDA, 0xFFDC,
923 0x10300, 0x1031E,
924 0x10320, 0x10323,
925 0x10330, 0x1034A,
926 0x10400, 0x10425,
927 0x10428, 0x1044D,
928 0x1D000, 0x1D0F5,
929 0x1D100, 0x1D126,
930 0x1D12A, 0x1D166,
931 0x1D16A, 0x1D172,
932 0x1D183, 0x1D184,
933 0x1D18C, 0x1D1A9,
934 0x1D1AE, 0x1D1DD,
935 0x1D400, 0x1D454,
936 0x1D456, 0x1D49C,
937 0x1D49E, 0x1D49F,
938 0x1D4A2, 0x1D4A2,
939 0x1D4A5, 0x1D4A6,
940 0x1D4A9, 0x1D4AC,
941 0x1D4AE, 0x1D4B9,
942 0x1D4BB, 0x1D4BB,
943 0x1D4BD, 0x1D4C0,
944 0x1D4C2, 0x1D4C3,
945 0x1D4C5, 0x1D505,
946 0x1D507, 0x1D50A,
947 0x1D50D, 0x1D514,
948 0x1D516, 0x1D51C,
949 0x1D51E, 0x1D539,
950 0x1D53B, 0x1D53E,
951 0x1D540, 0x1D544,
952 0x1D546, 0x1D546,
953 0x1D54A, 0x1D550,
954 0x1D552, 0x1D6A3,
955 0x1D6A8, 0x1D7C9,
956 0x20000, 0x2A6D6,
957 0x2F800, 0x2FA1D,
958 0xF0000, 0xFFFFD,
959 0x100000, 0x10FFFD
962 /* End of stringprep tables */
/*
 * Test whether a Unicode codepoint falls inside any range of a table.
 *
 * The table's element count is obtained with lengthof(map), so "map" is
 * expected to be one of the static range arrays itself, not a pointer to
 * one.
 */
#define IS_CODE_IN_TABLE(code, map) \
	is_code_in_table((code), (map), lengthof(map))
968 static int
969 codepoint_range_cmp(const void *a, const void *b)
971 const pg_wchar *key = (const pg_wchar *) a;
972 const pg_wchar *range = (const pg_wchar *) b;
974 if (*key < range[0])
975 return -1; /* less than lower bound */
976 if (*key > range[1])
977 return 1; /* greater than upper bound */
979 return 0; /* within range */
982 static bool
983 is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize)
985 Assert(mapsize % 2 == 0);
987 if (code < map[0] || code > map[mapsize - 1])
988 return false;
990 if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2,
991 codepoint_range_cmp))
992 return true;
993 else
994 return false;
/*
 * Count the characters in a null-terminated UTF-8 string.
 *
 * Walks the string one multibyte sequence at a time, using the length
 * reported by pg_utf_mblen() and validating each sequence with
 * pg_utf8_islegal().
 *
 * Returns the number of characters, or -1 if the input is not valid UTF-8
 * (including a sequence that would extend past the terminator).
 */
static int
pg_utf8_string_len(const char *source)
{
	const unsigned char *cur = (const unsigned char *) source;
	size_t		remaining;
	int			nchars = 0;

	for (remaining = strlen(source); remaining != 0; nchars++)
	{
		int			seqlen = pg_utf_mblen(cur);

		/* The legality check is only made if the sequence fits. */
		if (!(remaining >= seqlen && pg_utf8_islegal(cur, seqlen)))
			return -1;

		cur += seqlen;
		remaining -= seqlen;
	}

	return nchars;
}
1027 * pg_saslprep - Normalize a password with SASLprep.
1029 * SASLprep requires the input to be in UTF-8 encoding, but PostgreSQL
1030 * supports many encodings, so we don't blindly assume that. pg_saslprep
1031 * will check if the input looks like valid UTF-8, and returns
1032 * SASLPREP_INVALID_UTF8 if not.
1034 * If the string contains prohibited characters (or more precisely, if the
1035 * output string would contain prohibited characters after normalization),
1036 * returns SASLPREP_PROHIBITED.
1038 * On success, returns SASLPREP_SUCCESS, and the normalized string in
1039 * *output.
1041 * In frontend, the normalized string is malloc'd, and the caller is
1042 * responsible for freeing it. If an allocation fails, returns
1043 * SASLPREP_OOM. In backend, the normalized string is palloc'd instead,
1044 * and a failed allocation leads to ereport(ERROR).
1046 pg_saslprep_rc
1047 pg_saslprep(const char *input, char **output)
1049 pg_wchar *input_chars = NULL;
1050 pg_wchar *output_chars = NULL;
1051 int input_size;
1052 char *result;
1053 int result_size;
1054 int count;
1055 int i;
1056 bool contains_RandALCat;
1057 unsigned char *p;
1058 pg_wchar *wp;
1060 /* Ensure we return *output as NULL on failure */
1061 *output = NULL;
1064 * Quick check if the input is pure ASCII. An ASCII string requires no
1065 * further processing.
1067 if (pg_is_ascii(input))
1069 *output = STRDUP(input);
1070 if (!(*output))
1071 goto oom;
1072 return SASLPREP_SUCCESS;
1076 * Convert the input from UTF-8 to an array of Unicode codepoints.
1078 * This also checks that the input is a legal UTF-8 string.
1080 input_size = pg_utf8_string_len(input);
1081 if (input_size < 0)
1082 return SASLPREP_INVALID_UTF8;
1083 if (input_size >= MaxAllocSize / sizeof(pg_wchar))
1084 goto oom;
1086 input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));
1087 if (!input_chars)
1088 goto oom;
1090 p = (unsigned char *) input;
1091 for (i = 0; i < input_size; i++)
1093 input_chars[i] = utf8_to_unicode(p);
1094 p += pg_utf_mblen(p);
1096 input_chars[i] = (pg_wchar) '\0';
1099 * The steps below correspond to the steps listed in [RFC3454], Section
1100 * "2. Preparation Overview"
1104 * 1) Map -- For each character in the input, check if it has a mapping
1105 * and, if so, replace it with its mapping.
1107 count = 0;
1108 for (i = 0; i < input_size; i++)
1110 pg_wchar code = input_chars[i];
1112 if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))
1113 input_chars[count++] = 0x0020;
1114 else if (IS_CODE_IN_TABLE(code, commonly_mapped_to_nothing_ranges))
1116 /* map to nothing */
1118 else
1119 input_chars[count++] = code;
1121 input_chars[count] = (pg_wchar) '\0';
1122 input_size = count;
1124 if (input_size == 0)
1125 goto prohibited; /* don't allow empty password */
1128 * 2) Normalize -- Normalize the result of step 1 using Unicode
1129 * normalization.
1131 output_chars = unicode_normalize(UNICODE_NFKC, input_chars);
1132 if (!output_chars)
1133 goto oom;
1136 * 3) Prohibit -- Check for any characters that are not allowed in the
1137 * output. If any are found, return an error.
1139 for (i = 0; i < input_size; i++)
1141 pg_wchar code = input_chars[i];
1143 if (IS_CODE_IN_TABLE(code, prohibited_output_ranges))
1144 goto prohibited;
1145 if (IS_CODE_IN_TABLE(code, unassigned_codepoint_ranges))
1146 goto prohibited;
1150 * 4) Check bidi -- Possibly check for right-to-left characters, and if
1151 * any are found, make sure that the whole string satisfies the
1152 * requirements for bidirectional strings. If the string does not satisfy
1153 * the requirements for bidirectional strings, return an error.
1155 * [RFC3454], Section "6. Bidirectional Characters" explains in more
1156 * detail what that means:
1158 * "In any profile that specifies bidirectional character handling, all
1159 * three of the following requirements MUST be met:
1161 * 1) The characters in section 5.8 MUST be prohibited.
1163 * 2) If a string contains any RandALCat character, the string MUST NOT
1164 * contain any LCat character.
1166 * 3) If a string contains any RandALCat character, a RandALCat character
1167 * MUST be the first character of the string, and a RandALCat character
1168 * MUST be the last character of the string."
1170 contains_RandALCat = false;
1171 for (i = 0; i < input_size; i++)
1173 pg_wchar code = input_chars[i];
1175 if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges))
1177 contains_RandALCat = true;
1178 break;
1182 if (contains_RandALCat)
1184 pg_wchar first = input_chars[0];
1185 pg_wchar last = input_chars[input_size - 1];
1187 for (i = 0; i < input_size; i++)
1189 pg_wchar code = input_chars[i];
1191 if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges))
1192 goto prohibited;
1195 if (!IS_CODE_IN_TABLE(first, RandALCat_codepoint_ranges) ||
1196 !IS_CODE_IN_TABLE(last, RandALCat_codepoint_ranges))
1197 goto prohibited;
1201 * Finally, convert the result back to UTF-8.
1203 result_size = 0;
1204 for (wp = output_chars; *wp; wp++)
1206 unsigned char buf[4];
1208 unicode_to_utf8(*wp, buf);
1209 result_size += pg_utf_mblen(buf);
1212 result = ALLOC(result_size + 1);
1213 if (!result)
1214 goto oom;
1217 * There are no error exits below here, so the error exit paths don't need
1218 * to worry about possibly freeing "result".
1220 p = (unsigned char *) result;
1221 for (wp = output_chars; *wp; wp++)
1223 unicode_to_utf8(*wp, p);
1224 p += pg_utf_mblen(p);
1226 Assert((char *) p == result + result_size);
1227 *p = '\0';
1229 FREE(input_chars);
1230 FREE(output_chars);
1232 *output = result;
1233 return SASLPREP_SUCCESS;
1235 prohibited:
1236 if (input_chars)
1237 FREE(input_chars);
1238 if (output_chars)
1239 FREE(output_chars);
1241 return SASLPREP_PROHIBITED;
1243 oom:
1244 if (input_chars)
1245 FREE(input_chars);
1246 if (output_chars)
1247 FREE(output_chars);
1249 return SASLPREP_OOM;