util.encodings: Spell out all IDNA 2008 options ICU has
[prosody.git] / util-src / encodings.c
blob6f2676f2240def069644b7508e84069196465334
1 /* Prosody IM
2 -- Copyright (C) 2008-2010 Matthew Wild
3 -- Copyright (C) 2008-2010 Waqas Hussain
4 -- Copyright (C) 1994-2015 Lua.org, PUC-Rio.
5 --
6 -- This project is MIT/X11 licensed. Please see the
7 -- COPYING file in the source package for more information.
8 --
9 */
/*
 * encodings.c
 * Lua library for base64, stringprep and idna encodings
 */
16 /* Newer MSVC compilers deprecate strcpy as unsafe, but we use it in a safe way */
17 #define _CRT_SECURE_NO_DEPRECATE
19 #include <string.h>
20 #include <stdlib.h>
21 #include "lua.h"
22 #include "lauxlib.h"
/* Lua 5.1 has no luaL_setfuncs(); map it onto luaL_register(). The upvalue count N is ignored. */
#if LUA_VERSION_NUM == 501
#define luaL_setfuncs(L, R, N) luaL_register(L, NULL, R)
#endif
28 /***************** BASE64 *****************/
30 static const char code[] =
31 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
33 static void base64_encode(luaL_Buffer *b, unsigned int c1, unsigned int c2, unsigned int c3, int n) {
34 unsigned long tuple = c3 + 256UL * (c2 + 256UL * c1);
35 int i;
36 char s[4];
38 for(i = 0; i < 4; i++) {
39 s[3 - i] = code[tuple % 64];
40 tuple /= 64;
43 for(i = n + 1; i < 4; i++) {
44 s[i] = '=';
47 luaL_addlstring(b, s, 4);
50 static int Lbase64_encode(lua_State *L) { /** encode(s) */
51 size_t l;
52 const unsigned char *s = (const unsigned char *)luaL_checklstring(L, 1, &l);
53 luaL_Buffer b;
54 int n;
55 luaL_buffinit(L, &b);
57 for(n = l / 3; n--; s += 3) {
58 base64_encode(&b, s[0], s[1], s[2], 3);
61 switch(l % 3) {
62 case 1:
63 base64_encode(&b, s[0], 0, 0, 1);
64 break;
66 case 2:
67 base64_encode(&b, s[0], s[1], 0, 2);
68 break;
71 luaL_pushresult(&b);
72 return 1;
75 static void base64_decode(luaL_Buffer *b, int c1, int c2, int c3, int c4, int n) {
76 unsigned long tuple = c4 + 64L * (c3 + 64L * (c2 + 64L * c1));
77 char s[3];
79 switch(--n) {
80 case 3:
81 s[2] = (char) tuple;
82 /* Falls through. */
84 case 2:
85 s[1] = (char)(tuple >> 8);
86 /* Falls through. */
88 case 1:
89 s[0] = (char)(tuple >> 16);
92 luaL_addlstring(b, s, n);
95 static int Lbase64_decode(lua_State *L) { /** decode(s) */
96 size_t l;
97 const char *s = luaL_checklstring(L, 1, &l);
98 luaL_Buffer b;
99 int n = 0;
100 char t[4];
101 luaL_buffinit(L, &b);
103 for(;;) {
104 int c = *s++;
106 switch(c) {
107 const char *p;
109 default:
110 p = strchr(code, c);
112 if(p == NULL) {
113 return 0;
116 t[n++] = (char)(p - code);
118 if(n == 4) {
119 base64_decode(&b, t[0], t[1], t[2], t[3], 4);
120 n = 0;
123 break;
125 case '=':
127 switch(n) {
128 case 1:
129 base64_decode(&b, t[0], 0, 0, 0, 1);
130 break;
132 case 2:
133 base64_decode(&b, t[0], t[1], 0, 0, 2);
134 break;
136 case 3:
137 base64_decode(&b, t[0], t[1], t[2], 0, 3);
138 break;
141 n = 0;
142 break;
144 case 0:
145 luaL_pushresult(&b);
146 return 1;
148 case '\n':
149 case '\r':
150 case '\t':
151 case ' ':
152 case '\f':
153 case '\b':
154 break;
159 static const luaL_Reg Reg_base64[] = {
160 { "encode", Lbase64_encode },
161 { "decode", Lbase64_decode },
162 { NULL, NULL }
165 /******************* UTF-8 ********************/
/*
 * Adapted from Lua 5.3
 * Needed because libidn does not validate that input is valid UTF-8
 */
#define MAXUNICODE 0x10FFFF

/*
 * Decode one UTF-8 sequence, returning NULL if byte sequence is invalid.
 *
 * o   - pointer to the first byte of the sequence; the data must be
 *       NUL-terminated so a truncated sequence stops at the terminator.
 * val - if not NULL, receives the decoded code point.
 *
 * Returns a pointer to the byte following the sequence. Rejected inputs:
 * stray continuation bytes, overlong encodings, sequences longer than four
 * bytes, code points above MAXUNICODE and UTF-16 surrogates (D800..DFFF).
 */
static const char *utf8_decode(const char *o, int *val) {
	/* Largest value that is NOT allowed for a sequence with `count` continuation bytes. */
	static const unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFF};
	const unsigned char *s = (const unsigned char *)o;
	unsigned int c = s[0];
	unsigned int res = 0; /* final result */

	if(c < 0x80) { /* ascii? */
		res = c;
	} else {
		int count = 0; /* to count number of continuation bytes */

		while(c & 0x40) { /* still have continuation bytes? */
			int cc;

			/*
			 * A 4th continuation byte means a sequence of 5+ bytes, which is
			 * never valid. Bailing out here also keeps the shift below at
			 * most 15 bits; with larger counts it could reach the width of
			 * unsigned int, which is undefined behaviour.
			 */
			if(count == 3) {
				return NULL;
			}

			cc = s[++count]; /* read next byte */

			if((cc & 0xC0) != 0x80) { /* not a continuation byte? */
				return NULL; /* invalid byte sequence */
			}

			res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
			c <<= 1; /* to test next bit */
		}

		res |= ((c & 0x7F) << (count * 5)); /* add first byte */

		if(res > MAXUNICODE || res <= limits[count] || (0xd800 <= res && res <= 0xdfff)) {
			return NULL; /* invalid byte sequence */
		}

		s += count; /* skip continuation bytes read */
	}

	if(val) {
		*val = res;
	}

	return (const char *)s + 1; /* +1 to include first byte */
}
216 * Check that a string is valid UTF-8
217 * Returns NULL if not
219 const char *check_utf8(lua_State *L, int idx, size_t *l) {
220 size_t pos, len;
221 const char *s = luaL_checklstring(L, idx, &len);
222 pos = 0;
224 while(pos <= len) {
225 const char *s1 = utf8_decode(s + pos, NULL);
227 if(s1 == NULL) { /* conversion error? */
228 return NULL;
231 pos = s1 - s;
234 if(l != NULL) {
235 *l = len;
238 return s;
241 static int Lutf8_valid(lua_State *L) {
242 lua_pushboolean(L, check_utf8(L, 1, NULL) != NULL);
243 return 1;
246 static int Lutf8_length(lua_State *L) {
247 size_t len;
249 if(!check_utf8(L, 1, &len)) {
250 lua_pushnil(L);
251 lua_pushliteral(L, "invalid utf8");
252 return 2;
255 lua_pushinteger(L, len);
256 return 1;
259 static const luaL_Reg Reg_utf8[] = {
260 { "valid", Lutf8_valid },
261 { "length", Lutf8_length },
262 { NULL, NULL }
265 /***************** STRINGPREP *****************/
266 #ifdef USE_STRINGPREP_ICU
268 #include <unicode/usprep.h>
269 #include <unicode/ustring.h>
270 #include <unicode/utrace.h>
271 #include <unicode/uspoof.h>
272 #include <unicode/uidna.h>
274 static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) {
275 size_t input_len;
276 int32_t unprepped_len, prepped_len, output_len;
277 const char *input;
278 char output[1024];
280 UChar unprepped[1024]; /* Temporary unicode buffer (1024 characters) */
281 UChar prepped[1024];
283 UErrorCode err = U_ZERO_ERROR;
285 if(!lua_isstring(L, 1)) {
286 lua_pushnil(L);
287 return 1;
290 input = lua_tolstring(L, 1, &input_len);
292 if(input_len >= 1024) {
293 lua_pushnil(L);
294 return 1;
297 u_strFromUTF8(unprepped, 1024, &unprepped_len, input, input_len, &err);
299 if(U_FAILURE(err)) {
300 lua_pushnil(L);
301 return 1;
304 prepped_len = usprep_prepare(profile, unprepped, unprepped_len, prepped, 1024, USPREP_ALLOW_UNASSIGNED, NULL, &err);
306 if(U_FAILURE(err)) {
307 lua_pushnil(L);
308 return 1;
309 } else {
310 u_strToUTF8(output, 1024, &output_len, prepped, prepped_len, &err);
312 if(U_SUCCESS(err) && output_len < 1024) {
313 lua_pushlstring(L, output, output_len);
314 } else {
315 lua_pushnil(L);
318 return 1;
322 UStringPrepProfile *icu_nameprep;
323 UStringPrepProfile *icu_nodeprep;
324 UStringPrepProfile *icu_resourceprep;
325 UStringPrepProfile *icu_saslprep;
326 USpoofChecker *icu_spoofcheck;
327 UIDNA *icu_idna2008;
329 #if (U_ICU_VERSION_MAJOR_NUM < 58)
330 /* COMPAT */
331 #define USPOOF_CONFUSABLE (USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE)
332 #endif
334 /* initialize global ICU stringprep profiles */
335 void init_icu(void) {
336 UErrorCode err = U_ZERO_ERROR;
337 utrace_setLevel(UTRACE_VERBOSE);
338 icu_nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, &err);
339 icu_nodeprep = usprep_openByType(USPREP_RFC3920_NODEPREP, &err);
340 icu_resourceprep = usprep_openByType(USPREP_RFC3920_RESOURCEPREP, &err);
341 icu_saslprep = usprep_openByType(USPREP_RFC4013_SASLPREP, &err);
342 icu_spoofcheck = uspoof_open(&err);
343 uspoof_setChecks(icu_spoofcheck, USPOOF_CONFUSABLE, &err);
344 int options = UIDNA_DEFAULT;
345 #if 0
346 /* COMPAT with future Unicode versions */
347 options |= UIDNA_ALLOW_UNASSIGNED;
348 #endif
349 #if 1
350 /* Forbid eg labels starting with _ */
351 options |= UIDNA_USE_STD3_RULES;
352 #endif
353 #if 0
354 /* TODO determine if we need this */
355 options |= UIDNA_CHECK_BIDI;
356 #endif
357 #if 0
358 /* UTS46 makes it sound like these are the responsibility of registrars */
359 options |= UIDNA_CHECK_CONTEXTJ;
360 options |= UIDNA_CHECK_CONTEXTO;
361 #endif
362 #if 0
363 /* This disables COMPAT with IDNA 2003 */
364 options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
365 options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
366 #endif
367 icu_idna2008 = uidna_openUTS46(options, &err);
369 if(U_FAILURE(err)) {
370 fprintf(stderr, "[c] util.encodings: error: %s\n", u_errorName((UErrorCode)err));
374 #define MAKE_PREP_FUNC(myFunc, prep) \
375 static int myFunc(lua_State *L) { return icu_stringprep_prep(L, prep); }
377 MAKE_PREP_FUNC(Lstringprep_nameprep, icu_nameprep) /** stringprep.nameprep(s) */
378 MAKE_PREP_FUNC(Lstringprep_nodeprep, icu_nodeprep) /** stringprep.nodeprep(s) */
379 MAKE_PREP_FUNC(Lstringprep_resourceprep, icu_resourceprep) /** stringprep.resourceprep(s) */
380 MAKE_PREP_FUNC(Lstringprep_saslprep, icu_saslprep) /** stringprep.saslprep(s) */
382 static const luaL_Reg Reg_stringprep[] = {
383 { "nameprep", Lstringprep_nameprep },
384 { "nodeprep", Lstringprep_nodeprep },
385 { "resourceprep", Lstringprep_resourceprep },
386 { "saslprep", Lstringprep_saslprep },
387 { NULL, NULL }
389 #else /* USE_STRINGPREP_ICU */
391 /****************** libidn ********************/
393 #include <stringprep.h>
395 static int stringprep_prep(lua_State *L, const Stringprep_profile *profile) {
396 size_t len;
397 const char *s;
398 char string[1024];
399 int ret;
401 if(!lua_isstring(L, 1)) {
402 lua_pushnil(L);
403 return 1;
406 s = check_utf8(L, 1, &len);
408 if(s == NULL || len >= 1024 || len != strlen(s)) {
409 lua_pushnil(L);
410 return 1; /* TODO return error message */
413 strcpy(string, s);
414 ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile);
416 if(ret == STRINGPREP_OK) {
417 lua_pushstring(L, string);
418 return 1;
419 } else {
420 lua_pushnil(L);
421 return 1; /* TODO return error message */
425 #define MAKE_PREP_FUNC(myFunc, prep) \
426 static int myFunc(lua_State *L) { return stringprep_prep(L, prep); }
428 MAKE_PREP_FUNC(Lstringprep_nameprep, stringprep_nameprep) /** stringprep.nameprep(s) */
429 MAKE_PREP_FUNC(Lstringprep_nodeprep, stringprep_xmpp_nodeprep) /** stringprep.nodeprep(s) */
430 MAKE_PREP_FUNC(Lstringprep_resourceprep, stringprep_xmpp_resourceprep) /** stringprep.resourceprep(s) */
431 MAKE_PREP_FUNC(Lstringprep_saslprep, stringprep_saslprep) /** stringprep.saslprep(s) */
433 static const luaL_Reg Reg_stringprep[] = {
434 { "nameprep", Lstringprep_nameprep },
435 { "nodeprep", Lstringprep_nodeprep },
436 { "resourceprep", Lstringprep_resourceprep },
437 { "saslprep", Lstringprep_saslprep },
438 { NULL, NULL }
440 #endif
442 /***************** IDNA *****************/
443 #ifdef USE_STRINGPREP_ICU
444 #include <unicode/ustdio.h>
445 #include <unicode/uidna.h>
446 /* IDNA2003 or IDNA2008 ? ? ? */
447 static int Lidna_to_ascii(lua_State *L) { /** idna.to_ascii(s) */
448 size_t len;
449 int32_t ulen, dest_len, output_len;
450 const char *s = luaL_checklstring(L, 1, &len);
451 UChar ustr[1024];
452 UErrorCode err = U_ZERO_ERROR;
453 UChar dest[1024];
454 char output[1024];
456 u_strFromUTF8(ustr, 1024, &ulen, s, len, &err);
458 if(U_FAILURE(err)) {
459 lua_pushnil(L);
460 return 1;
463 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
464 dest_len = uidna_nameToASCII(icu_idna2008, ustr, ulen, dest, 256, &info, &err);
466 if(U_FAILURE(err) || info.errors) {
467 lua_pushnil(L);
468 return 1;
469 } else {
470 u_strToUTF8(output, 1024, &output_len, dest, dest_len, &err);
472 if(U_SUCCESS(err) && output_len < 1024) {
473 lua_pushlstring(L, output, output_len);
474 } else {
475 lua_pushnil(L);
478 return 1;
482 static int Lidna_to_unicode(lua_State *L) { /** idna.to_unicode(s) */
483 size_t len;
484 int32_t ulen, dest_len, output_len;
485 const char *s = luaL_checklstring(L, 1, &len);
486 UChar ustr[1024];
487 UErrorCode err = U_ZERO_ERROR;
488 UChar dest[1024];
489 char output[1024];
491 u_strFromUTF8(ustr, 1024, &ulen, s, len, &err);
493 if(U_FAILURE(err)) {
494 lua_pushnil(L);
495 return 1;
498 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
499 dest_len = uidna_nameToUnicode(icu_idna2008, ustr, ulen, dest, 1024, &info, &err);
501 if(U_FAILURE(err) || info.errors) {
502 lua_pushnil(L);
503 return 1;
504 } else {
505 u_strToUTF8(output, 1024, &output_len, dest, dest_len, &err);
507 if(U_SUCCESS(err) && output_len < 1024) {
508 lua_pushlstring(L, output, output_len);
509 } else {
510 lua_pushnil(L);
513 return 1;
517 static int Lskeleton(lua_State *L) {
518 size_t len;
519 int32_t ulen, dest_len, output_len;
520 const char *s = luaL_checklstring(L, 1, &len);
521 UErrorCode err = U_ZERO_ERROR;
522 UChar ustr[1024];
523 UChar dest[1024];
524 char output[1024];
526 u_strFromUTF8(ustr, 1024, &ulen, s, len, &err);
528 if(U_FAILURE(err)) {
529 lua_pushnil(L);
530 return 1;
533 dest_len = uspoof_getSkeleton(icu_spoofcheck, 0, ustr, ulen, dest, 1024, &err);
535 if(U_FAILURE(err)) {
536 lua_pushnil(L);
537 return 1;
540 u_strToUTF8(output, 1024, &output_len, dest, dest_len, &err);
542 if(U_SUCCESS(err)) {
543 lua_pushlstring(L, output, output_len);
544 return 1;
547 lua_pushnil(L);
548 return 1;
551 #else /* USE_STRINGPREP_ICU */
552 /****************** libidn ********************/
554 #include <idna.h>
555 #include <idn-free.h>
557 static int Lidna_to_ascii(lua_State *L) { /** idna.to_ascii(s) */
558 size_t len;
559 const char *s = check_utf8(L, 1, &len);
560 char *output = NULL;
561 int ret;
563 if(s == NULL || len != strlen(s)) {
564 lua_pushnil(L);
565 return 1; /* TODO return error message */
568 ret = idna_to_ascii_8z(s, &output, IDNA_USE_STD3_ASCII_RULES);
570 if(ret == IDNA_SUCCESS) {
571 lua_pushstring(L, output);
572 idn_free(output);
573 return 1;
574 } else {
575 lua_pushnil(L);
576 idn_free(output);
577 return 1; /* TODO return error message */
581 static int Lidna_to_unicode(lua_State *L) { /** idna.to_unicode(s) */
582 size_t len;
583 const char *s = luaL_checklstring(L, 1, &len);
584 char *output = NULL;
585 int ret = idna_to_unicode_8z8z(s, &output, 0);
587 if(ret == IDNA_SUCCESS) {
588 lua_pushstring(L, output);
589 idn_free(output);
590 return 1;
591 } else {
592 lua_pushnil(L);
593 idn_free(output);
594 return 1; /* TODO return error message */
597 #endif
599 static const luaL_Reg Reg_idna[] = {
600 { "to_ascii", Lidna_to_ascii },
601 { "to_unicode", Lidna_to_unicode },
602 { NULL, NULL }
605 /***************** end *****************/
607 LUALIB_API int luaopen_util_encodings(lua_State *L) {
608 #if (LUA_VERSION_NUM > 501)
609 luaL_checkversion(L);
610 #endif
611 #ifdef USE_STRINGPREP_ICU
612 init_icu();
613 #endif
614 lua_newtable(L);
616 lua_newtable(L);
617 luaL_setfuncs(L, Reg_base64, 0);
618 lua_setfield(L, -2, "base64");
620 lua_newtable(L);
621 luaL_setfuncs(L, Reg_stringprep, 0);
622 lua_setfield(L, -2, "stringprep");
624 lua_newtable(L);
625 luaL_setfuncs(L, Reg_idna, 0);
626 lua_setfield(L, -2, "idna");
628 lua_newtable(L);
629 luaL_setfuncs(L, Reg_utf8, 0);
630 lua_setfield(L, -2, "utf8");
632 #ifdef USE_STRINGPREP_ICU
633 lua_newtable(L);
634 lua_pushcfunction(L, Lskeleton);
635 lua_setfield(L, -2, "skeleton");
636 lua_setfield(L, -2, "confusable");
637 #endif
639 lua_pushliteral(L, "-3.14");
640 lua_setfield(L, -2, "version");
641 return 1;