CHANGES: Update for release
[prosody.git] / util-src / encodings.c
blob465819d5e6a09e6d7104845ffba34fe3f31271f3
1 /* Prosody IM
2 -- Copyright (C) 2008-2010 Matthew Wild
3 -- Copyright (C) 2008-2010 Waqas Hussain
4 -- Copyright (C) 1994-2015 Lua.org, PUC-Rio.
5 --
6 -- This project is MIT/X11 licensed. Please see the
7 -- COPYING file in the source package for more information.
8 --
9 */
12 * encodings.c
13 * Lua library for base64, stringprep and idna encodings
16 /* Newer MSVC compilers deprecate strcpy as unsafe, but we use it in a safe way */
17 #define _CRT_SECURE_NO_DEPRECATE
19 #include <string.h>
20 #include <stdlib.h>
21 #include "lua.h"
22 #include "lauxlib.h"
24 #if (LUA_VERSION_NUM == 501)
25 #define luaL_setfuncs(L, R, N) luaL_register(L, NULL, R)
26 #endif
28 /***************** BASE64 *****************/
/* The base64 alphabet: the character for a 6-bit value v is code[v]. */
static const char code[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";
33 static void base64_encode(luaL_Buffer *b, unsigned int c1, unsigned int c2, unsigned int c3, int n) {
34 unsigned long tuple = c3 + 256UL * (c2 + 256UL * c1);
35 int i;
36 char s[4];
38 for(i = 0; i < 4; i++) {
39 s[3 - i] = code[tuple % 64];
40 tuple /= 64;
43 for(i = n + 1; i < 4; i++) {
44 s[i] = '=';
47 luaL_addlstring(b, s, 4);
50 static int Lbase64_encode(lua_State *L) { /** encode(s) */
51 size_t l;
52 const unsigned char *s = (const unsigned char *)luaL_checklstring(L, 1, &l);
53 luaL_Buffer b;
54 int n;
55 luaL_buffinit(L, &b);
57 for(n = l / 3; n--; s += 3) {
58 base64_encode(&b, s[0], s[1], s[2], 3);
61 switch(l % 3) {
62 case 1:
63 base64_encode(&b, s[0], 0, 0, 1);
64 break;
66 case 2:
67 base64_encode(&b, s[0], s[1], 0, 2);
68 break;
71 luaL_pushresult(&b);
72 return 1;
75 static void base64_decode(luaL_Buffer *b, int c1, int c2, int c3, int c4, int n) {
76 unsigned long tuple = c4 + 64L * (c3 + 64L * (c2 + 64L * c1));
77 char s[3];
79 switch(--n) {
80 case 3:
81 s[2] = (char) tuple;
83 case 2:
84 s[1] = (char)(tuple >> 8);
86 case 1:
87 s[0] = (char)(tuple >> 16);
90 luaL_addlstring(b, s, n);
93 static int Lbase64_decode(lua_State *L) { /** decode(s) */
94 size_t l;
95 const char *s = luaL_checklstring(L, 1, &l);
96 luaL_Buffer b;
97 int n = 0;
98 char t[4];
99 luaL_buffinit(L, &b);
101 for(;;) {
102 int c = *s++;
104 switch(c) {
105 const char *p;
107 default:
108 p = strchr(code, c);
110 if(p == NULL) {
111 return 0;
114 t[n++] = (char)(p - code);
116 if(n == 4) {
117 base64_decode(&b, t[0], t[1], t[2], t[3], 4);
118 n = 0;
121 break;
123 case '=':
125 switch(n) {
126 case 1:
127 base64_decode(&b, t[0], 0, 0, 0, 1);
128 break;
130 case 2:
131 base64_decode(&b, t[0], t[1], 0, 0, 2);
132 break;
134 case 3:
135 base64_decode(&b, t[0], t[1], t[2], 0, 3);
136 break;
139 n = 0;
140 break;
142 case 0:
143 luaL_pushresult(&b);
144 return 1;
146 case '\n':
147 case '\r':
148 case '\t':
149 case ' ':
150 case '\f':
151 case '\b':
152 break;
157 static const luaL_Reg Reg_base64[] = {
158 { "encode", Lbase64_encode },
159 { "decode", Lbase64_decode },
160 { NULL, NULL }
163 /******************* UTF-8 ********************/
166 * Adapted from Lua 5.3
167 * Needed because libidn does not validate that input is valid UTF-8
#define MAXUNICODE 0x10FFFF

/*
 * Decode one UTF-8 sequence, returning NULL if byte sequence is invalid.
 *
 * o points at the first byte of the sequence. On success the decoded code
 * point is stored in *val (when val is not NULL) and the return value points
 * just past the sequence. Rejected inputs: stray or missing continuation
 * bytes, sequences with more than 3 continuation bytes, overlong encodings,
 * values above MAXUNICODE and UTF-16 surrogates (0xD800-0xDFFF).
 */
static const char *utf8_decode(const char *o, int *val) {
	/*
	 * limits[count] is the largest value that is NOT acceptable for a
	 * sequence with `count` continuation bytes (i.e. it would be overlong).
	 * limits[0] covers a continuation byte appearing as the first byte.
	 */
	static const unsigned int limits[] = {0xFF, 0x7F, 0x7FF, 0xFFFF};
	const unsigned char *s = (const unsigned char *)o;
	unsigned int c = s[0];
	unsigned int res = 0; /* final result */

	if(c < 0x80) { /* ascii? */
		res = c;
	} else {
		int count = 0; /* to count number of continuation bytes */

		while(c & 0x40) { /* still have continuation bytes? */
			int cc;

			/*
			 * A 4th continuation byte can never be valid. Stopping here
			 * (instead of after the loop) also keeps the shift below
			 * within the width of unsigned int for lead bytes like 0xFF.
			 */
			if(count == 3) {
				return NULL; /* invalid byte sequence */
			}

			cc = s[++count]; /* read next byte */

			if((cc & 0xC0) != 0x80) { /* not a continuation byte? */
				return NULL; /* invalid byte sequence */
			}

			res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
			c <<= 1; /* to test next bit */
		}

		res |= ((c & 0x7F) << (count * 5)); /* add first byte */

		if(res > MAXUNICODE || res <= limits[count] || (0xd800 <= res && res <= 0xdfff)) {
			return NULL; /* invalid byte sequence */
		}

		s += count; /* skip continuation bytes read */
	}

	if(val) {
		*val = res;
	}

	return (const char *)s + 1; /* +1 to include first byte */
}
214 * Check that a string is valid UTF-8
215 * Returns NULL if not
217 const char *check_utf8(lua_State *L, int idx, size_t *l) {
218 size_t pos, len;
219 const char *s = luaL_checklstring(L, 1, &len);
220 pos = 0;
222 while(pos <= len) {
223 const char *s1 = utf8_decode(s + pos, NULL);
225 if(s1 == NULL) { /* conversion error? */
226 return NULL;
229 pos = s1 - s;
232 if(l != NULL) {
233 *l = len;
236 return s;
239 static int Lutf8_valid(lua_State *L) {
240 lua_pushboolean(L, check_utf8(L, 1, NULL) != NULL);
241 return 1;
244 static int Lutf8_length(lua_State *L) {
245 size_t len;
247 if(!check_utf8(L, 1, &len)) {
248 lua_pushnil(L);
249 lua_pushliteral(L, "invalid utf8");
250 return 2;
253 lua_pushinteger(L, len);
254 return 1;
257 static const luaL_Reg Reg_utf8[] = {
258 { "valid", Lutf8_valid },
259 { "length", Lutf8_length },
260 { NULL, NULL }
263 /***************** STRINGPREP *****************/
264 #ifdef USE_STRINGPREP_ICU
266 #include <unicode/usprep.h>
267 #include <unicode/ustring.h>
268 #include <unicode/utrace.h>
270 static int icu_stringprep_prep(lua_State *L, const UStringPrepProfile *profile) {
271 size_t input_len;
272 int32_t unprepped_len, prepped_len, output_len;
273 const char *input;
274 char output[1024];
276 UChar unprepped[1024]; /* Temporary unicode buffer (1024 characters) */
277 UChar prepped[1024];
279 UErrorCode err = U_ZERO_ERROR;
281 if(!lua_isstring(L, 1)) {
282 lua_pushnil(L);
283 return 1;
286 input = lua_tolstring(L, 1, &input_len);
288 if(input_len >= 1024) {
289 lua_pushnil(L);
290 return 1;
293 u_strFromUTF8(unprepped, 1024, &unprepped_len, input, input_len, &err);
295 if(U_FAILURE(err)) {
296 lua_pushnil(L);
297 return 1;
300 prepped_len = usprep_prepare(profile, unprepped, unprepped_len, prepped, 1024, 0, NULL, &err);
302 if(U_FAILURE(err)) {
303 lua_pushnil(L);
304 return 1;
305 } else {
306 u_strToUTF8(output, 1024, &output_len, prepped, prepped_len, &err);
308 if(U_SUCCESS(err) && output_len < 1024) {
309 lua_pushlstring(L, output, output_len);
310 } else {
311 lua_pushnil(L);
314 return 1;
318 UStringPrepProfile *icu_nameprep;
319 UStringPrepProfile *icu_nodeprep;
320 UStringPrepProfile *icu_resourceprep;
321 UStringPrepProfile *icu_saslprep;
323 /* initialize global ICU stringprep profiles */
324 void init_icu() {
325 UErrorCode err = U_ZERO_ERROR;
326 utrace_setLevel(UTRACE_VERBOSE);
327 icu_nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, &err);
328 icu_nodeprep = usprep_openByType(USPREP_RFC3920_NODEPREP, &err);
329 icu_resourceprep = usprep_openByType(USPREP_RFC3920_RESOURCEPREP, &err);
330 icu_saslprep = usprep_openByType(USPREP_RFC4013_SASLPREP, &err);
332 if(U_FAILURE(err)) {
333 fprintf(stderr, "[c] util.encodings: error: %s\n", u_errorName((UErrorCode)err));
337 #define MAKE_PREP_FUNC(myFunc, prep) \
338 static int myFunc(lua_State *L) { return icu_stringprep_prep(L, prep); }
340 MAKE_PREP_FUNC(Lstringprep_nameprep, icu_nameprep) /** stringprep.nameprep(s) */
341 MAKE_PREP_FUNC(Lstringprep_nodeprep, icu_nodeprep) /** stringprep.nodeprep(s) */
342 MAKE_PREP_FUNC(Lstringprep_resourceprep, icu_resourceprep) /** stringprep.resourceprep(s) */
343 MAKE_PREP_FUNC(Lstringprep_saslprep, icu_saslprep) /** stringprep.saslprep(s) */
345 static const luaL_Reg Reg_stringprep[] = {
346 { "nameprep", Lstringprep_nameprep },
347 { "nodeprep", Lstringprep_nodeprep },
348 { "resourceprep", Lstringprep_resourceprep },
349 { "saslprep", Lstringprep_saslprep },
350 { NULL, NULL }
352 #else /* USE_STRINGPREP_ICU */
354 /****************** libidn ********************/
356 #include <stringprep.h>
358 static int stringprep_prep(lua_State *L, const Stringprep_profile *profile) {
359 size_t len;
360 const char *s;
361 char string[1024];
362 int ret;
364 if(!lua_isstring(L, 1)) {
365 lua_pushnil(L);
366 return 1;
369 s = check_utf8(L, 1, &len);
371 if(s == NULL || len >= 1024 || len != strlen(s)) {
372 lua_pushnil(L);
373 return 1; /* TODO return error message */
376 strcpy(string, s);
377 ret = stringprep(string, 1024, (Stringprep_profile_flags)0, profile);
379 if(ret == STRINGPREP_OK) {
380 lua_pushstring(L, string);
381 return 1;
382 } else {
383 lua_pushnil(L);
384 return 1; /* TODO return error message */
388 #define MAKE_PREP_FUNC(myFunc, prep) \
389 static int myFunc(lua_State *L) { return stringprep_prep(L, prep); }
391 MAKE_PREP_FUNC(Lstringprep_nameprep, stringprep_nameprep) /** stringprep.nameprep(s) */
392 MAKE_PREP_FUNC(Lstringprep_nodeprep, stringprep_xmpp_nodeprep) /** stringprep.nodeprep(s) */
393 MAKE_PREP_FUNC(Lstringprep_resourceprep, stringprep_xmpp_resourceprep) /** stringprep.resourceprep(s) */
394 MAKE_PREP_FUNC(Lstringprep_saslprep, stringprep_saslprep) /** stringprep.saslprep(s) */
396 static const luaL_Reg Reg_stringprep[] = {
397 { "nameprep", Lstringprep_nameprep },
398 { "nodeprep", Lstringprep_nodeprep },
399 { "resourceprep", Lstringprep_resourceprep },
400 { "saslprep", Lstringprep_saslprep },
401 { NULL, NULL }
403 #endif
405 /***************** IDNA *****************/
406 #ifdef USE_STRINGPREP_ICU
407 #include <unicode/ustdio.h>
408 #include <unicode/uidna.h>
409 /* IDNA2003 or IDNA2008 ? ? ? */
410 static int Lidna_to_ascii(lua_State *L) { /** idna.to_ascii(s) */
411 size_t len;
412 int32_t ulen, dest_len, output_len;
413 const char *s = luaL_checklstring(L, 1, &len);
414 UChar ustr[1024];
415 UErrorCode err = U_ZERO_ERROR;
416 UChar dest[1024];
417 char output[1024];
419 u_strFromUTF8(ustr, 1024, &ulen, s, len, &err);
421 if(U_FAILURE(err)) {
422 lua_pushnil(L);
423 return 1;
426 dest_len = uidna_IDNToASCII(ustr, ulen, dest, 1024, UIDNA_USE_STD3_RULES, NULL, &err);
428 if(U_FAILURE(err)) {
429 lua_pushnil(L);
430 return 1;
431 } else {
432 u_strToUTF8(output, 1024, &output_len, dest, dest_len, &err);
434 if(U_SUCCESS(err) && output_len < 1024) {
435 lua_pushlstring(L, output, output_len);
436 } else {
437 lua_pushnil(L);
440 return 1;
444 static int Lidna_to_unicode(lua_State *L) { /** idna.to_unicode(s) */
445 size_t len;
446 int32_t ulen, dest_len, output_len;
447 const char *s = luaL_checklstring(L, 1, &len);
448 UChar ustr[1024];
449 UErrorCode err = U_ZERO_ERROR;
450 UChar dest[1024];
451 char output[1024];
453 u_strFromUTF8(ustr, 1024, &ulen, s, len, &err);
455 if(U_FAILURE(err)) {
456 lua_pushnil(L);
457 return 1;
460 dest_len = uidna_IDNToUnicode(ustr, ulen, dest, 1024, UIDNA_USE_STD3_RULES, NULL, &err);
462 if(U_FAILURE(err)) {
463 lua_pushnil(L);
464 return 1;
465 } else {
466 u_strToUTF8(output, 1024, &output_len, dest, dest_len, &err);
468 if(U_SUCCESS(err) && output_len < 1024) {
469 lua_pushlstring(L, output, output_len);
470 } else {
471 lua_pushnil(L);
474 return 1;
478 #else /* USE_STRINGPREP_ICU */
479 /****************** libidn ********************/
481 #include <idna.h>
482 #include <idn-free.h>
484 static int Lidna_to_ascii(lua_State *L) { /** idna.to_ascii(s) */
485 size_t len;
486 const char *s = check_utf8(L, 1, &len);
487 char *output = NULL;
488 int ret;
490 if(s == NULL || len != strlen(s)) {
491 lua_pushnil(L);
492 return 1; /* TODO return error message */
495 ret = idna_to_ascii_8z(s, &output, IDNA_USE_STD3_ASCII_RULES);
497 if(ret == IDNA_SUCCESS) {
498 lua_pushstring(L, output);
499 idn_free(output);
500 return 1;
501 } else {
502 lua_pushnil(L);
503 idn_free(output);
504 return 1; /* TODO return error message */
508 static int Lidna_to_unicode(lua_State *L) { /** idna.to_unicode(s) */
509 size_t len;
510 const char *s = luaL_checklstring(L, 1, &len);
511 char *output = NULL;
512 int ret = idna_to_unicode_8z8z(s, &output, 0);
514 if(ret == IDNA_SUCCESS) {
515 lua_pushstring(L, output);
516 idn_free(output);
517 return 1;
518 } else {
519 lua_pushnil(L);
520 idn_free(output);
521 return 1; /* TODO return error message */
524 #endif
526 static const luaL_Reg Reg_idna[] = {
527 { "to_ascii", Lidna_to_ascii },
528 { "to_unicode", Lidna_to_unicode },
529 { NULL, NULL }
532 /***************** end *****************/
534 LUALIB_API int luaopen_util_encodings(lua_State *L) {
535 #if (LUA_VERSION_NUM > 501)
536 luaL_checkversion(L);
537 #endif
538 #ifdef USE_STRINGPREP_ICU
539 init_icu();
540 #endif
541 lua_newtable(L);
543 lua_newtable(L);
544 luaL_setfuncs(L, Reg_base64, 0);
545 lua_setfield(L, -2, "base64");
547 lua_newtable(L);
548 luaL_setfuncs(L, Reg_stringprep, 0);
549 lua_setfield(L, -2, "stringprep");
551 lua_newtable(L);
552 luaL_setfuncs(L, Reg_idna, 0);
553 lua_setfield(L, -2, "idna");
555 lua_newtable(L);
556 luaL_setfuncs(L, Reg_utf8, 0);
557 lua_setfield(L, -2, "utf8");
559 lua_pushliteral(L, "-3.14");
560 lua_setfield(L, -2, "version");
561 return 1;