2 -- Copyright (C) 2008-2010 Matthew Wild
3 -- Copyright (C) 2008-2010 Waqas Hussain
4 -- Copyright (C) 1994-2015 Lua.org, PUC-Rio.
6 -- This project is MIT/X11 licensed. Please see the
7 -- COPYING file in the source package for more information.
13 * Lua library for base64, stringprep and idna encodings
16 /* Newer MSVC compilers deprecate strcpy as unsafe, but we use it in a safe way */
17 #define _CRT_SECURE_NO_DEPRECATE
24 #if (LUA_VERSION_NUM == 501)
25 #define luaL_setfuncs(L, R, N) luaL_register(L, NULL, R)
28 /***************** BASE64 *****************/
30 static const char code
[] =
31 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
33 static void base64_encode(luaL_Buffer
*b
, unsigned int c1
, unsigned int c2
, unsigned int c3
, int n
) {
34 unsigned long tuple
= c3
+ 256UL * (c2
+ 256UL * c1
);
38 for(i
= 0; i
< 4; i
++) {
39 s
[3 - i
] = code
[tuple
% 64];
43 for(i
= n
+ 1; i
< 4; i
++) {
47 luaL_addlstring(b
, s
, 4);
50 static int Lbase64_encode(lua_State
*L
) { /** encode(s) */
52 const unsigned char *s
= (const unsigned char *)luaL_checklstring(L
, 1, &l
);
57 for(n
= l
/ 3; n
--; s
+= 3) {
58 base64_encode(&b
, s
[0], s
[1], s
[2], 3);
63 base64_encode(&b
, s
[0], 0, 0, 1);
67 base64_encode(&b
, s
[0], s
[1], 0, 2);
75 static void base64_decode(luaL_Buffer
*b
, int c1
, int c2
, int c3
, int c4
, int n
) {
76 unsigned long tuple
= c4
+ 64L * (c3
+ 64L * (c2
+ 64L * c1
));
85 s
[1] = (char)(tuple
>> 8);
89 s
[0] = (char)(tuple
>> 16);
92 luaL_addlstring(b
, s
, n
);
95 static int Lbase64_decode(lua_State
*L
) { /** decode(s) */
97 const char *s
= luaL_checklstring(L
, 1, &l
);
101 luaL_buffinit(L
, &b
);
116 t
[n
++] = (char)(p
- code
);
119 base64_decode(&b
, t
[0], t
[1], t
[2], t
[3], 4);
129 base64_decode(&b
, t
[0], 0, 0, 0, 1);
133 base64_decode(&b
, t
[0], t
[1], 0, 0, 2);
137 base64_decode(&b
, t
[0], t
[1], t
[2], 0, 3);
159 static const luaL_Reg Reg_base64
[] = {
160 { "encode", Lbase64_encode
},
161 { "decode", Lbase64_decode
},
165 /******************* UTF-8 ********************/
168 * Adapted from Lua 5.3
169 * Needed because libidn does not validate that input is valid UTF-8
172 #define MAXUNICODE 0x10FFFF
175 * Decode one UTF-8 sequence, returning NULL if byte sequence is invalid.
177 static const char *utf8_decode(const char *o
, int *val
) {
178 static const unsigned int limits
[] = {0xFF, 0x7F, 0x7FF, 0xFFFF};
179 const unsigned char *s
= (const unsigned char *)o
;
180 unsigned int c
= s
[0];
181 unsigned int res
= 0; /* final result */
183 if(c
< 0x80) { /* ascii? */
186 int count
= 0; /* to count number of continuation bytes */
188 while(c
& 0x40) { /* still have continuation bytes? */
189 int cc
= s
[++count
]; /* read next byte */
191 if((cc
& 0xC0) != 0x80) { /* not a continuation byte? */
192 return NULL
; /* invalid byte sequence */
195 res
= (res
<< 6) | (cc
& 0x3F); /* add lower 6 bits from cont. byte */
196 c
<<= 1; /* to test next bit */
199 res
|= ((c
& 0x7F) << (count
* 5)); /* add first byte */
201 if(count
> 3 || res
> MAXUNICODE
|| res
<= limits
[count
] || (0xd800 <= res
&& res
<= 0xdfff)) {
202 return NULL
; /* invalid byte sequence */
205 s
+= count
; /* skip continuation bytes read */
212 return (const char *)s
+ 1; /* +1 to include first byte */
216 * Check that a string is valid UTF-8
217 * Returns NULL if not
219 const char *check_utf8(lua_State
*L
, int idx
, size_t *l
) {
221 const char *s
= luaL_checklstring(L
, idx
, &len
);
225 const char *s1
= utf8_decode(s
+ pos
, NULL
);
227 if(s1
== NULL
) { /* conversion error? */
241 static int Lutf8_valid(lua_State
*L
) {
242 lua_pushboolean(L
, check_utf8(L
, 1, NULL
) != NULL
);
246 static int Lutf8_length(lua_State
*L
) {
249 if(!check_utf8(L
, 1, &len
)) {
251 lua_pushliteral(L
, "invalid utf8");
255 lua_pushinteger(L
, len
);
259 static const luaL_Reg Reg_utf8
[] = {
260 { "valid", Lutf8_valid
},
261 { "length", Lutf8_length
},
265 /***************** STRINGPREP *****************/
266 #ifdef USE_STRINGPREP_ICU
268 #include <unicode/usprep.h>
269 #include <unicode/ustring.h>
270 #include <unicode/utrace.h>
271 #include <unicode/uspoof.h>
272 #include <unicode/uidna.h>
274 static int icu_stringprep_prep(lua_State
*L
, const UStringPrepProfile
*profile
) {
276 int32_t unprepped_len
, prepped_len
, output_len
;
280 UChar unprepped
[1024]; /* Temporary unicode buffer (1024 characters) */
283 UErrorCode err
= U_ZERO_ERROR
;
285 if(!lua_isstring(L
, 1)) {
290 input
= lua_tolstring(L
, 1, &input_len
);
292 if(input_len
>= 1024) {
297 u_strFromUTF8(unprepped
, 1024, &unprepped_len
, input
, input_len
, &err
);
304 prepped_len
= usprep_prepare(profile
, unprepped
, unprepped_len
, prepped
, 1024, USPREP_ALLOW_UNASSIGNED
, NULL
, &err
);
310 u_strToUTF8(output
, 1024, &output_len
, prepped
, prepped_len
, &err
);
312 if(U_SUCCESS(err
) && output_len
< 1024) {
313 lua_pushlstring(L
, output
, output_len
);
322 UStringPrepProfile
*icu_nameprep
;
323 UStringPrepProfile
*icu_nodeprep
;
324 UStringPrepProfile
*icu_resourceprep
;
325 UStringPrepProfile
*icu_saslprep
;
326 USpoofChecker
*icu_spoofcheck
;
329 #if (U_ICU_VERSION_MAJOR_NUM < 58)
331 #define USPOOF_CONFUSABLE (USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE)
334 /* initialize global ICU stringprep profiles */
335 void init_icu(void) {
336 UErrorCode err
= U_ZERO_ERROR
;
337 utrace_setLevel(UTRACE_VERBOSE
);
338 icu_nameprep
= usprep_openByType(USPREP_RFC3491_NAMEPREP
, &err
);
339 icu_nodeprep
= usprep_openByType(USPREP_RFC3920_NODEPREP
, &err
);
340 icu_resourceprep
= usprep_openByType(USPREP_RFC3920_RESOURCEPREP
, &err
);
341 icu_saslprep
= usprep_openByType(USPREP_RFC4013_SASLPREP
, &err
);
342 icu_spoofcheck
= uspoof_open(&err
);
343 uspoof_setChecks(icu_spoofcheck
, USPOOF_CONFUSABLE
, &err
);
344 int options
= UIDNA_DEFAULT
;
346 /* COMPAT with future Unicode versions */
347 options
|= UIDNA_ALLOW_UNASSIGNED
;
350 /* Forbid eg labels starting with _ */
351 options
|= UIDNA_USE_STD3_RULES
;
354 /* TODO determine if we need this */
355 options
|= UIDNA_CHECK_BIDI
;
358 /* UTS46 makes it sound like these are the responsibility of registrars */
359 options
|= UIDNA_CHECK_CONTEXTJ
;
360 options
|= UIDNA_CHECK_CONTEXTO
;
363 /* This disables COMPAT with IDNA 2003 */
364 options
|= UIDNA_NONTRANSITIONAL_TO_ASCII
;
365 options
|= UIDNA_NONTRANSITIONAL_TO_UNICODE
;
367 icu_idna2008
= uidna_openUTS46(options
, &err
);
370 fprintf(stderr
, "[c] util.encodings: error: %s\n", u_errorName((UErrorCode
)err
));
374 #define MAKE_PREP_FUNC(myFunc, prep) \
375 static int myFunc(lua_State *L) { return icu_stringprep_prep(L, prep); }
377 MAKE_PREP_FUNC(Lstringprep_nameprep
, icu_nameprep
) /** stringprep.nameprep(s) */
378 MAKE_PREP_FUNC(Lstringprep_nodeprep
, icu_nodeprep
) /** stringprep.nodeprep(s) */
379 MAKE_PREP_FUNC(Lstringprep_resourceprep
, icu_resourceprep
) /** stringprep.resourceprep(s) */
380 MAKE_PREP_FUNC(Lstringprep_saslprep
, icu_saslprep
) /** stringprep.saslprep(s) */
382 static const luaL_Reg Reg_stringprep
[] = {
383 { "nameprep", Lstringprep_nameprep
},
384 { "nodeprep", Lstringprep_nodeprep
},
385 { "resourceprep", Lstringprep_resourceprep
},
386 { "saslprep", Lstringprep_saslprep
},
389 #else /* USE_STRINGPREP_ICU */
391 /****************** libidn ********************/
393 #include <stringprep.h>
395 static int stringprep_prep(lua_State
*L
, const Stringprep_profile
*profile
) {
401 if(!lua_isstring(L
, 1)) {
406 s
= check_utf8(L
, 1, &len
);
408 if(s
== NULL
|| len
>= 1024 || len
!= strlen(s
)) {
410 return 1; /* TODO return error message */
414 ret
= stringprep(string
, 1024, (Stringprep_profile_flags
)0, profile
);
416 if(ret
== STRINGPREP_OK
) {
417 lua_pushstring(L
, string
);
421 return 1; /* TODO return error message */
425 #define MAKE_PREP_FUNC(myFunc, prep) \
426 static int myFunc(lua_State *L) { return stringprep_prep(L, prep); }
428 MAKE_PREP_FUNC(Lstringprep_nameprep
, stringprep_nameprep
) /** stringprep.nameprep(s) */
429 MAKE_PREP_FUNC(Lstringprep_nodeprep
, stringprep_xmpp_nodeprep
) /** stringprep.nodeprep(s) */
430 MAKE_PREP_FUNC(Lstringprep_resourceprep
, stringprep_xmpp_resourceprep
) /** stringprep.resourceprep(s) */
431 MAKE_PREP_FUNC(Lstringprep_saslprep
, stringprep_saslprep
) /** stringprep.saslprep(s) */
433 static const luaL_Reg Reg_stringprep
[] = {
434 { "nameprep", Lstringprep_nameprep
},
435 { "nodeprep", Lstringprep_nodeprep
},
436 { "resourceprep", Lstringprep_resourceprep
},
437 { "saslprep", Lstringprep_saslprep
},
442 /***************** IDNA *****************/
443 #ifdef USE_STRINGPREP_ICU
444 #include <unicode/ustdio.h>
445 #include <unicode/uidna.h>
446 /* IDNA2003 or IDNA2008 ? ? ? */
447 static int Lidna_to_ascii(lua_State
*L
) { /** idna.to_ascii(s) */
449 int32_t ulen
, dest_len
, output_len
;
450 const char *s
= luaL_checklstring(L
, 1, &len
);
452 UErrorCode err
= U_ZERO_ERROR
;
456 u_strFromUTF8(ustr
, 1024, &ulen
, s
, len
, &err
);
463 UIDNAInfo info
= UIDNA_INFO_INITIALIZER
;
464 dest_len
= uidna_nameToASCII(icu_idna2008
, ustr
, ulen
, dest
, 256, &info
, &err
);
466 if(U_FAILURE(err
) || info
.errors
) {
470 u_strToUTF8(output
, 1024, &output_len
, dest
, dest_len
, &err
);
472 if(U_SUCCESS(err
) && output_len
< 1024) {
473 lua_pushlstring(L
, output
, output_len
);
482 static int Lidna_to_unicode(lua_State
*L
) { /** idna.to_unicode(s) */
484 int32_t ulen
, dest_len
, output_len
;
485 const char *s
= luaL_checklstring(L
, 1, &len
);
487 UErrorCode err
= U_ZERO_ERROR
;
491 u_strFromUTF8(ustr
, 1024, &ulen
, s
, len
, &err
);
498 UIDNAInfo info
= UIDNA_INFO_INITIALIZER
;
499 dest_len
= uidna_nameToUnicode(icu_idna2008
, ustr
, ulen
, dest
, 1024, &info
, &err
);
501 if(U_FAILURE(err
) || info
.errors
) {
505 u_strToUTF8(output
, 1024, &output_len
, dest
, dest_len
, &err
);
507 if(U_SUCCESS(err
) && output_len
< 1024) {
508 lua_pushlstring(L
, output
, output_len
);
517 static int Lskeleton(lua_State
*L
) {
519 int32_t ulen
, dest_len
, output_len
;
520 const char *s
= luaL_checklstring(L
, 1, &len
);
521 UErrorCode err
= U_ZERO_ERROR
;
526 u_strFromUTF8(ustr
, 1024, &ulen
, s
, len
, &err
);
533 dest_len
= uspoof_getSkeleton(icu_spoofcheck
, 0, ustr
, ulen
, dest
, 1024, &err
);
540 u_strToUTF8(output
, 1024, &output_len
, dest
, dest_len
, &err
);
543 lua_pushlstring(L
, output
, output_len
);
551 #else /* USE_STRINGPREP_ICU */
552 /****************** libidn ********************/
555 #include <idn-free.h>
557 static int Lidna_to_ascii(lua_State
*L
) { /** idna.to_ascii(s) */
559 const char *s
= check_utf8(L
, 1, &len
);
563 if(s
== NULL
|| len
!= strlen(s
)) {
565 return 1; /* TODO return error message */
568 ret
= idna_to_ascii_8z(s
, &output
, IDNA_USE_STD3_ASCII_RULES
);
570 if(ret
== IDNA_SUCCESS
) {
571 lua_pushstring(L
, output
);
577 return 1; /* TODO return error message */
581 static int Lidna_to_unicode(lua_State
*L
) { /** idna.to_unicode(s) */
583 const char *s
= luaL_checklstring(L
, 1, &len
);
585 int ret
= idna_to_unicode_8z8z(s
, &output
, 0);
587 if(ret
== IDNA_SUCCESS
) {
588 lua_pushstring(L
, output
);
594 return 1; /* TODO return error message */
599 static const luaL_Reg Reg_idna
[] = {
600 { "to_ascii", Lidna_to_ascii
},
601 { "to_unicode", Lidna_to_unicode
},
605 /***************** end *****************/
607 LUALIB_API
int luaopen_util_encodings(lua_State
*L
) {
608 #if (LUA_VERSION_NUM > 501)
609 luaL_checkversion(L
);
611 #ifdef USE_STRINGPREP_ICU
617 luaL_setfuncs(L
, Reg_base64
, 0);
618 lua_setfield(L
, -2, "base64");
621 luaL_setfuncs(L
, Reg_stringprep
, 0);
622 lua_setfield(L
, -2, "stringprep");
625 luaL_setfuncs(L
, Reg_idna
, 0);
626 lua_setfield(L
, -2, "idna");
629 luaL_setfuncs(L
, Reg_utf8
, 0);
630 lua_setfield(L
, -2, "utf8");
632 #ifdef USE_STRINGPREP_ICU
634 lua_pushcfunction(L
, Lskeleton
);
635 lua_setfield(L
, -2, "skeleton");
636 lua_setfield(L
, -2, "confusable");
639 lua_pushliteral(L
, "-3.14");
640 lua_setfield(L
, -2, "version");