1 /******************************************************************************
2 * Copyright (C) 2010-2012 Lua.org, PUC-Rio. All rights reserved.
4 * SPDX-License-Identifier: MIT
6 ******************************************************************************/
8 ** {======================================================
9 ** Library for packing/unpacking structures.
10 ** See Copyright Notice above.
12 ** Small changes were made by Hadriel Kaplan - those changes
13 ** are in the Public Domain.
15 ** Some changes are based on a patch to struct.h from
16 ** Flemming Madsen, from here:
17 ** http://lua-users.org/lists/lua-l/2009-10/msg00572.html
18 ** In particular, these changes from him:
19 ** -Can handle 'long long' integers (i8 / I8); though they're converted to doubles
20 ** -Can insert/specify padding anywhere in a struct. ('X', e.g. when a string is following a union)
21 ** -Can report current offset in both pack and unpack ('=')
22 ** -Can mask out return values when you only want to calculate sizes or unmarshal pascal-style strings. '(' & ')'
25 ** -Added support for Int64/UInt64 being packed/unpacked, using 'e'/'E'
26 ** -Made it follow Wireshark's conventions so we could get API docs
27 ** =======================================================
34 ** x[num] - pad num bytes, default 1
35 ** X[num] - pad to num align, default MAXALIGN
37 ** Following are system-dependent sizes:
38 ** i/I - signed/unsigned int
39 ** l/L - signed/unsigned long
43 ** Following are system-independent sizes:
44 ** b/B - signed/unsigned byte
45 ** h/H - signed/unsigned short
46 ** in/In - signed/unsigned integer of size `n' bytes
47 Note: Unpack of i/I is done to a Lua_number, typically a double,
48 so unpacking a 64-bit field (i8/I8) will lose precision.
49 Use e/E to unpack into a Wireshark Int64/UInt64 object/userdata instead.
50 ** e/E - signed/unsigned eight-byte Integer (64bits, long long), to/from Int64/UInt64 object
52 ** cn - sequence of `n' chars (from/to a string); when packing, n==0 means
53 the whole string; when unpacking, n==0 means use the previous
54 read number as the string length
55 ** s - zero-terminated string
57 ** '(' ')' - '(' stops assigning items; ')' starts assigning again (items in between become padding when packing)
58 ** '=' - return current position / offset
64 #include <wsutil/array.h>
67 /* WSLUA_MODULE Struct Binary encode/decode support
69 The Struct class offers basic facilities to convert Lua values to and from C-style structs
70 in binary Lua strings. This is based on Roberto Ierusalimschy's Lua struct library found
71 in http://www.inf.puc-rio.br/~roberto/struct/, with some minor modifications as follows:
72 * Added support for `Int64`/`UInt64` being packed/unpacked, using 'e'/'E'.
73 * Can handle 'long long' integers (i8 / I8); though they're converted to doubles.
74 * Can insert/specify padding anywhere in a struct. ('X', e.g. when a string is following a union).
75 * Can report current offset in both `pack` and `unpack` ('`=`').
76 * Can mask out return values when you only want to calculate sizes or unmarshal
77 pascal-style strings using '`(`' & '`)`'.
79 All but the first of those changes are based on an email from Flemming Madsen, on the lua-users
80 mailing list, which can be found http://lua-users.org/lists/lua-l/2009-10/msg00572.html[here].
82 The main functions are `Struct.pack`, which packs multiple Lua values into a struct-like
83 Lua binary string; and `Struct.unpack`, which unpacks multiple Lua values from a given
84 struct-like Lua binary string. There are some additional helper functions available as well.
86 All functions in the Struct library are called as static member functions, not object methods,
87 so they are invoked as "Struct.pack(...)" instead of "object:pack(...)".
89 The first argument to several of the `Struct` functions is a format string, which describes
90 the layout of the structure. The format string is a sequence of conversion elements, which
91 respect the current endianness and the current alignment requirements. Initially, the
92 current endianness is the machine's native endianness and the current alignment requirement
93 is 1 (meaning no alignment at all). You can change these settings with appropriate directives
96 The supported elements in the format string are as follows:
98 * `$$ $$' (empty space) ignored.
99 * `++!++__n__' flag to set the current alignment requirement to 'n' (necessarily a power of 2);
100 an absent 'n' means the machine's native alignment.
101 * `++>++' flag to set mode to big endian (i.e., network-order).
102 * `++<++' flag to set mode to little endian.
103 * `++x++' a padding zero byte with no corresponding Lua value.
104 * `++b++' a signed char.
105 * `++B++' an unsigned char.
106 * `++h++' a signed short (native size).
107 * `++H++' an unsigned short (native size).
108 * `++l++' a signed long (native size).
109 * `++L++' an unsigned long (native size).
110 * `++T++' a size_t (native size).
111 * `++i++__n__' a signed integer with 'n' bytes. An absent 'n' means the native size of an int.
112 * `++I++__n__' like `++i++__n__' but unsigned.
113 * `++e++' signed 8-byte Integer (64-bits, long long), to/from a +Int64+ object.
114 * `++E++' unsigned 8-byte Integer (64-bits, long long), to/from a +UInt64+ object.
115 * `++f++' a float (native size).
116 * `++d++' a double (native size).
117 * `++s++' a zero-terminated string.
118 * `++c++__n__' a sequence of exactly 'n' chars corresponding to a single Lua string. An absent 'n'
119 means 1. When packing, the given string must have at least 'n' characters (extra
120 characters are discarded).
121 * `++c0++' this is like `++c++__n__', except that the 'n' is given by other means: When packing, 'n' is
122 the length of the given string; when unpacking, 'n' is the value of the previous unpacked
123 value (which must be a number). In that case, this previous value is not returned.
124 * `++x++__n__' pad to 'n' number of bytes, default 1.
125 * `++X++__n__' pad to 'n' alignment, default MAXALIGN.
126 * `++(++' to stop assigning items, and `++)++' start assigning (padding when packing).
127 * `++=++' to return the current position / offset.
131 Using `i`, `I`, `h`, `H`, `l`, `L`, `f`, and `T` is strongly discouraged, as those sizes
132 are system-dependent. Use the explicitly sized variants instead, such as `i4` or `E`.
134 Unpacking of `i`/`I` is done to a Lua number, a double-precision floating point,
135 so unpacking a 64-bit field (`i8`/`I8`) will lose precision.
136 Use `e`/`E` to unpack into a Wireshark `Int64`/`UInt64` object instead.
141 Lua 5.3 and later provides several built-in functions for struct unpacking and packing:
142 https://www.lua.org/manual/5.4/manual.html#pdf-string.pack[string.pack],
143 https://www.lua.org/manual/5.4/manual.html#pdf-string.packsize[string.packsize], and
144 https://www.lua.org/manual/5.4/manual.html#pdf-string.unpack[string.unpack].
145 You can use those as well, but note that the
146 https://www.lua.org/manual/5.4/manual.html#6.4.2[format string] conversion elements
147 are slightly different, and they do not support the Wireshark `Int64`/`UInt64` objects.
152 /* The following line is here so that make-reg.py does the right thing. This 'Struct' class
153 isn't really a class, so it doesn't have the checkStruct/pushStruct/etc. functions
154 the following macro would generate; but it does need to be registered and such, so...
155 WSLUA_CLASS_DEFINE_BASE(Struct,NOP,0);
158 /* basic integer type - yes this is system-specific size - it's meant to be */
/*
 * Integer typedefs and sizing helpers shared by the pack/unpack routines.
 * STRUCT_INT defaults to long unless it is already defined when this point is
 * reached, so Inttype and Uinttype have a platform-dependent width.
 */
159 #if !defined(STRUCT_INT)
160 #define STRUCT_INT long
163 typedef STRUCT_INT Inttype
;
165 /* corresponding unsigned version */
166 typedef unsigned STRUCT_INT Uinttype
;
168 /* maximum size (in bytes) for integral types */
169 #define MAXINTSIZE 32
171 /* is 'x' a power of 2? */
/* NOTE(review): isp2 expands its argument several times; pass only
 * side-effect-free expressions. */
172 #define isp2(x) ((x) > 0 && ((x) & ((x) - 1)) == 0)
174 /* dummy structure to get padding/alignment requirements */
/* PADDING is the number of bytes struct cD occupies beyond a double; MAXALIGN
 * is the larger of PADDING and sizeof(int). The definition of struct cD is not
 * visible in this excerpt. */
181 #define PADDING (sizeof(struct cD) - sizeof(double))
182 #define MAXALIGN (PADDING > sizeof(int) ? PADDING : sizeof(int))
189 /* trick to determine native endianness of system */
193 } const native
= {1};
196 typedef struct Header
{
/*
 * Reads an optional unsigned decimal number from the format string.
 *
 * L    Lua state, used only to raise an error on overflow.
 * fmt  in/out cursor into the format string; advanced past each digit consumed.
 * df   value returned when the character at the cursor is not a digit.
 *
 * Raises a Lua error ("integral size overflow") when accepting the next digit
 * would push the accumulated value past INT_MAX.
 * NOTE(review): the declaration of the accumulator and the final return are not
 * visible in this excerpt; presumably the accumulated value is returned.
 */
202 /* For options that take a number argument, gets the number */
203 static int getnum (lua_State
*L
, const char **fmt
, int df
) {
204 if (!g_ascii_isdigit(**fmt
)) /* no number? */
205 return df
; /* return default value */
/* Overflow is tested before the digit is folded in, so the multiplication and
 * addition on the following statement stay within INT_MAX. */
209 if (a
> (INT_MAX
/ 10) || a
* 10 > (INT_MAX
- (**fmt
- '0')))
210 luaL_error(L
, "integral size overflow");
211 a
= a
*10 + *((*fmt
)++) - '0';
212 } while (g_ascii_isdigit(**fmt
));
/* Resets the Header pointed to by h to its initial state: native endianness,
 * alignment of 1, and value assignment enabled (noassign = false). */
218 #define defaultoptions(h) ((h)->endian = native.endian, (h)->align = 1, (h)->noassign = false)
/*
 * Maps one format option character to the number of bytes it occupies.
 *
 * L    Lua state, used for error reporting.
 * opt  the option character just read from the format string.
 * fmt  in/out cursor into the format string; options that accept a numeric
 *      suffix ('x', 'X', 'c', 'i', 'I') consume it through getnum().
 *
 * Fixed-width options return the sizeof of the matching C type. 'x' and 'c'
 * default to 1, 'X' defaults to MAXALIGN, and 'i'/'I' default to sizeof(int).
 * Options without a size return 0. An unrecognised option raises an argument
 * error on argument 1.
 * NOTE(review): the switch header, the comparison guarding the "larger than
 * limit" error, and the case labels that return 0 are not visible here.
 */
221 /* gets size (number of bytes) for a given type */
222 static size_t optsize (lua_State
*L
, char opt
, const char **fmt
) {
224 case 'B': case 'b': return sizeof(char);
225 case 'H': case 'h': return sizeof(short);
226 case 'L': case 'l': return sizeof(long);
227 case 'E': case 'e': return sizeof(int64_t);
228 case 'T': return sizeof(size_t);
229 case 'f': return sizeof(float);
230 case 'd': return sizeof(double);
231 case 'x': return getnum(L
, fmt
, 1);
232 case 'X': return getnum(L
, fmt
, MAXALIGN
);
233 case 'c': return getnum(L
, fmt
, 1);
234 case 'i': case 'I': {
235 int sz
= getnum(L
, fmt
, sizeof(int));
237 luaL_error(L
, "integral size %d is larger than limit of %d",
245 return 0; /* these cases do not have a size */
247 const char *msg
= lua_pushfstring(L
, "invalid format option [%c]", opt
);
248 return luaL_argerror(L
, 1, msg
);
/*
 * Computes how many padding bytes must precede an element so that it starts on
 * its alignment boundary.
 *
 * len   current offset in the packed data.
 * h     current format state; h->align caps the alignment that is applied.
 * opt   option character; 'c' and 's' are never aligned.
 * size  element size in bytes; 0 means nothing to align.
 *
 * Returns the padding byte count (0 when no alignment applies).
 * NOTE(review): the mask arithmetic is a correct round-up only when the
 * effective size is a power of two. A size such as 3 (from "i3") that is not
 * larger than h->align is used as-is; confirm whether that is intended.
 */
255 ** return number of bytes needed to align an element of size 'size'
256 ** at current position 'len'
258 static int gettoalign (size_t len
, Header
*h
, int opt
, size_t size
) {
259 if (size
== 0 || opt
== 'c' || opt
== 's') return 0;
260 if (size
> (size_t)h
->align
)
261 size
= h
->align
; /* respect max. alignment */
262 return (int)((size
- (len
& (size
- 1))) & (size
- 1));
/*
 * Applies a format option that carries no data and only changes the state
 * tracked in the Header h.
 *
 *   ' '  ignored
 *   '>'  switch to big endian
 *   '<'  switch to little endian
 *   '('  stop assigning values
 *   ')'  resume assigning values
 *
 * One further case reads a number with getnum() (default MAXALIGN) and raises
 * "alignment %d is not a power of 2" for an invalid value. Any other character
 * raises an "invalid format option" argument error on argument 1.
 * NOTE(review): the Header parameter, the switch header, and the case label of
 * the alignment branch (presumably '!') are not visible in this excerpt.
 */
267 ** options to control endianness and alignment settings
269 static void controloptions (lua_State
*L
, int opt
, const char **fmt
,
272 case ' ': return; /* ignore white spaces */
273 case '>': h
->endian
= BIG
; return;
274 case '<': h
->endian
= LITTLE
; return;
275 case '(': h
->noassign
= true; return;
276 case ')': h
->noassign
= false; return;
278 int a
= getnum(L
, fmt
, MAXALIGN
);
280 luaL_error(L
, "alignment %d is not a power of 2", a
);
285 const char *msg
= lua_pushfstring(L
, "invalid format option '%c'", opt
);
286 luaL_argerror(L
, 1, msg
);
/*
 * Appends the Lua number at stack index arg to buffer b as a size-byte integer.
 *
 * L       Lua state; luaL_checknumber raises an error if arg is not a number.
 * b       buffer receiving the encoded bytes.
 * arg     stack index of the value to encode.
 * endian  LITTLE fills buff from index 0 upward; any other value fills it from
 *         index size - 1 downward. Each step stores the low byte of value.
 *
 * The number is converted through int64_t into a 64-bit value and staged in a
 * local array of MAXINTSIZE bytes before being added to b.
 * NOTE(review): the size parameter, the declarations of value and i, and the
 * statements that shift value between bytes are not visible here. The function
 * relies on size <= MAXINTSIZE; confirm that every caller guarantees it.
 */
291 /* Encodes a Lua number as an integer of given size and endianness into a string struct */
292 static void putinteger (lua_State
*L
, luaL_Buffer
*b
, int arg
, int endian
,
294 lua_Number n
= luaL_checknumber(L
, arg
);
295 /* this one's not system dependent size - it's a long long */
297 char buff
[MAXINTSIZE
];
299 value
= (uint64_t)(int64_t)n
;
302 if (endian
== LITTLE
) {
304 for (i
= 0; i
< size
; i
++) {
305 buff
[i
] = (value
& 0xff);
311 for (i
= size
- 1; i
>= 0; i
--) {
312 buff
[i
] = (value
& 0xff);
316 luaL_addlstring(b
, buff
, size
);
/*
 * Adjusts the byte order of the size-byte object at b when the requested
 * endianness differs from the host's (native.endian); otherwise it does
 * nothing. Used by the pack/unpack code for float and double values.
 * NOTE(review): the body of the conditional is not visible in this excerpt;
 * presumably it reverses the bytes in place.
 */
319 /* corrects endianness - usually done by other functions themselves, but is
320 * used for float/doubles, since on some platforms they're endian'ed as well
322 static void correctbytes (char *b
, int size
, int endian
) {
323 if (endian
!= native
.endian
) {
334 WSLUA_CONSTRUCTOR
Struct_pack (lua_State
*L
) {
335 /* Returns a string containing the values arg1, arg2, etc. packed/encoded according to the format string. */
336 #define WSLUA_ARG_Struct_pack_FORMAT 1 /* The format string */
337 #define WSLUA_ARG_Struct_pack_VALUE 2 /* One or more Lua value(s) to encode, based on the given format. */
/*
 * Implementation outline: walks the format string one option at a time. For
 * each option it asks optsize() for the element size, emits the zero padding
 * requested by gettoalign(), then encodes the next Lua argument into the
 * luaL_Buffer b. totalsize tracks the current output offset. Positions
 * requested with '=' are collected in posBuf and pushed after the string, so
 * the function returns poscnt + 1 values.
 * NOTE(review): several declarations (h, b, arg, posBuf, poscnt, opt), the
 * switch header and a number of case labels are not visible in this excerpt.
 */
339 const char *fmt
= wslua_checkstring_only(L
, WSLUA_ARG_Struct_pack_FORMAT
);
344 size_t totalsize
= 0;
346 lua_pushnil(L
); /* mark to separate arguments from string buffer */
347 luaL_buffinit(L
, &b
);
348 while (*fmt
!= '\0') {
350 size_t size
= optsize(L
, opt
, &fmt
);
351 int toalign
= gettoalign(totalsize
, &h
, opt
, size
);
352 totalsize
+= toalign
;
353 while (toalign
-- > 0) luaL_addchar(&b
, '\0');
354 if (opt
== 'X') size
= 0; /* 'X' is about alignment, not size */
355 if (h
.noassign
&& size
) opt
= 'x'; /* for pack, "(i4)" is the same as "x4" */
357 case 'b': case 'B': case 'h': case 'H':
358 case 'l': case 'L': case 'T': case 'i': case 'I': { /* integer types */
359 putinteger(L
, &b
, arg
++, h
.endian
, (int)size
);
/* Int64/UInt64 objects are encoded by their own helpers; the last argument
 * tells them whether little-endian order is in effect. */
363 Int64_pack(L
, &b
, arg
++, h
.endian
== LITTLE
);
367 UInt64_pack(L
, &b
, arg
++, h
.endian
== LITTLE
);
370 case 'x': case 'X': {
373 luaL_addchar(&b
, '\0');
/* Floating-point values are byte-swapped in place when needed, then their raw
 * object representation is appended. */
377 float f
= (float)luaL_checknumber(L
, arg
++);
378 correctbytes((char *)&f
, (int)size
, h
.endian
);
379 luaL_addlstring(&b
, (char *)&f
, size
);
383 double d
= luaL_checknumber(L
, arg
++);
384 correctbytes((char *)&d
, (int)size
, h
.endian
);
385 luaL_addlstring(&b
, (char *)&d
, size
);
388 case 'c': case 's': {
390 const char *s
= luaL_checklstring(L
, arg
++, &l
);
/* A size of 0 means "use the whole string". */
391 if (size
== 0) size
= l
;
/* NOTE(review): arg was already post-incremented by the luaL_checklstring call
 * above, so this check reports the index of the following argument rather than
 * the string that is too short. */
392 luaL_argcheck(L
, l
>= (size_t)size
, arg
, "string too short");
393 luaL_addlstring(&b
, s
, size
);
395 luaL_addchar(&b
, '\0'); /* add zero at the end */
/* '=' records the 1-based current offset. NOTE(review): no else branch is
 * visible, so positions beyond the capacity of posBuf appear to be dropped
 * without an error. */
401 if (poscnt
< (int)array_length(posBuf
))
402 posBuf
[poscnt
++] = (int)totalsize
+ 1;
405 default: controloptions(L
, opt
, &fmt
, &h
);
/* arg is reused here as a plain loop index over the recorded positions. */
410 for (arg
= 0; arg
< poscnt
; arg
++)
411 lua_pushinteger(L
, posBuf
[arg
]);
412 WSLUA_RETURN(poscnt
+ 1); /* The packed binary Lua string, plus any positions due to '=' being used in format. */
/*
 * Assembles an unsigned integer from size raw bytes at buff.
 *
 * buff    first byte of the encoded integer.
 * endian  selects which of the two loops runs: one walks buff from index 0
 *         upward, the other from index size - 1 downward.
 * size    number of bytes to consume.
 *
 * Each byte is widened through unsigned char (so it is never sign-extended)
 * and OR-ed into the accumulator l.
 * NOTE(review): the declaration of l, the endianness test and the statements
 * that shift l between bytes are not visible here. Uinttype is
 * unsigned STRUCT_INT (long by default), so a size larger than
 * sizeof(Uinttype) cannot be represented without losing high-order bytes.
 */
415 static Uinttype
decodeinteger (const char *buff
, int endian
, int size
)
420 for (i
= 0; i
< size
; i
++) {
422 l
|= (Uinttype
)(unsigned char)buff
[i
];
426 for (i
= size
- 1; i
>= 0; i
--) {
428 l
|= (Uinttype
)(unsigned char)buff
[i
];
/*
 * Decodes size bytes at buff with decodeinteger() and pushes the result onto
 * the Lua stack.
 *
 * L         Lua state receiving the value.
 * buff      first byte of the encoded integer.
 * endian    byte order, forwarded to decodeinteger().
 * issigned  non-zero to sign-extend the decoded value.
 * size      number of bytes in the encoded integer.
 *
 * In the branch that casts the raw value directly, lua_pushinteger is used
 * only when size is strictly smaller than LUA_INTEGER_SIZE; in the signed
 * branch it is used when size <= LUA_INTEGER_SIZE. Larger sizes are pushed
 * with lua_pushnumber.
 */
434 /* Decodes an integer from a string struct into a lua_Integer, if it fits
435 * without truncation, or a lua_Number, based on given endianness and size.
436 * If the integer type is signed, that is handled correctly as well.
437 * Note for large values of size there can be a loss of precision.
439 static void getinteger (lua_State
*L
, const char *buff
, int endian
,
440 int issigned
, int size
) {
441 Uinttype l
= decodeinteger(buff
, endian
, size
);
443 if (size
< LUA_INTEGER_SIZE
) {
444 /* Fits in a lua_Integer (we need a larger size as lua_Integer
446 lua_pushinteger(L
, (lua_Integer
)l
);
448 /* Does not fit in a lua_Integer */
449 lua_pushnumber(L
, (lua_Number
)l
);
452 else { /* signed format */
/* mask has every bit set from the sign bit of a size-byte value upward.
 * NOTE(review): the shift count size*8 - 1 reaches or exceeds the width of
 * Uinttype once size > sizeof(Uinttype), which is undefined behaviour in C.
 * Confirm which sizes can reach this point and guard accordingly. */
453 Uinttype mask
= (Uinttype
)(~((Uinttype
)0)) << (size
*8 - 1);
454 if (l
& mask
) /* negative value? */
455 l
|= mask
; /* sign extension */
456 if (size
<= LUA_INTEGER_SIZE
) {
457 /* Fits in a lua_Integer */
458 lua_pushinteger(L
, (lua_Integer
)(Inttype
)l
);
460 /* Does not fit in a lua_Integer */
461 lua_pushnumber(L
, (lua_Number
)(Inttype
)l
);
/* Pushes n as a lua_Number unless value assignment is currently disabled
 * (h.noassign). Relies on variables named L and h being in scope at the point
 * of use.
 * NOTE(review): expands to a bare brace block rather than do { } while (0), so
 * a use followed by a semicolon and an else would not parse. */
466 #define b_pushnumber(n) { if (!h.noassign) lua_pushnumber(L, (lua_Number)(n)); }
468 WSLUA_CONSTRUCTOR
Struct_unpack (lua_State
*L
) {
469 /* Unpacks/decodes multiple Lua values from a given struct-like binary Lua string.
470 The number of returned values depends on the format given, plus an additional value of the position where it stopped reading is returned. */
471 #define WSLUA_ARG_Struct_unpack_FORMAT 1 /* The format string */
472 #define WSLUA_ARG_Struct_unpack_STRUCT 2 /* The binary Lua string to unpack */
473 #define WSLUA_OPTARG_Struct_unpack_BEGIN 3 /* The position to begin reading from (default=1) */
/*
 * Implementation outline: pos is the 0-based read offset into data (length
 * ld). For every format option the element size comes from optsize(),
 * alignment padding from gettoalign() is skipped, the remaining data length is
 * checked, and the decoded value is pushed on the Lua stack. The final position
 * is pushed last as a 1-based index.
 * NOTE(review): declarations of h, ld, opt, f, d, the loop and switch headers,
 * several case labels and the statements advancing pos are not visible in this
 * excerpt.
 */
475 const char *fmt
= wslua_checkstring_only(L
, WSLUA_ARG_Struct_unpack_FORMAT
);
477 const char *data
= wslua_checklstring_only(L
, WSLUA_ARG_Struct_unpack_STRUCT
, &ld
);
/* NOTE(review): pos is a size_t, so a BEGIN argument of 0 or a negative number
 * wraps to a very large offset; no range check on BEGIN is visible here. */
478 size_t pos
= luaL_optinteger(L
, WSLUA_OPTARG_Struct_unpack_BEGIN
, 1) - 1;
483 size_t size
= optsize(L
, opt
, &fmt
);
484 pos
+= gettoalign(pos
, &h
, opt
, size
);
/* NOTE(review): pos+size is evaluated in size_t and can wrap around, letting an
 * out-of-range pos pass this check. A wrap-free form would be
 * size <= ld && pos <= ld - size. */
485 luaL_argcheck(L
, pos
+size
<= ld
, 2, "data string too short");
487 if (opt
== 'X') size
= 0;
488 if (h
.noassign
&& size
> 0) {
489 /* if we're not assigning, and the opt type has a size, then loop again */
490 /* this will not be the case for control options, 'c0', 's', and '=' */
495 luaL_checkstack(L
, 1, "too many results");
497 case 'b': case 'B': case 'h': case 'H':
498 case 'l': case 'L': case 'T': case 'i': case 'I': { /* integer types */
/* Lower-case option letters denote the signed variants. */
499 int issigned
= g_ascii_islower(opt
);
500 getinteger(L
, data
+pos
, h
.endian
, issigned
, (int)size
);
504 Int64_unpack(L
, data
+pos
, h
.endian
== LITTLE
);
508 UInt64_unpack(L
, data
+pos
, h
.endian
== LITTLE
);
511 case 'x': case 'X': {
/* Floating-point values are copied out of the data first and byte-swapped
 * afterwards, so the source string is never modified. */
516 memcpy(&f
, data
+pos
, size
);
517 correctbytes((char *)&f
, sizeof(f
), h
.endian
);
518 lua_pushnumber(L
, f
);
523 memcpy(&d
, data
+pos
, size
);
524 correctbytes((char *)&d
, sizeof(d
), h
.endian
);
525 lua_pushnumber(L
, d
);
/* For a zero-length 'c' option the string length is taken from the value
 * currently on top of the Lua stack, which must be a number. */
530 if (!lua_isnumber(L
, -1))
531 luaL_error(L
, "format `c0' needs a previous size");
532 size
= wslua_touint32(L
, -1);
/* NOTE(review): same wrap-around concern as the earlier pos+size check. */
534 luaL_argcheck(L
, pos
+size
<= ld
, 2, "data string too short");
537 lua_pushlstring(L
, data
+pos
, size
);
/* 's': search for the terminating zero byte within the remaining data. */
541 const char *e
= (const char *)memchr(data
+pos
, '\0', ld
- pos
);
543 luaL_error(L
, "unfinished string in data");
/* size counts the terminator; the pushed string excludes it. */
544 size
= (e
- (data
+pos
)) + 1;
546 lua_pushlstring(L
, data
+pos
, size
- 1);
550 lua_pushinteger(L
, pos
+ 1);
553 default: controloptions(L
, opt
, &fmt
, &h
);
557 lua_pushinteger(L
, pos
+ 1);
/* NOTE(review): subtracting 2 assumes exactly two stack slots sit below the
 * results; the statement establishing that is not visible here. */
558 WSLUA_RETURN(lua_gettop(L
) - 2); /* One or more values based on format, plus the position it stopped unpacking. */
562 WSLUA_CONSTRUCTOR
Struct_size (lua_State
*L
) {
563 /* Returns the length of a binary string that would be consumed/handled by the given format string. */
564 #define WSLUA_ARG_Struct_size_FORMAT 1 /* The format string */
/*
 * Implementation outline: walks the format string, adding alignment padding
 * from gettoalign() to the running offset pos. Variable-length options ('s' and
 * a 'c' whose size is 0) are rejected with an argument error on argument 1.
 * Non-alphanumeric options are forwarded to controloptions() so that
 * endianness, alignment and assignment state stay current.
 * NOTE(review): the declarations of h, pos and opt, the loop header, the test
 * guarding the 's' error and the statement adding size to pos are not visible
 * in this excerpt.
 */
566 const char *fmt
= wslua_checkstring_only(L
, WSLUA_ARG_Struct_size_FORMAT
);
571 size_t size
= optsize(L
, opt
, &fmt
);
572 pos
+= gettoalign(pos
, &h
, opt
, size
);
574 luaL_argerror(L
, 1, "option 's' has no fixed size");
575 else if (opt
== 'c' && size
== 0)
576 luaL_argerror(L
, 1, "option 'c0' has no fixed size");
577 if (!g_ascii_isalnum(opt
))
578 controloptions(L
, opt
, &fmt
, &h
);
581 lua_pushinteger(L
, pos
);
582 WSLUA_RETURN(1); /* The size number */
585 WSLUA_CONSTRUCTOR
Struct_values (lua_State
*L
) {
586 /* Returns the number of Lua values contained in the given format string.
587 This will be the number of returned values from a call to Struct.unpack()
588 not including the extra return value of offset position. (i.e., Struct.values()
589 does not count that extra return value) This will also be the number of
590 arguments Struct.pack() expects, not including the format string argument. */
591 #define WSLUA_ARG_Struct_values_FORMAT 1 /* The format string */
/*
 * Implementation outline: walks the format string and counts options that
 * correspond to a Lua value. An alphanumeric option with a non-zero size is
 * counted only while assignment is enabled (h.noassign is false).
 * Non-alphanumeric options go to controloptions() so that '(' and ')' are
 * honoured.
 * NOTE(review): the declarations of h, vals and opt, the loop header, the
 * special-case handling between the two existing comments, and the statement
 * incrementing vals are not visible in this excerpt.
 */
593 const char *fmt
= wslua_checkstring_only(L
, WSLUA_ARG_Struct_values_FORMAT
);
598 /* we use a size != 0 to mean it is a value */
599 size_t size
= optsize(L
, opt
, &fmt
);
600 /* but some will be zero and not be a value, or vice-versa */
603 /* these are values */
613 if (!g_ascii_isalnum(opt
))
614 controloptions(L
, opt
, &fmt
, &h
);
615 else if (size
&& !h
.noassign
)
618 lua_pushinteger(L
, vals
);
619 WSLUA_RETURN(1); /* The number of values */
622 WSLUA_CONSTRUCTOR
Struct_tohex (lua_State
*L
) {
623 /* Converts the passed-in binary string to a hex-ascii string. */
624 #define WSLUA_ARG_Struct_tohex_BYTESTRING 1 /* A Lua string consisting of binary bytes */
625 #define WSLUA_OPTARG_Struct_tohex_LOWERCASE 2 /* True to use lower-case hex characters (default=false). */
626 #define WSLUA_OPTARG_Struct_tohex_SEPARATOR 3 /* A string separator to insert between hex bytes (default=nil). */
/*
 * Implementation outline: fetches the byte string and its length, reads the
 * optional lowercase flag and separator, and hands everything to
 * wslua_bin2hex(); one value is then returned to Lua.
 * NOTE(review): the declaration of len is not visible here. It is passed to
 * luaL_checklstring (which takes a size_t pointer) and later cast to unsigned,
 * so a string longer than UINT_MAX bytes would have its length truncated.
 */
627 const char* s
= NULL
;
629 bool lowercase
= false;
630 const char* sep
= NULL
;
632 /* luaL_checklstring coerces the argument to a string, and that's ok for tohex,
633 just not fromhex. In fact, we should accept/coerce a Int64/UInt64 here too someday. */
634 s
= luaL_checklstring(L
, WSLUA_ARG_Struct_tohex_BYTESTRING
, &len
);
636 lowercase
= wslua_optbool(L
,WSLUA_OPTARG_Struct_tohex_LOWERCASE
,false);
637 sep
= luaL_optstring(L
,WSLUA_OPTARG_Struct_tohex_SEPARATOR
,NULL
);
639 wslua_bin2hex(L
, s
, (unsigned)len
, lowercase
, sep
);
640 WSLUA_RETURN(1); /* The Lua hex-ascii string */
643 WSLUA_CONSTRUCTOR
Struct_fromhex (lua_State
*L
) {
644 /* Converts the passed-in hex-ascii string to a binary string. */
645 #define WSLUA_ARG_Struct_fromhex_HEXBYTES 1 /* A string consisting of hexadecimal bytes like "00 B1 A2" or "1a2b3c4d" */
646 #define WSLUA_OPTARG_Struct_fromhex_SEPARATOR 2 /* A string separator between hex bytes/words (default none). */
/*
 * Implementation outline: fetches the hex text with wslua_checklstring_only()
 * (chosen, per the comment below, so the argument is not coerced to a string),
 * reads the optional separator, and hands both to wslua_hex2bin(); one value is
 * then returned to Lua.
 * NOTE(review): the declaration of len is not visible here; it is cast to
 * unsigned before being passed on.
 */
647 const char* s
= NULL
;
649 const char* sep
= NULL
;
651 /* luaL_checklstring coerces the argument to a string, and we don't want to do that */
652 s
= wslua_checklstring_only(L
, WSLUA_ARG_Struct_fromhex_HEXBYTES
, &len
);
654 sep
= luaL_optstring(L
,WSLUA_OPTARG_Struct_fromhex_SEPARATOR
,NULL
);
656 wslua_hex2bin(L
, s
, (unsigned)len
, sep
);
657 WSLUA_RETURN(1); /* The Lua binary string */
660 /* }====================================================== */
/* Garbage-collection metamethod for the Struct class. The Lua state parameter
 * is tagged with _U_ (presumably the project's unused-parameter marker). The
 * body is not visible in this excerpt. */
662 /* Gets registered as metamethod automatically by WSLUA_REGISTER_CLASS/META */
663 static int Struct__gc(lua_State
* L _U_
) {
/* Table of functions exposed on the Struct class: pack, unpack, size, values,
 * tohex and fromhex. The terminating entry and closing brace are not visible
 * in this excerpt. */
667 WSLUA_METHODS Struct_methods
[] = {
668 WSLUA_CLASS_FNREG(Struct
,pack
),
669 WSLUA_CLASS_FNREG(Struct
,unpack
),
670 WSLUA_CLASS_FNREG(Struct
,size
),
671 WSLUA_CLASS_FNREG(Struct
,values
),
672 WSLUA_CLASS_FNREG(Struct
,tohex
),
673 WSLUA_CLASS_FNREG(Struct
,fromhex
),
/* Metamethod table for the Struct class; its entries are not visible in this
 * excerpt. */
677 WSLUA_META Struct_meta
[] = {
/* Registers the Struct class with the given Lua state through
 * WSLUA_REGISTER_CLASS. The return statement is not visible in this excerpt. */
681 LUALIB_API
int Struct_register(lua_State
* L
) {
682 WSLUA_REGISTER_CLASS(Struct
);
687 * Editor modelines - https://www.wireshark.org/tools/modelines.html
692 * indent-tabs-mode: nil
695 * vi: set shiftwidth=2 tabstop=8 expandtab:
696 * :indentSize=2:tabSize=8:noTabs=true: