docs/develop/app/dano_message_format.txt

   1 /*      The Dano Message Format
   2
   3 0.      Disclaimer
   4         The information herein is based on reverse engineering flattened BMessages.
   5         The conclusions might be wrong in the details, and an implementation can
   6         probably not be drawn right from this description, but the overall format
   7         described here should come close to the one found on Dano based systems.
   8
   9 1.      Concept
  10         In the Dano message format, data is kept in a flat buffer and is organised
  11         in multiple "sections". Each section has a header that identifies the type
  12         of the section and its size. Each section contains a field that then holds
  13         more information on the data and the data itself. Everything is usually
  14         padded to 8 byte boundaries.
  15
  16 2.      Section Headers
  17         The section header looks like this:
  18
  19         typedef struct section_header_s {
  20                 int32           code;
  21                 ssize_t         size;
  22                 uint8           data[0];
  23         } SectionHeader;
  24
  25         The code identifies the type of the data following the header. Valid types
  26         are the following:
  27
  28         enum {
  29                 SECTION_MESSAGE_HEADER = 'FOB2',
  30                 SECTION_OFFSET_TABLE = 'STof',
  31                 SECTION_TARGET_INFORMATION = 'ENwh',
  32                 SECTION_SINGLE_ITEM_DATA = 'SGDa',
  33                 SECTION_FIXED_SIZE_ARRAY_DATA = 'FADa',
  34                 SECTION_VARIABLE_SIZE_ARRAY_DATA = 'VADa',
  35                 SECTION_SORTED_INDEX_TABLE = 'DXIn',
  36                 SECTION_END_OF_DATA = 'DDEn'
  37         };
  38
  39         The size field includes the size of the header itself and its data.
  40
  41 3.      Message Header Section
  42         The message header section stores the what field of the message. Its code,
  43         conveniently at the very first 4 bytes, also identifies the message as a
  44         Dano message ('FOB2'). The layout is as follows:
  45
  46         typedef struct message_header_s {
  47                 int32           what;
  48                 int32           padding;
  49         } MessageHeader;
  50
  51 4.      Offset Table Section
  52         The offset table stores the byte offsets to the sorted index table and to
  53         the end of data section. It looks like this:
  54
  55         typedef struct offset_table_s {
  56                 int32           indexTable;
  57                 int32           endOfData;
  58                 int64           padding;
  59         } OffsetTable;
  60
  61         The index table offset is important since we will usually insert new fields
  62         before the index table. The end of data offset can be used to directly
  63         know where the index table ends. It's also possible that the end of data
  64         offset actually marks the end of the index table.
  65         Both offsets are based on the beginning of the first data section and not
  66         from the top of the message.
  67
  68 5.      Single Item Data Section
  69         The single item data section holds information on exactly one data item.
  70         Since when only dealing with one item it doesn't matter whether it is fixed
  71         size or not we do not distinguish between these two types. The format is as
  72         follows:
  73
  74         typedef struct single_item_s {
  75                 type_code       type;
  76                 ssize_t         itemSize;
  77                 uint8           nameLength;
  78                 char            name[0];
  79         } SingleItem;
  80
  81         The name is padded to the next 8 byte boundary. After nameLength + 1
  82         bytes the item data begins. The nameLength field does not count the
  83         terminating 0 of the name, but the name is actually 0 terminated.
  84
  85 6.      Fixed Size Item Array Data
  86         This type of section holds an array of fixed size items. Describing the
  87         format of this section in a struct is a bit harder, since the count
  88         variable is stored after the name field. In pseudo code it would look like
  89         this:
  90
  91         typedef struct fixed_size_s {
  92                 type_code       type;
  93                 ssize_t         sizePerItem;
  94                 uint8           nameLength;
  95                 char            name[pad_to_8(nameLength + 1)];
  96                 int32           count;
  97                 int32           padding;
  98                 uint8           data[0];
  99         } FixedSize;
 100
 101 7.      Variable Sized Item Array Data
 102         The format is very similar to the one of the fixed size item array above.
 103         Again in pseudo code:
 104
 105         typedef struct variable_size_s {
 106                 type_code       type;
 107                 int32           padding;
 108                 uint8           nameLength;
 109                 char            name[pad_to_8(nameLength + 1)];
 110                 int32           count;
 111                 ssize_t         totalSize;
 112                 uint8           data[0];
 113         } VariableSize;
 114
 115         The data itself is constructed of the variable sized items, each padded to
 116         an eight byte boundary. Where they begin and where they end is not encoded
 117         in the data itself but in an "endpoint table" following the data (at data
 118         + totalSize). The endpoint table is an array of int32 items each pointing
 119         to the end of an item (not including padding). As an example we take an
 120         array of three variable sized items laid out like this:
 121
 122                 <data>
 123                         76 61 72 69 61 62 6c 65 variable
 124                         20 73 69 7a 65 64 20 64  sized d
 125                         61 74 61 00 00 00 00 00 ata..... (pad)
 126                         61 72 69 61 62 6c 65 20 ariable
 127                         73 69 7a 65 64 20 64 61 sized da
 128                         74 61 00 00 00 00 00 00 ta...... (pad)
 129                         6c 61 73 74 20 69 6e 20 last in
 130                         74 68 69 73 20 61 72 72 this arr
 131                         61 79 21 00 00 00 00 00 ay!..... (pad)
 132                 </data>
 133
 134         Then the endpoint table would look like this:
 135
 136                 <endPointTable>
 137                         <endPoint 20 />
 138                         <endPoint 43 />
 139                         <endPoint 68 />
 140                 </endPointTable>
 141
 142         The first endpoint (20) means that the size of the first item is 20 bytes.
 143         The second endpoint (43) is constructed from the start of the second item
 144         which is at pad_to_8(endpoint[0]) plus the size of the item. In this case
 145         pad_to_8(endpoint[0]) results in 24, this is where the second item begins.
 146         So 43 - 24 gives us the unpadded length of item 2 (19). The third item
 147         starts at pad_to_8(endpoint[1]) and is in our case 48. The length of item
 148         three is therefore 68 - 48 = 20 bytes. Note that in this example we are
 149         talking about strings where the 0 termination is included in the item size.
 150
 151 8.      Sorted Index Table
 152         The sorted index table is a list of direct offsets to the fields. It is
 153         binary sorted using the field names. This means that we can use it for
 154         name lookups with an O(log(n)) complexity instead of doing linear searches.
 155         The section data is composed directly out of the int32 array of offsets.
 156         No extra data is stored in this section. All offsets have the first data
 157         section as their base.
 158
 159 9.      End Of Data Section
 160         This section terminates the section stream. No other data is stored in this
 161         section.
 162
 163 10.     Target Information Section
 164         The target information section is used to hold the target team, handler,
 165         and port information for message delivery. As this data is not relevant when
 166         handling disk stored messages only, the format of this section is not
 167         discussed here.
 168
 169 */