1 // Copyright (c) 2009-2024, Google LLC
2 // All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
8 #include "upb_generator/minitable/fasttable.h"
18 #include "absl/strings/substitute.h"
19 #include "upb/base/descriptor_constants.h"
20 #include "upb/mini_table/field.h"
21 #include "upb/mini_table/message.h"
22 #include "upb/reflection/def.hpp"
23 #include "upb/wire/types.h"
24 #include "upb_generator/file_layout.h"
27 #include "upb/port/def.inc"
34 // Returns fields in order of "hotness", eg. how frequently they appear in
35 // serialized payloads. Ideally this will use a profile. When we don't have
36 // that, we assume that fields with smaller numbers are used more frequently.
37 inline std::vector
<upb::FieldDefPtr
> FieldHotnessOrder(
38 upb::MessageDefPtr message
) {
39 std::vector
<upb::FieldDefPtr
> fields
;
40 size_t field_count
= message
.field_count();
41 fields
.reserve(field_count
);
42 for (size_t i
= 0; i
< field_count
; i
++) {
43 fields
.push_back(message
.field(i
));
45 std::sort(fields
.begin(), fields
.end(),
46 [](upb::FieldDefPtr a
, upb::FieldDefPtr b
) {
47 return std::make_pair(!a
.is_required(), a
.number()) <
48 std::make_pair(!b
.is_required(), b
.number());
53 typedef std::pair
<std::string
, uint64_t> TableEntry
;
55 uint32_t GetWireTypeForField(upb::FieldDefPtr field
) {
56 if (field
.packed()) return kUpb_WireType_Delimited
;
57 switch (field
.type()) {
58 case kUpb_FieldType_Double
:
59 case kUpb_FieldType_Fixed64
:
60 case kUpb_FieldType_SFixed64
:
61 return kUpb_WireType_64Bit
;
62 case kUpb_FieldType_Float
:
63 case kUpb_FieldType_Fixed32
:
64 case kUpb_FieldType_SFixed32
:
65 return kUpb_WireType_32Bit
;
66 case kUpb_FieldType_Int64
:
67 case kUpb_FieldType_UInt64
:
68 case kUpb_FieldType_Int32
:
69 case kUpb_FieldType_Bool
:
70 case kUpb_FieldType_UInt32
:
71 case kUpb_FieldType_Enum
:
72 case kUpb_FieldType_SInt32
:
73 case kUpb_FieldType_SInt64
:
74 return kUpb_WireType_Varint
;
75 case kUpb_FieldType_Group
:
76 return kUpb_WireType_StartGroup
;
77 case kUpb_FieldType_Message
:
78 case kUpb_FieldType_String
:
79 case kUpb_FieldType_Bytes
:
80 return kUpb_WireType_Delimited
;
85 uint32_t MakeTag(uint32_t field_number
, uint32_t wire_type
) {
86 return field_number
<< 3 | wire_type
;
89 size_t WriteVarint32ToArray(uint64_t val
, char* buf
) {
92 uint8_t byte
= val
& 0x7fU
;
94 if (val
) byte
|= 0x80U
;
100 uint64_t GetEncodedTag(upb::FieldDefPtr field
) {
101 uint32_t wire_type
= GetWireTypeForField(field
);
102 uint32_t unencoded_tag
= MakeTag(field
.number(), wire_type
);
103 char tag_bytes
[10] = {0};
104 WriteVarint32ToArray(unencoded_tag
, tag_bytes
);
105 uint64_t encoded_tag
= 0;
106 memcpy(&encoded_tag
, tag_bytes
, sizeof(encoded_tag
));
107 // TODO: byte-swap for big endian.
111 int GetTableSlot(upb::FieldDefPtr field
) {
112 uint64_t tag
= GetEncodedTag(field
);
114 // Tag must fit within a two-byte varint.
117 return (tag
& 0xf8) >> 3;
120 bool TryFillTableEntry(const DefPoolPair
& pools
, upb::FieldDefPtr field
,
122 const upb_MiniTable
* mt
= pools
.GetMiniTable64(field
.containing_type());
123 const upb_MiniTableField
* mt_f
=
124 upb_MiniTable_FindFieldByNumber(mt
, field
.number());
125 std::string type
= "";
126 std::string cardinality
= "";
127 switch (upb_MiniTableField_Type(mt_f
)) {
128 case kUpb_FieldType_Bool
:
131 case kUpb_FieldType_Enum
:
132 if (upb_MiniTableField_IsClosedEnum(mt_f
)) {
133 // We don't have the means to test proto2 enum fields for valid values.
137 case kUpb_FieldType_Int32
:
138 case kUpb_FieldType_UInt32
:
141 case kUpb_FieldType_Int64
:
142 case kUpb_FieldType_UInt64
:
145 case kUpb_FieldType_Fixed32
:
146 case kUpb_FieldType_SFixed32
:
147 case kUpb_FieldType_Float
:
150 case kUpb_FieldType_Fixed64
:
151 case kUpb_FieldType_SFixed64
:
152 case kUpb_FieldType_Double
:
155 case kUpb_FieldType_SInt32
:
158 case kUpb_FieldType_SInt64
:
161 case kUpb_FieldType_String
:
164 case kUpb_FieldType_Bytes
:
167 case kUpb_FieldType_Message
:
171 return false; // Not supported yet.
174 if (upb_MiniTableField_IsArray(mt_f
)) {
175 cardinality
= upb_MiniTableField_IsPacked(mt_f
) ? "p" : "r";
176 } else if (upb_MiniTableField_IsScalar(mt_f
)) {
177 cardinality
= upb_MiniTableField_IsInOneof(mt_f
) ? "o" : "s";
179 return false; // Not supported yet (ever?).
182 uint64_t expected_tag
= GetEncodedTag(field
);
187 // |--------|--------|--------|--------|--------|--------|--------|--------|
188 // | offset (16) |case offset (16) |presence| submsg | exp. tag (16) |
189 // |--------|--------|--------|--------|--------|--------|--------|--------|
191 // - |presence| is either hasbit index or field number for oneofs.
194 static_cast<uint64_t>(mt_f
->UPB_PRIVATE(offset
)) << 48 | expected_tag
;
196 if (field
.IsSequence()) {
197 // No hasbit/oneof-related fields.
199 if (field
.real_containing_oneof()) {
200 uint64_t case_offset
= ~mt_f
->presence
;
201 if (case_offset
> 0xffff || field
.number() > 0xff) return false;
202 data
|= field
.number() << 24;
203 data
|= case_offset
<< 32;
205 uint64_t hasbit_index
= 63; // No hasbit (set a high, unused bit).
206 if (mt_f
->presence
) {
207 hasbit_index
= mt_f
->presence
;
208 if (hasbit_index
> 31) return false;
210 data
|= hasbit_index
<< 24;
213 if (field
.ctype() == kUpb_CType_Message
) {
214 uint64_t idx
= mt_f
->UPB_PRIVATE(submsg_index
);
215 if (idx
> 255) return false;
218 std::string size_ceil
= "max";
219 size_t size
= SIZE_MAX
;
220 if (field
.message_type().file() == field
.file()) {
221 // We can only be guaranteed the size of the sub-message if it is in the
222 // same file as us. We could relax this to increase the speed of
223 // cross-file sub-message parsing if we are comfortable requiring that
224 // users compile all messages at the same time.
225 const upb_MiniTable
* sub_mt
= pools
.GetMiniTable64(field
.message_type());
226 size
= sub_mt
->UPB_PRIVATE(size
) + 8;
228 std::vector
<size_t> breaks
= {64, 128, 192, 256};
229 for (auto brk
: breaks
) {
231 size_ceil
= std::to_string(brk
);
235 ent
.first
= absl::Substitute("upb_p$0$1_$2bt_max$3b", cardinality
, type
,
236 expected_tag
> 0xff ? "2" : "1", size_ceil
);
239 ent
.first
= absl::Substitute("upb_p$0$1_$2bt", cardinality
, type
,
240 expected_tag
> 0xff ? "2" : "1");
248 std::vector
<TableEntry
> FastDecodeTable(upb::MessageDefPtr message
,
249 const DefPoolPair
& pools
) {
250 std::vector
<TableEntry
> table
;
251 for (const auto field
: FieldHotnessOrder(message
)) {
253 int slot
= GetTableSlot(field
);
254 // std::cerr << "table slot: " << field->number() << ": " << slot << "\n";
256 // Tag can't fit in the table.
259 if (!TryFillTableEntry(pools
, field
, ent
)) {
260 // Unsupported field type or offset, hasbit index, etc. doesn't fit.
263 while ((size_t)slot
>= table
.size()) {
264 size_t size
= std::max(static_cast<size_t>(1), table
.size() * 2);
265 table
.resize(size
, TableEntry
{"_upb_FastDecoder_DecodeGeneric", 0});
267 if (table
[slot
].first
!= "_upb_FastDecoder_DecodeGeneric") {
268 // A hotter field already filled this slot.
276 } // namespace generator