Automated rollback of commit 694d7935747a5b4eac351b757ad8ff22599c8077.
[google-protobuf.git] / upb_generator / minitable / fasttable.cc
blob9986d01f5ac7784047e9ceb222a03b7bed8c5803
1 // Copyright (c) 2009-2024, Google LLC
2 // All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
8 #include "upb_generator/minitable/fasttable.h"
10 #include <algorithm>
11 #include <cstddef>
12 #include <cstdint>
13 #include <cstring>
14 #include <string>
15 #include <utility>
16 #include <vector>
18 #include "absl/strings/substitute.h"
19 #include "upb/base/descriptor_constants.h"
20 #include "upb/mini_table/field.h"
21 #include "upb/mini_table/message.h"
22 #include "upb/reflection/def.hpp"
23 #include "upb/wire/types.h"
24 #include "upb_generator/file_layout.h"
26 // Must be last.
27 #include "upb/port/def.inc"
29 namespace upb {
30 namespace generator {
32 namespace {
34 // Returns fields in order of "hotness", eg. how frequently they appear in
35 // serialized payloads. Ideally this will use a profile. When we don't have
36 // that, we assume that fields with smaller numbers are used more frequently.
37 inline std::vector<upb::FieldDefPtr> FieldHotnessOrder(
38 upb::MessageDefPtr message) {
39 std::vector<upb::FieldDefPtr> fields;
40 size_t field_count = message.field_count();
41 fields.reserve(field_count);
42 for (size_t i = 0; i < field_count; i++) {
43 fields.push_back(message.field(i));
45 std::sort(fields.begin(), fields.end(),
46 [](upb::FieldDefPtr a, upb::FieldDefPtr b) {
47 return std::make_pair(!a.is_required(), a.number()) <
48 std::make_pair(!b.is_required(), b.number());
49 });
50 return fields;
53 typedef std::pair<std::string, uint64_t> TableEntry;
55 uint32_t GetWireTypeForField(upb::FieldDefPtr field) {
56 if (field.packed()) return kUpb_WireType_Delimited;
57 switch (field.type()) {
58 case kUpb_FieldType_Double:
59 case kUpb_FieldType_Fixed64:
60 case kUpb_FieldType_SFixed64:
61 return kUpb_WireType_64Bit;
62 case kUpb_FieldType_Float:
63 case kUpb_FieldType_Fixed32:
64 case kUpb_FieldType_SFixed32:
65 return kUpb_WireType_32Bit;
66 case kUpb_FieldType_Int64:
67 case kUpb_FieldType_UInt64:
68 case kUpb_FieldType_Int32:
69 case kUpb_FieldType_Bool:
70 case kUpb_FieldType_UInt32:
71 case kUpb_FieldType_Enum:
72 case kUpb_FieldType_SInt32:
73 case kUpb_FieldType_SInt64:
74 return kUpb_WireType_Varint;
75 case kUpb_FieldType_Group:
76 return kUpb_WireType_StartGroup;
77 case kUpb_FieldType_Message:
78 case kUpb_FieldType_String:
79 case kUpb_FieldType_Bytes:
80 return kUpb_WireType_Delimited;
82 UPB_UNREACHABLE();
85 uint32_t MakeTag(uint32_t field_number, uint32_t wire_type) {
86 return field_number << 3 | wire_type;
89 size_t WriteVarint32ToArray(uint64_t val, char* buf) {
90 size_t i = 0;
91 do {
92 uint8_t byte = val & 0x7fU;
93 val >>= 7;
94 if (val) byte |= 0x80U;
95 buf[i++] = byte;
96 } while (val);
97 return i;
100 uint64_t GetEncodedTag(upb::FieldDefPtr field) {
101 uint32_t wire_type = GetWireTypeForField(field);
102 uint32_t unencoded_tag = MakeTag(field.number(), wire_type);
103 char tag_bytes[10] = {0};
104 WriteVarint32ToArray(unencoded_tag, tag_bytes);
105 uint64_t encoded_tag = 0;
106 memcpy(&encoded_tag, tag_bytes, sizeof(encoded_tag));
107 // TODO: byte-swap for big endian.
108 return encoded_tag;
111 int GetTableSlot(upb::FieldDefPtr field) {
112 uint64_t tag = GetEncodedTag(field);
113 if (tag > 0x7fff) {
114 // Tag must fit within a two-byte varint.
115 return -1;
117 return (tag & 0xf8) >> 3;
120 bool TryFillTableEntry(const DefPoolPair& pools, upb::FieldDefPtr field,
121 TableEntry& ent) {
122 const upb_MiniTable* mt = pools.GetMiniTable64(field.containing_type());
123 const upb_MiniTableField* mt_f =
124 upb_MiniTable_FindFieldByNumber(mt, field.number());
125 std::string type = "";
126 std::string cardinality = "";
127 switch (upb_MiniTableField_Type(mt_f)) {
128 case kUpb_FieldType_Bool:
129 type = "b1";
130 break;
131 case kUpb_FieldType_Enum:
132 if (upb_MiniTableField_IsClosedEnum(mt_f)) {
133 // We don't have the means to test proto2 enum fields for valid values.
134 return false;
136 [[fallthrough]];
137 case kUpb_FieldType_Int32:
138 case kUpb_FieldType_UInt32:
139 type = "v4";
140 break;
141 case kUpb_FieldType_Int64:
142 case kUpb_FieldType_UInt64:
143 type = "v8";
144 break;
145 case kUpb_FieldType_Fixed32:
146 case kUpb_FieldType_SFixed32:
147 case kUpb_FieldType_Float:
148 type = "f4";
149 break;
150 case kUpb_FieldType_Fixed64:
151 case kUpb_FieldType_SFixed64:
152 case kUpb_FieldType_Double:
153 type = "f8";
154 break;
155 case kUpb_FieldType_SInt32:
156 type = "z4";
157 break;
158 case kUpb_FieldType_SInt64:
159 type = "z8";
160 break;
161 case kUpb_FieldType_String:
162 type = "s";
163 break;
164 case kUpb_FieldType_Bytes:
165 type = "b";
166 break;
167 case kUpb_FieldType_Message:
168 type = "m";
169 break;
170 default:
171 return false; // Not supported yet.
174 if (upb_MiniTableField_IsArray(mt_f)) {
175 cardinality = upb_MiniTableField_IsPacked(mt_f) ? "p" : "r";
176 } else if (upb_MiniTableField_IsScalar(mt_f)) {
177 cardinality = upb_MiniTableField_IsInOneof(mt_f) ? "o" : "s";
178 } else {
179 return false; // Not supported yet (ever?).
182 uint64_t expected_tag = GetEncodedTag(field);
184 // Data is:
186 // 48 32 16 0
187 // |--------|--------|--------|--------|--------|--------|--------|--------|
188 // | offset (16) |case offset (16) |presence| submsg | exp. tag (16) |
189 // |--------|--------|--------|--------|--------|--------|--------|--------|
191 // - |presence| is either hasbit index or field number for oneofs.
193 uint64_t data =
194 static_cast<uint64_t>(mt_f->UPB_PRIVATE(offset)) << 48 | expected_tag;
196 if (field.IsSequence()) {
197 // No hasbit/oneof-related fields.
199 if (field.real_containing_oneof()) {
200 uint64_t case_offset = ~mt_f->presence;
201 if (case_offset > 0xffff || field.number() > 0xff) return false;
202 data |= field.number() << 24;
203 data |= case_offset << 32;
204 } else {
205 uint64_t hasbit_index = 63; // No hasbit (set a high, unused bit).
206 if (mt_f->presence) {
207 hasbit_index = mt_f->presence;
208 if (hasbit_index > 31) return false;
210 data |= hasbit_index << 24;
213 if (field.ctype() == kUpb_CType_Message) {
214 uint64_t idx = mt_f->UPB_PRIVATE(submsg_index);
215 if (idx > 255) return false;
216 data |= idx << 16;
218 std::string size_ceil = "max";
219 size_t size = SIZE_MAX;
220 if (field.message_type().file() == field.file()) {
221 // We can only be guaranteed the size of the sub-message if it is in the
222 // same file as us. We could relax this to increase the speed of
223 // cross-file sub-message parsing if we are comfortable requiring that
224 // users compile all messages at the same time.
225 const upb_MiniTable* sub_mt = pools.GetMiniTable64(field.message_type());
226 size = sub_mt->UPB_PRIVATE(size) + 8;
228 std::vector<size_t> breaks = {64, 128, 192, 256};
229 for (auto brk : breaks) {
230 if (size <= brk) {
231 size_ceil = std::to_string(brk);
232 break;
235 ent.first = absl::Substitute("upb_p$0$1_$2bt_max$3b", cardinality, type,
236 expected_tag > 0xff ? "2" : "1", size_ceil);
238 } else {
239 ent.first = absl::Substitute("upb_p$0$1_$2bt", cardinality, type,
240 expected_tag > 0xff ? "2" : "1");
242 ent.second = data;
243 return true;
246 } // namespace
248 std::vector<TableEntry> FastDecodeTable(upb::MessageDefPtr message,
249 const DefPoolPair& pools) {
250 std::vector<TableEntry> table;
251 for (const auto field : FieldHotnessOrder(message)) {
252 TableEntry ent;
253 int slot = GetTableSlot(field);
254 // std::cerr << "table slot: " << field->number() << ": " << slot << "\n";
255 if (slot < 0) {
256 // Tag can't fit in the table.
257 continue;
259 if (!TryFillTableEntry(pools, field, ent)) {
260 // Unsupported field type or offset, hasbit index, etc. doesn't fit.
261 continue;
263 while ((size_t)slot >= table.size()) {
264 size_t size = std::max(static_cast<size_t>(1), table.size() * 2);
265 table.resize(size, TableEntry{"_upb_FastDecoder_DecodeGeneric", 0});
267 if (table[slot].first != "_upb_FastDecoder_DecodeGeneric") {
268 // A hotter field already filled this slot.
269 continue;
271 table[slot] = ent;
273 return table;
276 } // namespace generator
277 } // namespace upb