x86: fix a bug that we misgenerated the the 8-bit imul with a constant
[ajla.git] / layout.c
blob129b26bea0e28376738b7eebaf8d97bdf124d13d
1 /*
2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
9 * version.
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
19 #include "ajla.h"
21 #include "mem_al.h"
22 #include "str.h"
24 #include "layout.h"
26 #define MAX_LOG2_ALIGN 5
27 #define MAX_SIZE 65
29 struct layout_entry {
30 frame_t slots;
31 unsigned char log2_align;
32 frame_t position;
35 struct layout {
36 uchar_efficient_t slot_bits;
37 char_efficient_t flags_per_slot_bits;
38 frame_t alignment;
39 frame_t alignment_offset;
40 struct layout_entry *entries;
41 size_t n_entries;
42 frame_t n_slots;
43 frame_t max_align;
44 frame_t min_idx[MAX_SIZE][MAX_LOG2_ALIGN];
47 static frame_t bitmap_slots(struct layout *l, frame_t n_vars)
49 return (n_vars + (1 << l->flags_per_slot_bits) - 1) >> l->flags_per_slot_bits;
52 static frame_t bitmap_slots_estimate(struct layout *l, frame_t n_vars)
54 return (n_vars - 1) / ((1 << l->flags_per_slot_bits) - 1) + 1;
57 struct layout *layout_start(uchar_efficient_t slot_bits, char_efficient_t flags_per_slot_bits, frame_t alignment, frame_t alignment_offset, ajla_error_t *mayfail)
59 struct layout *l;
61 l = mem_alloc_mayfail(struct layout *, sizeof(struct layout), mayfail);
62 if (unlikely(!l))
63 goto err0;
64 l->slot_bits = slot_bits;
65 l->flags_per_slot_bits = flags_per_slot_bits;
66 ajla_assert_lo(is_power_of_2(alignment) && !((alignment | alignment_offset) & ((1U << l->slot_bits) - 1)), (file_line, "layout_start: invalid alignment %"PRIuMAX", %"PRIuMAX"", (uintmax_t)alignment, (uintmax_t)alignment_offset));
67 l->alignment = alignment >> l->slot_bits;
68 l->alignment_offset = alignment_offset >> l->slot_bits;
69 if (unlikely(!array_init_mayfail(struct layout_entry, &l->entries, &l->n_entries, mayfail)))
70 goto err1;
71 l->max_align = 1U << l->slot_bits;
73 return l;
75 err1:
76 mem_free(l);
77 err0:
78 return NULL;
81 bool layout_add(struct layout *l, frame_t size, frame_t align, ajla_error_t *mayfail)
83 struct layout_entry le;
85 ajla_assert(is_power_of_2(align), (file_line, "layout_add: invalid align %"PRIuMAX"", (uintmax_t)align));
87 if (unlikely(align < 1U << l->slot_bits))
88 align = 1U << l->slot_bits;
89 if (unlikely(align > l->max_align))
90 l->max_align = align;
91 size = round_up(size, 1U << l->slot_bits);
93 le.slots = size >> l->slot_bits;
94 le.log2_align = (unsigned char)log_2(align >> l->slot_bits);
95 le.position = NO_FRAME_T; /* avoid warning */
96 if (!array_add_mayfail(struct layout_entry, &l->entries, &l->n_entries, le, NULL, mayfail))
97 return false;
98 return true;
101 bool layout_compute(struct layout *l, bool linear, ajla_error_t *mayfail)
103 frame_t i, j;
104 frame_t n_slots;
105 frame_t flag_slots;
107 uchar_efficient_t *bitmap = NULL;
108 size_t bitmap_size;
110 #define align_slots \
111 do { \
112 frame_t a = n_slots; \
113 a += -(a + l->alignment_offset) & (l->alignment - 1); \
114 if (unlikely(a < n_slots)) \
115 goto overflow; \
116 n_slots = a; \
117 } while (0)
119 n_slots = 0;
120 if (unlikely(!l->n_entries))
121 goto ret_true;
123 flag_slots = 0;
124 if (l->flags_per_slot_bits >= 0) {
125 for (i = 0; i < l->n_entries; i++) {
126 frame_t a = n_slots + l->entries[i].slots;
127 if (unlikely(a < n_slots))
128 goto overflow;
129 n_slots = a;
132 align_slots;
133 flag_slots = bitmap_slots_estimate(l, n_slots);
134 n_slots += flag_slots;
135 if (unlikely(n_slots < flag_slots))
136 goto overflow;
137 align_slots;
138 flag_slots = bitmap_slots(l, n_slots);
141 again:
142 for (i = 0; i < MAX_SIZE; i++)
143 for (j = 0; j < MAX_LOG2_ALIGN; j++)
144 l->min_idx[i][j] = flag_slots;
146 if (unlikely(!array_init_mayfail(uchar_efficient_t, &bitmap, &bitmap_size, mayfail)))
147 goto ret_false;
149 for (i = 0; i < l->n_entries; i++) {
150 frame_t *ptr, start;
151 struct layout_entry *le = &l->entries[i];
152 if (linear) {
153 ptr = &l->min_idx[0][0];
154 start = *ptr;
155 } else if (likely(le->slots < MAX_SIZE) && likely(le->log2_align < MAX_LOG2_ALIGN)) {
156 ptr = &l->min_idx[le->slots][le->log2_align];
157 start = *ptr;
158 } else {
159 ptr = NULL;
160 start = flag_slots;
163 while (1) {
164 frame_t a;
165 frame_t more = -(start + l->alignment_offset) & ((1U << le->log2_align) - 1);
166 if (unlikely(more != 0))
167 goto search_further;
169 a = start + le->slots;
170 if (unlikely(a < start))
171 goto overflow;
172 for (j = le->slots - 1; j != (frame_t)-1; j--) {
173 if ((frame_t)(start + j) < bitmap_size && bitmap[start + j]) {
174 more = j + 1;
175 goto search_further;
179 while (bitmap_size < (frame_t)(start + le->slots)) {
180 if (unlikely(!array_add_mayfail(uchar_efficient_t, &bitmap, &bitmap_size, 0, NULL, mayfail)))
181 goto ret_false;
182 if (unlikely(bitmap_size != (frame_t)bitmap_size))
183 goto overflow;
185 for (j = 0; j < le->slots; j++)
186 bitmap[start + j] = 1;
187 break;
189 search_further:
190 if (unlikely(start + more < start))
191 goto overflow;
192 start += more;
194 le->position = start;
195 if (likely(ptr != NULL))
196 *ptr = start + le->slots;
199 n_slots = (frame_t)bitmap_size;
200 align_slots;
202 mem_free(bitmap);
203 bitmap = NULL;
205 if (l->flags_per_slot_bits >= 0) {
206 frame_t new_flag_slots = bitmap_slots(l, n_slots);
207 if (unlikely(!new_flag_slots))
208 goto overflow;
209 if (unlikely(new_flag_slots > flag_slots)) {
210 flag_slots = new_flag_slots;
211 goto again;
215 ret_true:
216 if (unlikely(n_slots >= sign_bit(frame_t)))
217 goto overflow;
219 l->n_slots = n_slots;
221 return true;
223 #undef add_slots
225 overflow:
226 fatal_mayfail(error_ajla(EC_ASYNC, AJLA_ERROR_SIZE_OVERFLOW), mayfail, "layout size overflow");
227 ret_false:
228 if (bitmap)
229 mem_free(bitmap);
230 return false;
233 frame_t layout_get(struct layout *l, frame_t idx)
235 ajla_assert_lo(idx < l->n_entries, (file_line, "layout_get: invalid index: %"PRIuMAX" >= %"PRIuMAX"", (uintmax_t)idx, (uintmax_t)l->n_entries));
236 return l->entries[idx].position;
239 frame_t layout_size(const struct layout *l)
241 return l->n_slots;
244 frame_t layout_alignment(const struct layout *l)
246 return l->max_align;
249 void layout_free(struct layout *l)
251 if (likely(l->entries != NULL))
252 mem_free(l->entries);
253 mem_free(l);