changes: document CVT fixes.
[nasm/avx512.git] / macros / smartalign.mac
blobf2ae104b97ba6abf0282b8de77a2aa762612063c
2 ; Smart alignment macros
4 USE: smartalign
; alignmode MODE [, THRESHOLD]
;
; Selects the filler tables consulted by the smart `align' macro.
; MODE is matched case-insensitively against nop, generic, k8, k7 and
; p6; any other value raises an error.
;
; For each code size (16/32/64 bit) a mode defines
; __ALIGN_<bits>BIT_<n>B__, an n-byte list of `db' operands, for
; n = 1..8.  A mode may leave the 8-byte entry undefined.
;
; Each mode also sets __ALIGN_JMP_THRESHOLD__; a non-empty THRESHOLD
; argument overrides that value.  __ALIGNMODE__ is finally set to
; "MODE,threshold".
;
; Where one table is byte-for-byte the same as an earlier table of the
; same mode, it is copied with %xdefine (expanded on the spot, so the
; resulting definition holds the literal bytes).
%imacro alignmode 1-2.nolist
  %ifidni %1,nop
    %define __ALIGN_JMP_THRESHOLD__ 16

    ; One 0x90 per byte of padding, regardless of code size.
    %define __ALIGN_16BIT_1B__ 0x90
    %define __ALIGN_16BIT_2B__ 0x90,0x90
    %define __ALIGN_16BIT_3B__ 0x90,0x90,0x90
    %define __ALIGN_16BIT_4B__ 0x90,0x90,0x90,0x90
    %define __ALIGN_16BIT_5B__ 0x90,0x90,0x90,0x90,0x90
    %define __ALIGN_16BIT_6B__ 0x90,0x90,0x90,0x90,0x90,0x90
    %define __ALIGN_16BIT_7B__ 0x90,0x90,0x90,0x90,0x90,0x90,0x90
    %define __ALIGN_16BIT_8B__ 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90

    ; 32-bit: same bytes as the 16-bit table above.
    %xdefine __ALIGN_32BIT_1B__ __ALIGN_16BIT_1B__
    %xdefine __ALIGN_32BIT_2B__ __ALIGN_16BIT_2B__
    %xdefine __ALIGN_32BIT_3B__ __ALIGN_16BIT_3B__
    %xdefine __ALIGN_32BIT_4B__ __ALIGN_16BIT_4B__
    %xdefine __ALIGN_32BIT_5B__ __ALIGN_16BIT_5B__
    %xdefine __ALIGN_32BIT_6B__ __ALIGN_16BIT_6B__
    %xdefine __ALIGN_32BIT_7B__ __ALIGN_16BIT_7B__
    %xdefine __ALIGN_32BIT_8B__ __ALIGN_16BIT_8B__

    ; 64-bit: same bytes as the 16-bit table above.
    %xdefine __ALIGN_64BIT_1B__ __ALIGN_16BIT_1B__
    %xdefine __ALIGN_64BIT_2B__ __ALIGN_16BIT_2B__
    %xdefine __ALIGN_64BIT_3B__ __ALIGN_16BIT_3B__
    %xdefine __ALIGN_64BIT_4B__ __ALIGN_16BIT_4B__
    %xdefine __ALIGN_64BIT_5B__ __ALIGN_16BIT_5B__
    %xdefine __ALIGN_64BIT_6B__ __ALIGN_16BIT_6B__
    %xdefine __ALIGN_64BIT_7B__ __ALIGN_16BIT_7B__
    %xdefine __ALIGN_64BIT_8B__ __ALIGN_16BIT_8B__
  %elifidni %1,generic
    %define __ALIGN_JMP_THRESHOLD__ 8

    ; All three tables differ in this mode, so each is spelled out.
    %define __ALIGN_16BIT_1B__ 0x90
    %define __ALIGN_16BIT_2B__ 0x89,0xf6
    %define __ALIGN_16BIT_3B__ 0x8d,0x74,0x00
    %define __ALIGN_16BIT_4B__ 0x8d,0xb4,0x00,0x00
    %define __ALIGN_16BIT_5B__ 0x90,0x8d,0xb4,0x00,0x00
    %define __ALIGN_16BIT_6B__ 0x89,0xf6,0x8d,0xbd,0x00,0x00
    %define __ALIGN_16BIT_7B__ 0x8d,0x74,0x00,0x8d,0xbd,0x00,0x00
    %define __ALIGN_16BIT_8B__ 0x8d,0xb4,0x00,0x00,0x8d,0xbd,0x00,0x00

    ; No 8-byte entry for 32-bit code: `align' then works in 7-byte units.
    %define __ALIGN_32BIT_1B__ 0x90
    %define __ALIGN_32BIT_2B__ 0x89,0xf6
    %define __ALIGN_32BIT_3B__ 0x8d,0x76,0x00
    %define __ALIGN_32BIT_4B__ 0x8d,0x74,0x26,0x00
    %define __ALIGN_32BIT_5B__ 0x90,0x8d,0x74,0x26,0x00
    %define __ALIGN_32BIT_6B__ 0x8d,0xb6,0x00,0x00,0x00,0x00
    %define __ALIGN_32BIT_7B__ 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
    %undef  __ALIGN_32BIT_8B__

    %define __ALIGN_64BIT_1B__ 0x90
    %define __ALIGN_64BIT_2B__ 0x66,0x90
    %define __ALIGN_64BIT_3B__ 0x66,0x66,0x90
    %define __ALIGN_64BIT_4B__ 0x66,0x66,0x66,0x90
    %define __ALIGN_64BIT_5B__ 0x66,0x66,0x90,0x66,0x90
    %define __ALIGN_64BIT_6B__ 0x66,0x66,0x90,0x66,0x66,0x90
    %define __ALIGN_64BIT_7B__ 0x66,0x66,0x66,0x90,0x66,0x66,0x90
    %define __ALIGN_64BIT_8B__ 0x66,0x66,0x66,0x90,0x66,0x66,0x66,0x90
  %elifidni %1,k8
    %define __ALIGN_JMP_THRESHOLD__ 16

    ; 0x66-prefixed 0x90 sequences, shared by every code size.
    %define __ALIGN_16BIT_1B__ 0x90
    %define __ALIGN_16BIT_2B__ 0x66,0x90
    %define __ALIGN_16BIT_3B__ 0x66,0x66,0x90
    %define __ALIGN_16BIT_4B__ 0x66,0x66,0x66,0x90
    %define __ALIGN_16BIT_5B__ 0x66,0x66,0x90,0x66,0x90
    %define __ALIGN_16BIT_6B__ 0x66,0x66,0x90,0x66,0x66,0x90
    %define __ALIGN_16BIT_7B__ 0x66,0x66,0x66,0x90,0x66,0x66,0x90
    %define __ALIGN_16BIT_8B__ 0x66,0x66,0x66,0x90,0x66,0x66,0x66,0x90

    ; 32-bit: same bytes as the 16-bit table above.
    %xdefine __ALIGN_32BIT_1B__ __ALIGN_16BIT_1B__
    %xdefine __ALIGN_32BIT_2B__ __ALIGN_16BIT_2B__
    %xdefine __ALIGN_32BIT_3B__ __ALIGN_16BIT_3B__
    %xdefine __ALIGN_32BIT_4B__ __ALIGN_16BIT_4B__
    %xdefine __ALIGN_32BIT_5B__ __ALIGN_16BIT_5B__
    %xdefine __ALIGN_32BIT_6B__ __ALIGN_16BIT_6B__
    %xdefine __ALIGN_32BIT_7B__ __ALIGN_16BIT_7B__
    %xdefine __ALIGN_32BIT_8B__ __ALIGN_16BIT_8B__

    ; 64-bit: same bytes as the 16-bit table above.
    %xdefine __ALIGN_64BIT_1B__ __ALIGN_16BIT_1B__
    %xdefine __ALIGN_64BIT_2B__ __ALIGN_16BIT_2B__
    %xdefine __ALIGN_64BIT_3B__ __ALIGN_16BIT_3B__
    %xdefine __ALIGN_64BIT_4B__ __ALIGN_16BIT_4B__
    %xdefine __ALIGN_64BIT_5B__ __ALIGN_16BIT_5B__
    %xdefine __ALIGN_64BIT_6B__ __ALIGN_16BIT_6B__
    %xdefine __ALIGN_64BIT_7B__ __ALIGN_16BIT_7B__
    %xdefine __ALIGN_64BIT_8B__ __ALIGN_16BIT_8B__
  %elifidni %1,k7
    %define __ALIGN_JMP_THRESHOLD__ 16

    %define __ALIGN_16BIT_1B__ 0x90
    %define __ALIGN_16BIT_2B__ 0x66,0x90
    %define __ALIGN_16BIT_3B__ 0x66,0x66,0x90
    %define __ALIGN_16BIT_4B__ 0x66,0x66,0x66,0x90
    %define __ALIGN_16BIT_5B__ 0x66,0x66,0x90,0x66,0x90
    %define __ALIGN_16BIT_6B__ 0x66,0x66,0x90,0x66,0x66,0x90
    %define __ALIGN_16BIT_7B__ 0x66,0x66,0x66,0x90,0x66,0x66,0x90
    %define __ALIGN_16BIT_8B__ 0x66,0x66,0x66,0x90,0x66,0x66,0x66,0x90

    ; The 32-bit table is specific to this mode and has no 8-byte entry.
    %define __ALIGN_32BIT_1B__ 0x90
    %define __ALIGN_32BIT_2B__ 0x8b,0xc0
    %define __ALIGN_32BIT_3B__ 0x8d,0x04,0x20
    %define __ALIGN_32BIT_4B__ 0x8d,0x44,0x20,0x00
    %define __ALIGN_32BIT_5B__ 0x8d,0x44,0x20,0x00,0x90
    %define __ALIGN_32BIT_6B__ 0x8d,0x80,0x00,0x00,0x00,0x00
    %define __ALIGN_32BIT_7B__ 0x8d,0x04,0x05,0x00,0x00,0x00,0x00
    %undef  __ALIGN_32BIT_8B__

    ; 64-bit: same bytes as the 16-bit table above.
    %xdefine __ALIGN_64BIT_1B__ __ALIGN_16BIT_1B__
    %xdefine __ALIGN_64BIT_2B__ __ALIGN_16BIT_2B__
    %xdefine __ALIGN_64BIT_3B__ __ALIGN_16BIT_3B__
    %xdefine __ALIGN_64BIT_4B__ __ALIGN_16BIT_4B__
    %xdefine __ALIGN_64BIT_5B__ __ALIGN_16BIT_5B__
    %xdefine __ALIGN_64BIT_6B__ __ALIGN_16BIT_6B__
    %xdefine __ALIGN_64BIT_7B__ __ALIGN_16BIT_7B__
    %xdefine __ALIGN_64BIT_8B__ __ALIGN_16BIT_8B__
  %elifidni %1,p6
    %define __ALIGN_JMP_THRESHOLD__ 16

    ; 16-bit entries longer than 4 bytes are built from shorter pieces.
    %define __ALIGN_16BIT_1B__ 0x90
    %define __ALIGN_16BIT_2B__ 0x66,0x90
    %define __ALIGN_16BIT_3B__ 0x0f,0x1f,0x00
    %define __ALIGN_16BIT_4B__ 0x0f,0x1f,0x40,0x00
    %define __ALIGN_16BIT_5B__ 0x90,0x0f,0x1f,0x40,0x00
    %define __ALIGN_16BIT_6B__ 0x0f,0x1f,0x00,0x0f,0x1f,0x00
    %define __ALIGN_16BIT_7B__ 0x0f,0x1f,0x00,0x0f,0x1f,0x40,0x00
    %define __ALIGN_16BIT_8B__ 0x0f,0x1f,0x40,0x00,0x0f,0x1f,0x40,0x00

    %define __ALIGN_32BIT_1B__ 0x90
    %define __ALIGN_32BIT_2B__ 0x66,0x90
    %define __ALIGN_32BIT_3B__ 0x0f,0x1f,0x00
    %define __ALIGN_32BIT_4B__ 0x0f,0x1f,0x40,0x00
    %define __ALIGN_32BIT_5B__ 0x0f,0x1f,0x44,0x00,0x00
    %define __ALIGN_32BIT_6B__ 0x66,0x0f,0x1f,0x44,0x00,0x00
    %define __ALIGN_32BIT_7B__ 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00
    %define __ALIGN_32BIT_8B__ 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00

    ; 64-bit: same bytes as the 32-bit table above.
    %xdefine __ALIGN_64BIT_1B__ __ALIGN_32BIT_1B__
    %xdefine __ALIGN_64BIT_2B__ __ALIGN_32BIT_2B__
    %xdefine __ALIGN_64BIT_3B__ __ALIGN_32BIT_3B__
    %xdefine __ALIGN_64BIT_4B__ __ALIGN_32BIT_4B__
    %xdefine __ALIGN_64BIT_5B__ __ALIGN_32BIT_5B__
    %xdefine __ALIGN_64BIT_6B__ __ALIGN_32BIT_6B__
    %xdefine __ALIGN_64BIT_7B__ __ALIGN_32BIT_7B__
    %xdefine __ALIGN_64BIT_8B__ __ALIGN_32BIT_8B__
  %else
    %error unknown alignment mode: %1
  %endif

  ; An explicit second argument replaces the mode's own threshold.
  %ifnempty %2
    %xdefine __ALIGN_JMP_THRESHOLD__ %2
  %endif

  ; Record the selection as "mode,threshold" (threshold expanded now).
  %xdefine __ALIGNMODE__ %1,__ALIGN_JMP_THRESHOLD__
%endmacro
; align BOUNDARY [, FILL...]
;
; Pads the output up to the next multiple of BOUNDARY, measured from
; the start of the current section ($-$$).
;
;  * With FILL given, FILL is repeated once per missing byte.
;  * Without FILL, the gap is compared with __ALIGN_JMP_THRESHOLD__:
;      - larger gap: a jmp over the padding is emitted, followed by
;        single-byte nops up to the boundary;
;      - otherwise the gap is filled from the __ALIGN_<bits>BIT_<n>B__
;        tables selected by __BITS__, largest entry first.
;
; The %unimacro line drops an existing `align' macro with the same
; signature so that the definition below takes its place.
%unimacro align 1-2+.nolist
%imacro align 1-2+.nolist
  %ifempty %2
    %push
    %assign %$gap (((%1) - (($-$$) % (%1))) % (%1))
    %if %$gap <= __ALIGN_JMP_THRESHOLD__
      ; Each size first emits as many of the longest available entry
      ; (8 bytes, or 7 when no 8-byte entry is defined) as fit, then
      ; one entry for whatever remains (nothing if the remainder is 0).
      %if __BITS__ == 16
        %ifndef __ALIGN_16BIT_8B__
          %rep %$gap / 7
            db __ALIGN_16BIT_7B__
          %endrep
          %assign %$gap %$gap % 7
        %else
          %rep %$gap / 8
            db __ALIGN_16BIT_8B__
          %endrep
          %assign %$gap %$gap % 8
        %endif
        %if %$gap == 7
          db __ALIGN_16BIT_7B__
        %elif %$gap == 6
          db __ALIGN_16BIT_6B__
        %elif %$gap == 5
          db __ALIGN_16BIT_5B__
        %elif %$gap == 4
          db __ALIGN_16BIT_4B__
        %elif %$gap == 3
          db __ALIGN_16BIT_3B__
        %elif %$gap == 2
          db __ALIGN_16BIT_2B__
        %elif %$gap == 1
          db __ALIGN_16BIT_1B__
        %endif
      %elif __BITS__ == 32
        %ifndef __ALIGN_32BIT_8B__
          %rep %$gap / 7
            db __ALIGN_32BIT_7B__
          %endrep
          %assign %$gap %$gap % 7
        %else
          %rep %$gap / 8
            db __ALIGN_32BIT_8B__
          %endrep
          %assign %$gap %$gap % 8
        %endif
        %if %$gap == 7
          db __ALIGN_32BIT_7B__
        %elif %$gap == 6
          db __ALIGN_32BIT_6B__
        %elif %$gap == 5
          db __ALIGN_32BIT_5B__
        %elif %$gap == 4
          db __ALIGN_32BIT_4B__
        %elif %$gap == 3
          db __ALIGN_32BIT_3B__
        %elif %$gap == 2
          db __ALIGN_32BIT_2B__
        %elif %$gap == 1
          db __ALIGN_32BIT_1B__
        %endif
      %elif __BITS__ == 64
        %ifndef __ALIGN_64BIT_8B__
          %rep %$gap / 7
            db __ALIGN_64BIT_7B__
          %endrep
          %assign %$gap %$gap % 7
        %else
          %rep %$gap / 8
            db __ALIGN_64BIT_8B__
          %endrep
          %assign %$gap %$gap % 8
        %endif
        %if %$gap == 7
          db __ALIGN_64BIT_7B__
        %elif %$gap == 6
          db __ALIGN_64BIT_6B__
        %elif %$gap == 5
          db __ALIGN_64BIT_5B__
        %elif %$gap == 4
          db __ALIGN_64BIT_4B__
        %elif %$gap == 3
          db __ALIGN_64BIT_3B__
        %elif %$gap == 2
          db __ALIGN_64BIT_2B__
        %elif %$gap == 1
          db __ALIGN_64BIT_1B__
        %endif
      %else
        %error "Invalid __BITS__ value"
      %endif
    %else
      ; Gap above the threshold: branch past the padding.
      jmp %$end
      ; The jmp itself has moved $, so the remaining distance must be
      ; computed afresh here; the value in %$gap is stale.
      times (((%1) - (($-$$) % (%1))) % (%1)) nop
%$end:
    %endif
    %pop
  %else
    ; Caller-supplied fill: repeat it once per missing byte.
    times (((%1) - (($-$$) % (%1))) % (%1)) %2
  %endif
%endmacro
; Make `generic' the mode in effect once this file has been processed;
; a later `alignmode' invocation replaces it.
        alignmode generic