1 ; vim
: set filetype
=fasm foldmethod
=marker commentstring
=;
%s colorcolumn
=101 :
2 ; XXX
: We actually used this code unit as
a training ground. Expect it to
be very overkill.
3 relations
: namespace relations
4 ;
{{{ init_once
-------------------------------------------------------------------------------------
; init_once: prepares the two memory areas used by this namespace.
; Visible in this excerpt: for each area the length (rsi = array.bytes_n, then
; array.idx.bytes_n), the protection (rdx) and the flags (r10) are loaded, and the value
; returned in rax is compared against linux.errno.last; a value at or above it is treated
; as a failure and branches to the matching .err_* label.
; NOTE(review): the syscall instructions and the rest of their argument setup are not part
; of this excerpt - presumably mmap, going by the flag comments and label names; confirm.
7 TRACE
'PBF:BLK:PRIMBLK:PRIMGRP:RELATIONS:INIT_ONCE'
12 mov rsi
, array.bytes_n
13 mov rdx
, 1b or 10b ; PROT_READ | PROT_WRITE
14 mov
r10, 0x22 ; MAP_PRIVATE | MAP_ANONYMOUS
; unsigned compare: rax >= linux.errno.last is the error range
19 cmp rax
, linux.errno.last
20 jae
.err_relations_mmap_failed
; the returned address is stored as array.finish, the position unserialize appends at
22 mov
[array.finish
], rax
24 lea rdi
, [msg.idx_relations_n
]
26 call qword
[libc.printf
]
; same sequence for the index area
29 mov rsi
, array.idx.bytes_n
30 mov rdx
, 1b or 10b ; PROT_READ | PROT_WRITE
31 mov
r10, 0x22 ; MAP_PRIVATE | MAP_ANONYMOUS
36 cmp rax
, linux.errno.last
37 jae
.err_relations_idx_mmap_failed
40 mov
[array.idx.finish
], rax ; next available idx slot
; error paths: rsi = message passed to libc.dprintf
; NOTE(review): the remaining dprintf arguments and what follows the call are not visible here
45 .err_relations_mmap_failed:
48 lea rsi
, [msg.mmap_failed
]
49 call qword
[libc.dprintf
]
53 .err_relations_idx_mmap_failed:
56 lea rsi
, [msg.idx_mmap_failed
]
57 call qword
[libc.dprintf
]
61 ;
}}} init_once
-------------------------------------------------------------------------------------
; Register contract of the local memcpy routine: callers load memcpy_dst, memcpy_src and
; memcpy_bytes_n, put a continuation label in memcpy_link and enter the routine with
; "jmp memcpy" (see the "lea memcpy_link, [...]" / "jmp memcpy" pairs in unserialize).
63 define memcpy_dst rbx ; memcpy_dst
(out
) = memcpy_dst
(in
) + memcpy_bytes_n
64 define memcpy_src
r11 ; clobbered
, don
't care
65 define memcpy_bytes_n r9 ; clobbered, don't care
66 define memcpy_link rcx ; the return addr
67 ;
===================================================================================================
68 ;
{{{ parse
-----------------------------------------------------------------------------------------
; parse: scans the serialized relation between relations_p and relations_finish_p and
; records where each section starts. Every field starts with a varint key; key >> 3 is
; the field number and key & 111b the field type. Recorded below: id, keys_p/keys_finish_p,
; vals_p, memids_p/memids_finish_p, types_p and roles_p_xmm.
; NOTE(review): the per-field dispatch (labels, branches, pointer advances) is not part of
; this excerpt; the comments below only describe the instructions that are visible.
69 ; scratch ~ rax rcx
r11 r9
71 define relations_p rbx ; in
72 define relations_finish_p
r13 ; in
73 define flags
r15 ; in
/out
74 define strtbl_idx_p
r8
75 define lat_of_lon_of xmm3 ; f64
/f64
78 define keys_p
r14 ; can
be 0
79 define keys_p_d r14d ; can
be 0
80 define keys_finish_p rbp
81 define vals_p
r10 ; num of vals is num of keys
82 define memids_p rdx ; memids
= MEMber IDs
84 define memids_finish_p rsi
85 define types_p rdi ; num of types is num of memids
86 define roles_p_xmm xmm2 ; the role of
a member is
a string
; 32-bit xor clears the full 64-bit register: keys_p = 0 and memids_p = 0 mean "section absent"
; NOTE(review): memids_p_d is used below but its define is not in this excerpt
89 xor keys_p_d
, keys_p_d ; if no
(key
/val
)
90 xor memids_p_d
, memids_p_d ; presume we can have no memids
(should
not happen though
)
93 ;
"relationS" is plural because of the primgrp field name
, but there is
94 ; actually only one
"relation" which will
be repeated
; end-of-message check for the field loop
95 cmp relations_p
, relations_finish_p
; split the field key into field number (r11) and field type (r9)
97 varint_load
r11, relations_p ; msg key
98 mov
r9, r11 ;
= msg key copy
99 shr
r11, 3 ;
= field num
112 and r9, 111b ; field type
113 TRACE
'PBF:BLK:PRIMBLK:PRIMGRP:RELATIONS:MSG:FIELD %lu OF TYPE %lu', r11, r9
; val_skip receives the cursor, the field type and the field number
114 val_skip relations_p
, r9, r11
118 varint_load id
, relations_p
; each section below is introduced by a varint byte size; the cursor after that varint is the
; section start
122 varint_load
r11, relations_p ; keys_sz
123 mov keys_p
, relations_p
124 TRACE
'PBF:BLK:PRIMBLK:PRIMGRP:RELATIONS:KEYS:start = %p:size = %lu(0x%lx) bytes', relations_p
, r11, r11
; NOTE(review): presumably relations_p was advanced by keys_sz on a line not shown here
126 mov keys_finish_p
, relations_p
130 varint_load
r11, relations_p ; vals_sz
131 mov vals_p
, relations_p
132 TRACE
'PBF:BLK:PRIMBLK:PRIMGRP:RELATIONS:VALS:start = %p:size = %lu(0x%lx) bytes', vals_p
, r11, r11
137 varint_load
r11, relations_p ; memids_sz
138 mov memids_p
, relations_p
139 TRACE
'PBF:BLK:PRIMBLK:PRIMGRP:RELATIONS:MEMIDS:start = %p:size = %lu(0x%lx) bytes', memids_p
, r11, r11
; NOTE(review): presumably relations_p was advanced by memids_sz on a line not shown here
141 mov memids_finish_p
, relations_p
145 varint_load
r11, relations_p ; types_sz
146 mov types_p
, relations_p
147 TRACE
'PBF:BLK:PRIMBLK:PRIMGRP:RELATIONS:TYPES:start = %p:size = %lu(0x%lx) bytes', types_p
, r11, r11
152 varint_load
r11, relations_p ; roles_sz
153 TRACE
'PBF:BLK:PRIMBLK:PRIMGRP:RELATIONS:ROLES:start = %p:size = %lu(0x%lx) bytes', relations_p
, r11, r11
; the roles section start is kept in an xmm register
154 movq roles_p_xmm
, relations_p
157 purge relations_finish_p
159 ;
}}} parse
-----------------------------------------------------------------------------------------
160 ;
{{{ unserialize
-----------------------------------------------------------------------------------
; unserialize: writes the relation described by the parse registers at [array.finish].
; The commented-out defines list the symbols inherited from parse. relation_p (rbx) is the
; write cursor; relation_start_p (r13) keeps the start of the record being built.
161 ; scratch ~ rax rcx
r11 r9
163 ;define flags
r15 ; in
/out
164 ;define strtbl_idx_p
r8
165 ;define lat_of_lon_of xmm3 ; f64
/f64
168 ;define keys_p
r14 ; can
be 0
169 ;define keys_p_d r14d ; can
be 0
170 ;define keys_finish_p rbp
171 ;define vals_p
r10 ; num of vals is num of keys
172 ;define memids_p rdx ; memids
= MEMber IDs
173 ;define memids_p_d edx
174 ;define memids_finish_p rsi
175 ;define types_p rdi ; num of types is num of memids
176 ;define roles_p_xmm xmm2 ; the role of
a member is
a string
178 define relation_p rbx
179 define relation_start_p
r13
; relations_p and relation_p are both defined as rbx: this load sets the write cursor
182 mov relations_p
, [array.finish
]
183 mov relation_start_p
, relations_p
184 ;
{{{ keys_val_cpy
----------------------------------------------------------------------------------
; keys_val_cpy: appends the (key, val) strings of the relation at relation_p.
; For each pair: a string id is read from keys_p / vals_p, the string address is fetched
; from the table at strtbl_idx_p (8 bytes per entry), the length varint found at that address
; is read into r9, the length is stored at relation_p, and the bytes are copied by the local
; memcpy, which continues at the label held in memcpy_link.
; NOTE(review): the key length is stored as one byte (r9b) and the val length as two bytes
; (r9w); longer strings would have their stored length truncated - confirm the input bounds.
; NOTE(review): the loop label and the cursor step past the stored length are not part of
; this excerpt.
186 add relation_p
, array.relation_t.keys_vals ; relation_p points on the start of the keys_vals section
; NOTE(review): presumably preceded by a test of keys_p on a line not shown here
188 jz unserialize.keys_vals_done
192 cmp keys_p
, keys_finish_p
193 je unserialize.keys_vals_done
195 varint_load
r11, keys_p ;
= key_id
, keys_p points on next key_id
196 mov
r11, [strtbl_idx_p
+ 8 * r11] ;
= key_src_addr
197 varint_load
r9, r11 ;
r9 = key_bytes_n
, r11 = key_src_addr
198 mov
[relation_p
], r9b ; store the byte containing the sz of the key str in bytes
200 ; memcpy_dst
= relation_p
(rbx
), memcpy_src
= key_src_addr
(r11), memcpy_bytes_n
= key_bytes_n
(r9)
201 lea memcpy_link
, [.key_cpy_done]
202 jmp memcpy ; relation_p
= relation_p
+ key_bytes_n
205 varint_load
r11, vals_p ;
= val_id
, vals_p points on next val_id
206 mov
r11, [strtbl_idx_p
+ 8 * r11] ;
= val_src_addr
207 varint_load
r9, r11;
r9 = val_bytes_n
, r11 = val_src_addr
208 mov
[relation_p
], r9w ; store the short containing the sz of the val str in bytes
210 ; memcpy_dst
= relation_p
(rbx
), memcpy_src
= val_src_addr
(r11), memcpy_bytes_n
= val_bytes_n
(r9)
; this copy advances relation_p by val_bytes_n (memcpy_bytes_n = r9 = val_bytes_n); the
; "key_bytes_n" in the trailing comment of the jmp is a leftover from the key copy
211 lea memcpy_link
, [.next_key_val]
212 jmp memcpy ; relation_p
= relation_p
+ key_bytes_n
213 ;
}}} keys_val_cpy
----------------------------------------------------------------------------------
; keys_vals_done: closes the keys/vals section with a zero size byte, records where the
; members section starts in the relation header, and skips the members loop when memids_p is 0.
; NOTE(review): since a zero key size marks the end, an empty key string in the input would
; read as the terminator - confirm this cannot occur.
215 unserialize.keys_vals_done
:
216 mov byte
[relation_p
], 0 ; insert the
0-szed key
/terminator
218 ; here relation_p points on the start of the section of members
219 mov
[relation_start_p
+ array.relation_t.members
], relation_p
220 test memids_p
, memids_p
221 jz unserialize.members_done ; unlikely
226 ;
{{{ members
---------------------------------------------------------------------------------------
; members: setup and loop head of the per-member copy.
; roles_p takes r14 and prev_memid takes rbp, the registers parse defined as keys_p and
; keys_finish_p. member_p is an alias of relation_p, so it is the same write cursor.
; prev_memid starts at 0 and accumulates the delta-encoded member ids.
227 ; scratch ~ rax rcx
r11 r9
229 ;define flags
r15 ; in
/out
230 ;define strtbl_idx_p
r8
231 ;define lat_of_lon_of xmm3 ; f64
/f64
234 ;define memids_p rdx ; memids
= MEMber IDs
235 ;define memids_p_d edx
236 ;define memids_finish_p rsi
237 ;define types_p rdi ; num of types is num of memids
238 ;define roles_p_xmm xmm2 ; the role of
a member is
a string
239 ; local from unserialize
240 ;define relation_p rbx
241 ;define relation_start_p
r13
243 define roles_p
r14 ; get it from xmm roles_p_xmm
244 define prev_memid rbp
245 define prev_memid_d ebp
246 define member_p relation_p ; alias
247 define member_p.p member_p
+ array.relation_t.member_t.p
248 define member_p.type member_p
+ array.relation_t.member_t.type
250 movq roles_p
, roles_p_xmm
251 xor prev_memid_d
, prev_memid_d ; prev_id
= 0
; loop exit: every member id has been consumed
255 cmp memids_p
, memids_finish_p
256 je unserialize.members_done
258 ; decode the member id
-------------------------------------------------------------------
; Reads the next raw varint from memids_p, finishes the zigzag decoding with the xor below,
; adds the resulting delta to prev_memid and stores the running id in the member's p field.
; NOTE(review): the instructions that prepare r11 (i >> 1) and rax (-(i & 1)) are not part of
; this excerpt.
259 varint_load
r11, memids_p ; load
/skip the raw zigzag delta_refs
264 xor r11, rax ;
(i
>> 1) ^
-(i
& 1) ;
= delta_id
265 add prev_memid
, r11 ;
= prev_memid
= memid because DELTA encoded
266 mov
[member_p.p
], prev_memid ; store memid
267 ; decode the type
--------------------------------------------------------------------------
; Reads the member type varint from types_p and stores its low byte in the member's type field.
268 varint_load
r11, types_p
269 mov byte
[member_p.type
], r11b ;
a byte in enough
270 ; cpy the role str
-------------------------------------------------------------------------
; Copies the role string of the member: role id from roles_p, string address from the table at
; strtbl_idx_p (8 bytes per entry), length varint read at that address into r9, length stored
; as one byte at member_p, then the bytes are copied by the local memcpy, which continues at
; .next_member.
; NOTE(review): only the low byte of the length is stored (r9b); confirm roles never exceed
; 255 bytes. The cursor step past the size byte is not part of this excerpt.
271 add member_p
, array.relation_t.member_t.role ; member_p points on the role field (its size byte comes first)
272 varint_load
r11, roles_p ;
= role_str_id
273 mov
r11, [strtbl_idx_p
+ 8 * r11] ; role_src_addr
274 varint_load
r9, r11 ;
r9 = role_bytes_n
, r11 = role_src_addr
275 mov
[member_p
], r9b ; insert the byte containing the sz of the role str in bytes
277 ; memcpy_dst
= member_p
(rbx
), memcpy_src
= role_src_addr
(r11), memcpy_bytes_n
= role_bytes_n
(r9)
278 lea memcpy_link
, [.next_member]
279 jmp memcpy ; member_p
= member_p
+ role_bytes_n
286 ;
}}} members
---------------------------------------------------------------------------------------
288 purge memids_finish_p
; members_done: writes the one-byte 0xff terminating member, then publishes the address right
; after it both as the "next" link of this relation and as the new array.finish.
; NOTE(review): the 0xff is written at offset 0 of the terminating member while readers load
; the type from member_t.type; this assumes member_t.type is offset 0 - confirm.
292 unserialize.members_done
:
293 ; here relation_p points on the terminating member
(only
a 0xff type
and nothing else
)
294 mov byte
[relation_p
], 0xff ; the terminator type is at the start of relation_p
295 inc relation_p ;
= next relation
296 mov
[relation_start_p
+ array.relation_t.next
], relation_p
297 mov
[array.finish
], relation_p
299 ;
{{{ idx_insert_node
-------------------------------------------------------------------------------
; idx_insert_node: registers relation_start_p in the index under its id.
; Starting from the slot at [array.idx], one child pointer is read per step at
; idx_slot_p + 8 * bit_val_select (bit_val_select is 0 or 1). A missing child is taken from
; idx_finish_p, which is then advanced by array.idx.slot_t.bytes_n. At the end the relation
; pointer is stored in the slot's relation field and idx_finish_p is written back to
; array.idx.finish.
; idx_finish_p takes rbx, the register used as relation_p; relation_p was already saved to
; array.finish before this section.
; NOTE(review): the loop labels, the bit test feeding setc, and the step to the next slot are
; not part of this excerpt.
302 ;define flags
r15 ; in
/out
303 ;define strtbl_idx_p
r8
304 ;define lat_of_lon_of xmm3 ; f64
/f64
307 ; local from unserialize
308 ;define relation_start_p
r13
310 define idx_finish_p rbx
311 define id_bit_idx
r14
312 define id_bit_idx_d r14d
313 define bit_val_select rcx ; we use rcx here because we may use rcx compressed instructions
314 define bit_val_select_d ecx
315 define bit_val_select_b cl
316 define id_bit_idx_msb rbp
319 ;define idx_slot_p.relation idx_slot_p
+ array.idx.slot_t.relation
321 mov idx_slot_p
, [array.idx
]
322 mov idx_finish_p
, [array.idx.finish
]
323 TRACE_RELATIONS_IDX
'id=%#016lx nodes.idx=%p nodes.finish=%p', id
, idx_slot_p
, idx_finish_p
324 xor id_bit_idx_d
, id_bit_idx_d
325 xor bit_val_select_d
, bit_val_select_d
326 mov id_bit_idx_msb
, -1 ; this is to make it work with id
= 0
; NOTE(review): Intel documents the bsr destination as undefined when the source is 0 (AMD
; documents it as unchanged); the -1 preset above relies on the "unchanged" behaviour
327 bsr id_bit_idx_msb
, id ; if id
= 0, id_bit_idx_msb is untouched namely
-1
328 TRACE_RELATIONS_IDX
'id_bit_idx_msb=%lu', id_bit_idx_msb
331 TRACE_RELATIONS_IDX
'id_bit_idx=%lu', id_bit_idx
332 cmp id_bit_idx_msb
, id_bit_idx
; NOTE(review): the instruction that sets CF for this setc is on a line not shown here
335 setc bit_val_select_b ; bit_val_select
= 1 if id
[id_bit_idx
] = 1, else
0
336 TRACE_RELATIONS_IDX
'bit_val_select=%lu', bit_val_select
337 mov rax
, [idx_slot_p
+ 8 * bit_val_select
]
338 TRACE_RELATIONS_IDX
'slot=%p bit slot=%p', idx_slot_p
, rax
340 jnz
.idx_existing_slot
; no child yet: hand out the slot at idx_finish_p and link it from the current slot
341 TRACE_RELATIONS_IDX
'non existing slot using finish=%p', idx_finish_p
342 mov rax
, idx_finish_p
343 add idx_finish_p
, array.idx.slot_t.bytes_n ; we could zero the mem here
, but mmap does it for us
345 prefetchw
[idx_finish_p
+ 64 * 2] ; try to get ready
346 prefetchw
[idx_finish_p
+ 64 * 3]
348 mov
[idx_slot_p
+ 8 * bit_val_select
], rax
352 TRACE_RELATIONS_IDX
'next_slot=%p', idx_slot_p
; trace-only check: warns when the slot already holds a relation pointer
358 if TRACE_RELATIONS_IDX_ENABLED
359 mov rax
, [idx_slot_p
+ array.idx.slot_t.relation
]
361 jz
.relation_slot_available
362 TRACE_RELATIONS_IDX
'WARNING: overwritting an existing relation=%p', rax
363 .relation_slot_available:
364 TRACE_RELATIONS_IDX
'inserting relation=%p in slot=%p', relation_start_p
, idx_slot_p
366 mov
[idx_slot_p
+ array.idx.slot_t.relation
], relation_start_p
367 ; XXX
: if one day we need to
be backed by
a file we will need to DONT_NEED
"madvise" the
368 mov
[array.idx.finish
], idx_finish_p
369 ; the current relation memory
372 purge bit_val_select_b
373 purge bit_val_select_d
378 ;
}}} idx_insert_node
-------------------------------------------------------------------------------
; end of unserialize: drop the local symbol and jump back to the primgrp parser; the target
; label is not defined in this excerpt
379 purge relation_start_p
384 jmp primgrp.parse.return_from_relations
385 ;
}}} unserialize
-----------------------------------------------------------------------------------
386 ;
{{{ local memcpy
(sse
) -----------------------------------------------------------------------------
; local memcpy: copies memcpy_bytes_n bytes from memcpy_src to memcpy_dst using unaligned SSE
; loads (lddqu) and stores (movdqu). The size is compared against decreasing chunk sizes:
; 64 bytes (with prefetches), 48, 32, 16, 8, then 7 down to 1. After each chunk, memcpy_dst
; and memcpy_src are advanced and memcpy_bytes_n reduced by the chunk size.
; NOTE(review): the entry label, the conditional branches after each cmp, the 7..1 byte tail
; copies and the return through memcpy_link are not part of this excerpt.
387 ; XXX
: lddqu performs aligned reads
, carefull where you use this
, but should
be fine with classic
; NOTE(review): per Intel's LDDQU description an implementation may fetch a wider aligned
; window than the 16 requested bytes - presumably what the XXX note warns about; confirm
389 ; scratch ~ rax xmm7 xmm6 xmm5 xmm4
; ---- 64-byte chunk
392 cmp memcpy_bytes_n
, 16 * 4
394 ; since we can
be cache line mis-aligned
, speculatively do prefetch
2 cls ahead
395 prefetchnta
[memcpy_src
+ 64 * 2] ; speculative non-temporal
"3rd" cache line ahead
396 prefetchnta
[memcpy_src
+ 64 * 3] ; speculative non-temporal
"4th" cache line ahead
398 prefetchw
[memcpy_dst
+ 64 * 2]
399 prefetchw
[memcpy_dst
+ 64 * 3]
401 lddqu xmm7
, [memcpy_src
+ 16 * 0]
402 lddqu xmm6
, [memcpy_src
+ 16 * 1]
403 lddqu xmm5
, [memcpy_src
+ 16 * 2]
404 lddqu xmm4
, [memcpy_src
+ 16 * 3]
406 movdqu
[memcpy_dst
+ 16 * 0], xmm7
407 movdqu
[memcpy_dst
+ 16 * 1], xmm6
408 movdqu
[memcpy_dst
+ 16 * 2], xmm5
409 movdqu
[memcpy_dst
+ 16 * 3], xmm4
411 add memcpy_dst
, 16 * 4
412 sub memcpy_bytes_n
, 16 * 4
414 add memcpy_src
, 16 * 4
; ---- 48-byte chunk
418 cmp memcpy_bytes_n
, 16 * 3
421 lddqu xmm7
, [memcpy_src
+ 16 * 0]
422 lddqu xmm6
, [memcpy_src
+ 16 * 1]
423 lddqu xmm5
, [memcpy_src
+ 16 * 2]
425 movdqu
[memcpy_dst
+ 16 * 0], xmm7
426 movdqu
[memcpy_dst
+ 16 * 1], xmm6
427 movdqu
[memcpy_dst
+ 16 * 2], xmm5
429 add memcpy_dst
, 16 * 3
430 sub memcpy_bytes_n
, 16 * 3
432 add memcpy_src
, 16 * 3
; ---- 32-byte chunk
436 cmp memcpy_bytes_n
, 16 * 2
439 lddqu xmm7
, [memcpy_src
+ 16 * 0]
440 lddqu xmm6
, [memcpy_src
+ 16 * 1]
442 movdqu
[memcpy_dst
+ 16 * 0], xmm7
443 movdqu
[memcpy_dst
+ 16 * 1], xmm6
445 add memcpy_dst
, 16 * 2
446 sub memcpy_bytes_n
, 16 * 2
448 add memcpy_src
, 16 * 2
; ---- 16-byte chunk
452 cmp memcpy_bytes_n
, 16 * 1
455 lddqu xmm7
, [memcpy_src
+ 16 * 0]
456 movdqu
[memcpy_dst
+ 16 * 0], xmm7
458 add memcpy_dst
, 16 * 1
459 sub memcpy_bytes_n
, 16 * 1
461 add memcpy_src
, 16 * 1
; ---- 8-byte chunk, through rax
465 cmp memcpy_bytes_n
, 8 * 1
467 mov rax
, [memcpy_src
+ 8 * 0]
468 mov
[memcpy_dst
+ 8 * 0], rax
470 add memcpy_dst
, 8 * 1
471 sub memcpy_bytes_n
, 8 * 1
473 add memcpy_src
, 8 * 1
; ---- remaining 7..1 bytes: size checks only, the copy code is not part of this excerpt
477 cmp memcpy_bytes_n
, 7
479 cmp memcpy_bytes_n
, 6
481 cmp memcpy_bytes_n
, 5
483 cmp memcpy_bytes_n
, 4
485 cmp memcpy_bytes_n
, 3
487 cmp memcpy_bytes_n
, 2
489 cmp memcpy_bytes_n
, 1
537 ;
}}} local memcpy
(sse
) -----------------------------------------------------------------------------
538 ;
{{{ resolve_ids
----------------------------------------------------------------------------------
; resolve_ids: second pass over the stored relations, from [array] up to [array.finish].
; For each member it loads the type byte and the id that the first pass left in the member's
; p field, selects one of the three indexes (nodes / ways / relations) with cmove, walks that
; index one id bit per step, and overwrites the p field with the pointer read at
; idx_slot_p + 2 * 8, or with 0 when a slot on the path is missing. When relation_p reaches
; relations_finish_p, control goes to pbf.tile_db_build.features_select_pass.
; NOTE(review): the defines of member_p, id, idx_slot_p, nodes_idx and ways_idx, the compares
; feeding jne/cmove/setc, and the loop labels are not part of this excerpt.
539 ; from pbf.tile_db_build
542 define relation_p rbx
543 define relations_finish_p
r12
547 define relations_idx
r11
549 define bit_val_select
r9
550 define bit_val_select_b r9b
551 define id_bit_idx_msb rcx ; we use rcx for compressed opcodes
552 define id_bit_idx_msb_d ecx
556 mov relation_p
, [array
]
557 mov relations_finish_p
, [array.finish
]
558 mov nodes_idx
, [primgrp.dense.nodes.idx
]
559 mov ways_idx
, [primgrp.ways.array.idx
]
560 mov relations_idx
, [primgrp.relations.array.idx
]
563 TRACE_2ND_PASS
'relation=%p relation_finish_p=%p', relation_p
, relations_finish_p
564 cmp relation_p
, relations_finish_p
565 je pbf.tile_db_build.features_select_pass ; prep
3rd pass
566 mov member_p
, [relation_p
+ array.relation_t.members
]
; only al is loaded here; the trace below prints the whole rax
569 mov al
, [member_p
+ array.relation_t.member_t.type
]
570 ; this field actually contains the id after the first major pass
571 mov id
, [member_p
+ array.relation_t.member_t.p
]
572 TRACE_2ND_PASS
'member=%p type=%#2lx(lowest byte only) id=%#lx(invalid if type=0xff)', member_p
, rax
, id
574 jne
.have_more_members
; terminating member reached: follow the "next" link stored by unserialize
575 mov relation_p
, [relation_p
+ array.relation_t.next
]
576 TRACE_2ND_PASS
'no more members next relation=%p', relation_p
; pick the index root that matches the member type
581 cmove idx_slot_p
, nodes_idx
584 cmove idx_slot_p
, ways_idx
587 cmove idx_slot_p
, relations_idx
589 xor bit_val_select
, bit_val_select ; bit_val_select
= 0
590 mov id_bit_idx_msb
, -1 ; this is to make it work with ref
= 0
; NOTE(review): Intel documents the bsr destination as undefined when the source is 0 (AMD
; documents it as unchanged); the -1 preset above relies on the "unchanged" behaviour
591 bsr id_bit_idx_msb
, id ; if id
= 0, ref_bit_idx_msb is untouched namely
-1
592 inc id_bit_idx_msb_d ; store the idx
+ 1, 32bits because of the following jecxz
595 jecxz
.idx_slot_lookup_done ; bit_idx + 1 is stored in ecx then 0 means we are finished
597 setc bit_val_select_b
598 mov idx_slot_p
, [idx_slot_p
+ 8 * bit_val_select
]
599 test idx_slot_p
, idx_slot_p ; if the idx slot is
0, we have
a missing node
600 jz
.missing_node ; unlikely
602 prefetcht0
[idx_slot_p
]
; the walk is complete: replace the member id with the pointer stored in the final slot
608 .idx_slot_lookup_done:
609 mov rax
, [idx_slot_p
+ 2 * 8] ; XXX
: all
3 idxs
(nodes
/ways
/relations
) have the same struct
610 mov
[member_p
+ array.relation_t.member_t.p
], rax
611 TRACE_2ND_PASS
'id found in idx, replacing with pointer %p', rax
; step over the role: rax = role size byte, read back at member_p - 1
612 .skip_member_role: ; from unlikely code below
613 add member_p
, array.relation_t.member_t.role
+ 1 ; points on the byte right after the byte containing the sz of the role str in bytes
614 movzx rax
, byte
[member_p
- 1]
618 .missing_node: ; unlikely
619 mov qword
[member_p
+ array.relation_t.member_t.p
], 0
620 TRACE_2ND_PASS
'missing id, pointer to 0'
621 jmp
.skip_member_role
623 purge id_bit_idx_msb_d
625 purge bit_val_select_b
632 purge relations_finish_p
635 ;
}}} resolve_ids
----------------------------------------------------------------------------------
636 ;
===================================================================================================
641 ;
{{{ macros
----------------------------------------------------------------------------------------
; TRACE_RELATIONS_IDX fmt, regs: when TRACE_RELATIONS_IDX_ENABLED is set, forwards to
; TRACE_PREFIX with the 'RELATIONS_IDX' tag.
; NOTE(review): the else/end lines are not part of this excerpt; the second definition is
; presumably the empty variant used when tracing is disabled - confirm.
642 if TRACE_RELATIONS_IDX_ENABLED
643 macro TRACE_RELATIONS_IDX fmt
, regs
&
644 TRACE_PREFIX
'RELATIONS_IDX', fmt
, regs
647 macro TRACE_RELATIONS_IDX fmt
, regs
&
650 ;
---------------------------------------------------------------------------------------------------
; TRACE_2ND_PASS fmt, regs: when TRACE_RELATIONS_2ND_PASS_ENABLED is set, forwards to
; TRACE_PREFIX with the 'RELATIONS_2ND_PASS' tag.
; NOTE(review): the else/end lines are not part of this excerpt; the second definition is
; presumably the empty variant used when tracing is disabled - confirm.
651 if TRACE_RELATIONS_2ND_PASS_ENABLED
652 macro TRACE_2ND_PASS fmt
, regs
&
653 TRACE_PREFIX
'RELATIONS_2ND_PASS', fmt
, regs
656 macro TRACE_2ND_PASS fmt
, regs
&
659 ;
}}} macros
----------------------------------------------------------------------------------------
660 end namespace ; relations