3 Implementation of a custom DHT, based on Pastry and Kademlia.
4 The keyspace is divided into buckets of limited capacity.
5 A node belongs to a bucket when at least 'depth' leading bits of its ID match the bucket's 'prefix'.
8 TODO: weight nodes by IP-Address common prefix length.
11 {used by: messages, fileshare}
15 type tPID
=Store1
.tFID
;
23 procedure NodeBootstrap(const contact
:tNetAddr
);
24 procedure GetNextNode(var ibkt
:pointer; var ix
:byte; out peer
:tPeerPub
);
25 procedure InsertNode(const peer
:tPeerPub
);
28 uses ServerLoop
,MemStream
,opcode
;
31 tPeer
=object(tPeerPub
)
41 peer
: array [1..4] of tPeer
;
46 function MatchPrefix(const tp
:tFID
):boolean;
53 function PrefixLength(const a
,b
:tFID
):byte;
59 i
:=0; while(i
<=19) do begin
60 if a
[i
]<>b
[i
] then break
;
67 if (a
[i
] and m
)<>(b
[i
] and m
) then break
;
74 function tBucket
.MatchPrefix(const tp
:tFID
):boolean;
76 result
:=(depth
=0)or(PrefixLength(prefix
,tp
)>=depth
);
79 function FindBucket(const prefix
:tFID
):tBucket_ptr
;
84 while (cur
<>nil) and (result
=nil) do begin
85 if cur
^.MatchPrefix(prefix
) {first matching is deepest}
91 operator
=(const a
,b
:tFID
):boolean;
93 result
:=CompareWord(a
,b
,10)=0;
96 procedure SplitBucket(ob
:tBucket_ptr
);
97 procedure Toggle(var prefix
:tPID
; bit
:byte);
99 prefix
[bit
div 8]:= prefix
[bit
div 8] xor ($80 shr (bit
mod 8));
104 writeln('DHT: SplitBucket ',string(ob
^.prefix
),'/',ob
^.depth
);
105 {find the predecessor of the old bucket, in order to unlink it}
106 if ob
=Table
then table
:=table
^.next
else begin
108 while assigned(nb
) and (nb
^.next
<>ob
) do nb
:=nb
^.next
;
109 assert(assigned(nb
),'old bucket not in table');
111 nb
^.next
:=nb
^.next
^.next
; nb
:=nil;
113 {increase depth of this bucket}
115 ob
^.ModifyTime
:=mNow
;
116 {create new bucket with toggled bit}
119 Toggle(nb
^.Prefix
,nb
^.depth
-1);
121 {clear nodes that do not belong in bucket}
122 for i
:=1 to high(tBucket
.peer
) do begin
123 if ob
^.peer
[i
].addr
.isNil
then continue
;
124 if ob
^.MatchPrefix(ob
^.peer
[i
].id
)
125 then nb
^.peer
[i
].addr
.clear
126 else ob
^.peer
[i
].addr
.clear
;
128 writeln('-> ',string(ob
^.prefix
),'/',ob
^.depth
);
129 for i
:=1 to high(tBucket
.peer
) do if not ob
^.peer
[i
].addr
.isnil
130 then writeln('-> -> ',string(ob
^.peer
[i
].id
));
131 writeln('-> ',string(nb
^.prefix
),'/',nb
^.depth
);
132 for i
:=1 to high(tBucket
.peer
) do if not nb
^.peer
[i
].addr
.isnil
133 then writeln('-> -> ',string(nb
^.peer
[i
].id
));
134 if table
=nil then table
:=nb
else begin
136 while assigned(ob
^.next
)and (ob
^.next
^.depth
>nb
^.depth
) do ob
:=ob
^.next
;
138 writeln('-> after /',ob
^.depth
);
140 Shedule(2000,@nb
^.Refresh
);
143 procedure UpdateNode(const id
:tFID
; const addr
:tNetAddr
);
148 if id
=MyID
then exit
;
151 if not assigned(bkt
) then begin
156 bkt
^.ModifyTime
:=mNow
;
159 for i
:=1 to high(bkt
^.peer
) do bkt
^.peer
[i
].addr
.Clear
;
160 Shedule(2000,@bkt
^.Refresh
);
163 for i
:=1 to high(bkt
^.peer
)
164 do if (fr
=0)and bkt
^.peer
[i
].addr
.isNil
then fr
:=i
165 else if (bkt
^.peer
[i
].ReqDelta
<2) then begin
166 {found node in the bucket}
167 if (bkt
^.peer
[i
].id
=id
) then begin
168 bkt
^.peer
[i
].LastMsgFrom
:=mNow
;
169 bkt
^.peer
[i
].ReqDelta
:=0;
172 if bkt
^.peer
[i
].addr
=addr
then exit
;
174 else if (fr
=0)or (bkt
^.peer
[i
].id
=id
)
177 if bkt
^.MatchPrefix(MyID
)
181 end; {the bucket is full!}
182 {drop new node and hope nodes in the bucket are good}
184 writeln('DHT: AddNode ',string(id
),string(addr
),' to ',string(bkt
^.prefix
),'/',bkt
^.depth
,'#',fr
);
185 bkt
^.ModifyTime
:=mNow
;
186 bkt
^.peer
[fr
].ID
:=ID
;
187 bkt
^.peer
[fr
].Addr
:=Addr
;
188 bkt
^.peer
[fr
].LastMsgFrom
:=mNow
;
189 bkt
^.peer
[fr
].LastResFrom
:=0;
190 bkt
^.peer
[fr
].ReqDelta
:=0;
194 procedure InsertNode(const peer
:tPeerPub
);
196 UpdateNode(peer
.id
,peer
.addr
);
199 procedure GetNextNode(var ibkt
:tBucket_ptr
; var ix
:byte; const id
:tPID
; maxrd
:word);
202 if not assigned(ibkt
) then exit
;
206 if ix
>high(tBucket
.peer
) then begin
209 if not assigned(bkt
) then break
;
211 until (not bkt
^.peer
[ix
].Addr
.isNil
)and(bkt
^.peer
[ix
].ReqDelta
<maxrd
);
215 procedure GetNextNode(var ibkt
:pointer; var ix
:byte; out peer
:tPeerPub
);
217 if ibkt
=nil then ibkt
:=Table
;
218 GetNextNode(ibkt
,ix
,MyID
,3);
220 then peer
:=tBucket(ibkt
^).peer
[ix
]
221 else peer
.addr
.clear
;
224 procedure RecvRequest(msg
:tSMsg
);
225 var s
:tMemoryStream
absolute msg
.stream
;
237 //writeln('DHT: ',string(msg.source^),' Request for ',string(rID^));
238 UpdateNode(hID
^,msg
.source
^);
239 {Select peers only from the matching bucket;
240 if it is broken, send none, but still Ack}
241 bkt
:=FindBucket(rID
^);
243 if assigned(bkt
) then begin
244 r
.WriteByte(opcode
.dhtSelect
);
246 r
.Write(msg
.Source
^,sizeof(tNetAddr
));
249 if (s
.RdBufLen
>0)and(s
.RdBufLen
<=8) then r
.Write(s
.RdBuf
^,s
.RdBufLen
);
250 for i
:=1 to high(tBucket
.peer
) do begin
251 if bkt
^.peer
[i
].addr
.isNil
then continue
;
252 if bkt
^.peer
[i
].addr
=msg
.source
^ then continue
;
253 if bkt
^.peer
[i
].ReqDelta
>1 then continue
;
254 //writeln('-> Select to ',string(bkt^.peer[i].addr));
255 SendMessage(r
.base
^,r
.length
,bkt
^.peer
[i
].addr
);
260 //else writeln('-> empty bucket')
262 r
.WriteByte(opcode
.dhtReqAck
);
264 //writeln('-> ReqAck to ',string(msg.Source^));
265 SendMessage(r
.base
^,r
.length
,msg
.source
^);
266 FreeMem(r
.base
,r
.size
);
269 procedure SendRequest(const contact
:tNetAddr
; const forid
: tPID
; caps
:byte);
273 r
.WriteByte(opcode
.dhtRequest
);
274 r
.Write(MyID
,sizeof(tFID
));
275 r
.Write(ForID
,sizeof(tFID
));
277 SendMessage(r
.base
^,r
.length
,contact
);
278 FreeMem(r
.base
,r
.size
);
281 procedure RecvReqAck(msg
:tSMsg
);
282 var s
:tMemoryStream
absolute msg
.stream
;
287 //writeln('DHT: ',string(msg.source^),' is ',string(hID^),' (ReqAck)');
288 UpdateNode(hID
^,msg
.source
^);
291 procedure RecvWazzup(msg
:tSMsg
);
292 var s
:tMemoryStream
absolute msg
.stream
;
297 //writeln('DHT: ',string(msg.source^),' is ',string(hID^),' (Wazzup)');
298 UpdateNode(hID
^,msg
.source
^);
299 //UpdateSearch(hID^,msg.source^);
302 procedure NodeBootstrap(const contact
:tNetAddr
);
304 SendRequest(contact
,MyID
,0);
307 procedure RecvSelect(msg
:tSMsg
);
308 var s
:tMemoryStream
absolute msg
.stream
;
316 addr
:=s
.ReadPtr(sizeof(tNetAddr
));
318 //writeln('DHT: ',string(msg.source^),' Select for ',string(addr^));
319 if rID
^=MyID
then begin
320 //writeln('-> self');
323 r
.WriteByte(opcode
.dhtWazzup
);
325 //writeln('-> Wazzup to ',string(addr^));
326 SendMessage(r
.base
^,r
.length
,addr
^);
327 FreeMem(r
.base
,r
.size
);
331 procedure tBucket
.Refresh
;
332 var my
,rtr
,stich
:boolean;
336 procedure lSend(var peer
:tPeer
; const trg
:tPID
);
338 SendRequest(peer
.Addr
,trg
,0);
342 my
:=MatchPrefix(MyID
);
344 {1 in 10 times, try to contact dead nodes in an attempt to recover from a network split}
345 stich
:=Random(cStichRar
)=0;
346 for i
:=1 to high(tBucket
.peer
)
347 do if (not peer
[i
].Addr
.isNil
) then begin
348 if peer
[i
].ReqDelta
>0 then begin
349 if (peer
[i
].ReqDelta
<=3)xor stich
then begin
350 {this will get rid of half-dead nodes}
351 writeln('DHT: Refresh (R',peer
[i
].ReqDelta
,') ',copy(string(peer
[i
].id
),1,6),string(peer
[i
].addr
));
352 lSend(peer
[i
],prefix
);
356 else if (ol
=0) or (peer
[i
].LastMsgFrom
<peer
[ol
].LastMsgFrom
)
359 {now nudge the most quiet peer, but not too often}
360 if (ol
>0) and ((mNow
-peer
[ol
].LastMsgFrom
)>10000) then begin
361 //writeln('DHT: Refresh (T',mNow-peer[ol].LastMsgFrom,') #',ol,' ',string(peer[ol].addr));
362 lSend(peer
[ol
],MyID
);
364 {try to recover bucket full of bad nodes}
365 if (ol
=0){and(not rtr)} then begin
367 GetNextNode(rvb
,rv
,prefix
,desperate
);
368 if not assigned(rvb
) then begin
369 rv
:=0; rvb
:=Table
; {in extreme cases, try the whole table}
370 GetNextNode(rvb
,rv
,prefix
,desperate
);
372 if assigned(rvb
) then begin
373 writeln('DHT: Recover ',string(prefix
),'/',depth
,' try ',copy(string(rvb
^.peer
[rv
].id
),1,6),string(rvb
^.peer
[rv
].addr
));
374 lSend(rvb
^.peer
[rv
],prefix
);
375 end else inc(desperate
);
376 end else desperate
:=3;
378 then wait
:=18000+(depth
*600)
380 if rtr
and(not stich
) then wait
:=wait
div 3;
381 Shedule(wait
,@Refresh
);
385 {to bootstrap: ping address to get ID and insert to bucket/il
386 ping may get lost: separate bootstrap unit :)
387 for now just Ass-U-Me it won't get lost}
390 SetMsgHandler(opcode
.dhtRequest
,@recvRequest
);
391 SetMsgHandler(opcode
.dhtSelect
,@recvSelect
);
392 SetMsgHandler(opcode
.dhtReqAck
,@recvReqAck
);
393 SetMsgHandler(opcode
.dhtWazzup
,@recvWazzup
);