2 * 2008+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
16 #ifndef __DNET_ELLIPTICS_H
17 #define __DNET_ELLIPTICS_H
20 #include <sys/socket.h>
21 #include <sys/epoll.h>
29 #include <netinet/in.h>
30 #include <arpa/inet.h>
32 #include <eblob/blob.h>
35 typedef unsigned char u_char
;
36 typedef unsigned short u_short
;
47 #include "elliptics/packet.h"
48 #include "elliptics/interface.h"
55 #define __unused __attribute__ ((unused))
60 struct dnet_net_state
;
/*
 * dnet_log() - forward a message to dnet_log_raw() when logging is enabled.
 * @n:      node pointer; must have a ->log member (may be NULL)
 * @mask:   log level bits; the message is emitted when any of them is set
 *          in n->log->log_mask
 * @format: printf-style format, followed by its arguments
 *
 * Both @n and @mask are parenthesized in the expansion so that compound
 * expressions such as `A | B` are tested as a whole against log_mask
 * (`x & A | B` would otherwise parse as `(x & A) | B`).
 * Note: @n and @mask are evaluated more than once; avoid side effects.
 */
#define dnet_log(n, mask, format, a...)						\
	do {									\
		if ((n)->log && ((n)->log->log_mask & (mask)))			\
			dnet_log_raw((n), (mask), format, ##a);			\
	} while (0)

/*
 * dnet_log_err() - dnet_log() at DNET_LOG_ERROR level with the current errno
 * text and value appended. @f must be a string literal because the suffix is
 * attached by literal concatenation.
 */
#define dnet_log_err(n, f, a...)						\
	dnet_log(n, DNET_LOG_ERROR, f ": %s [%d].\n", ##a, strerror(errno), errno)
66 struct list_head req_entry
;
68 struct dnet_net_state
*st
;
83 * Currently executed network state machine:
84 * receives and sends command and data.
/* rcv_flags bit: a command header is currently being read */
#define DNET_IO_CMD			(1 << 0)

/* rcv_flags bit: attached data should be discarded */
#define DNET_IO_DROP			(1 << 1)

/* Upper bound for a state weight value */
#define DNET_STATE_MAX_WEIGHT		(1024 * 10)
97 struct list_head state_entry
;
98 struct list_head storage_state_entry
;
111 struct dnet_addr addr
;
113 int (* process
)(struct dnet_net_state
*st
, struct epoll_event
*ev
);
115 struct dnet_cmd rcv_cmd
;
118 unsigned int rcv_flags
;
123 pthread_mutex_t send_lock
;
124 struct list_head send_list
;
126 pthread_mutex_t trans_lock
;
127 struct rb_root trans_root
;
128 struct list_head trans_list
;
132 unsigned long long free
;
134 long median_read_time
;
136 struct dnet_idc
*idc
;
138 struct dnet_stat_count stat
[__DNET_CMD_MAX
];
142 struct dnet_state_id
{
143 struct dnet_raw_id raw
;
144 struct dnet_idc
*idc
;
148 struct dnet_net_state
*st
;
149 struct dnet_group
*group
;
151 struct dnet_state_id ids
[];
/*
 * Network state and ID container API.
 *
 * Only the signatures are visible in this header; locking and ownership rules
 * live with the implementations.
 * NOTE(review): the *_nolock variants presumably expect the caller to already
 * hold the relevant lock - confirm against the callers.
 */

/* ID container setup/teardown for a state. */
int dnet_idc_create(struct dnet_net_state *st, int group_id,
		struct dnet_raw_id *ids, int id_num);
void dnet_idc_destroy_nolock(struct dnet_net_state *st);

/*
 * Create a state for socket @s and address @addr; @process has the same
 * signature as the per-state event handler, and @errp is an int out-parameter.
 */
struct dnet_net_state *dnet_state_create(struct dnet_node *n,
		int group_id, struct dnet_raw_id *ids, int id_num,
		struct dnet_addr *addr, int s, int *errp, int join,
		int (* process)(struct dnet_net_state *st, struct epoll_event *ev));

void dnet_state_reset(struct dnet_net_state *st);
void dnet_state_remove_nolock(struct dnet_net_state *st);

/* State lookup helpers. */
struct dnet_net_state *dnet_state_search_by_addr(struct dnet_node *n,
		struct dnet_addr *addr);
struct dnet_net_state *dnet_state_get_first(struct dnet_node *n,
		struct dnet_id *id);
struct dnet_net_state *dnet_state_search_nolock(struct dnet_node *n,
		struct dnet_id *id);
struct dnet_net_state *dnet_node_state(struct dnet_node *n);

int dnet_search_range(struct dnet_node *n, struct dnet_id *id,
		struct dnet_raw_id *start, struct dnet_raw_id *next);

int dnet_recv_route_list(struct dnet_net_state *st);

/* Invoked by dnet_state_put() when it releases the state. */
void dnet_state_destroy(struct dnet_net_state *st);

void dnet_schedule_command(struct dnet_net_state *st);

int dnet_schedule_send(struct dnet_net_state *st);
int dnet_schedule_recv(struct dnet_net_state *st);

void dnet_unschedule_send(struct dnet_net_state *st);
void dnet_unschedule_recv(struct dnet_net_state *st);

int dnet_setup_control_nolock(struct dnet_net_state *st);

int dnet_add_reconnect_state(struct dnet_node *n, struct dnet_addr *addr,
		unsigned int join_state);
189 static inline struct dnet_net_state
*dnet_state_get(struct dnet_net_state
*st
)
191 atomic_inc(&st
->refcnt
);
194 static inline void dnet_state_put(struct dnet_net_state
*st
)
197 * State can be NULL here when we just want to kick IO thread.
199 if (st
&& atomic_dec_and_test(&st
->refcnt
))
200 dnet_state_destroy(st
);
206 pthread_mutex_t wait_lock
;
/*
 * dnet_wait_event() - wait until @condition becomes true or a timeout expires.
 * @w:         object with ->wait_lock (pthread mutex) and ->wait (pthread cond)
 * @condition: expression re-evaluated under ->wait_lock after every wakeup
 * @wts:       pointer to a struct timespec holding the relative timeout
 *
 * The absolute deadline is "now" (gettimeofday) plus @wts. The nanosecond sum
 * is normalized into [0, 1e9): pthread_cond_timedwait() rejects an abstime
 * whose tv_nsec is >= 1000000000 with EINVAL, which previously turned most
 * waits with a non-zero (wts)->tv_nsec into an immediate failure.
 *
 * Evaluates to 0 when @condition was observed true, otherwise to the negated
 * pthread_cond_timedwait() error (e.g. -ETIMEDOUT).
 */
#define dnet_wait_event(w, condition, wts)						\
({											\
	int __err = 0;									\
	struct timespec __ts;								\
	struct timeval __tv;								\
	gettimeofday(&__tv, NULL);							\
	__ts.tv_sec = __tv.tv_sec + (wts)->tv_sec;					\
	__ts.tv_nsec = __tv.tv_usec * 1000L + (wts)->tv_nsec;				\
	if (__ts.tv_nsec >= 1000000000L) {						\
		__ts.tv_sec += __ts.tv_nsec / 1000000000L;				\
		__ts.tv_nsec %= 1000000000L;						\
	}										\
	pthread_mutex_lock(&(w)->wait_lock);						\
	while (!(condition) && !__err)							\
		__err = pthread_cond_timedwait(&(w)->wait, &(w)->wait_lock, &__ts);	\
	pthread_mutex_unlock(&(w)->wait_lock);						\
	-__err;										\
})
/*
 * dnet_wakeup() - run @task under @w's wait_lock and wake every waiter.
 * @w:    object with ->wait_lock (pthread mutex) and ->wait (pthread cond)
 * @task: statement/expression executed while the lock is held, before the
 *        broadcast (typically the update that makes a waiter's condition true)
 */
#define dnet_wakeup(w, task)								\
({											\
	pthread_mutex_lock(&(w)->wait_lock);						\
	task;										\
	pthread_cond_broadcast(&(w)->wait);						\
	pthread_mutex_unlock(&(w)->wait_lock);						\
})
/*
 * Wait object allocation and teardown; dnet_wait_destroy() is what
 * dnet_wait_put() calls when it releases the object.
 */
struct dnet_wait *dnet_wait_alloc(int cond);
void dnet_wait_destroy(struct dnet_wait *w);
242 static inline struct dnet_wait
*dnet_wait_get(struct dnet_wait
*w
)
244 atomic_inc(&w
->refcnt
);
248 static inline void dnet_wait_put(struct dnet_wait
*w
)
250 if (atomic_dec_and_test(&w
->refcnt
))
251 dnet_wait_destroy(w
);
254 struct dnet_notify_bucket
256 struct list_head notify_list
;
257 pthread_rwlock_t notify_lock
;
/*
 * Notification API. Signatures only; behaviour is defined by the
 * implementations.
 */
int dnet_update_notify(struct dnet_net_state *st, struct dnet_cmd *cmd, void *data);

int dnet_notify_add(struct dnet_net_state *st, struct dnet_cmd *cmd);
int dnet_notify_remove(struct dnet_net_state *st, struct dnet_cmd *cmd);

/* Per-node setup/teardown of the notification machinery. */
int dnet_notify_init(struct dnet_node *n);
void dnet_notify_exit(struct dnet_node *n);
270 struct list_head group_entry
;
272 unsigned int group_id
;
274 struct list_head state_list
;
279 struct dnet_state_id
*ids
;
282 static inline struct dnet_group
*dnet_group_get(struct dnet_group
*g
)
284 atomic_inc(&g
->refcnt
);
288 void dnet_group_destroy(struct dnet_group
*g
);
289 static inline void dnet_group_put(struct dnet_group
*g
)
291 if (g
&& atomic_dec_and_test(&g
->refcnt
))
292 dnet_group_destroy(g
);
295 struct dnet_transform
299 int (* transform
)(void *priv
, const void *src
, uint64_t size
,
300 void *dst
, unsigned int *dsize
, unsigned int flags
);
/*
 * Per-node crypto setup/teardown.
 * NOTE(review): @ns/@nsize look like a buffer and its length - confirm
 * against the implementation.
 */
int dnet_crypto_init(struct dnet_node *n, void *ns, int nsize);
void dnet_crypto_cleanup(struct dnet_node *n);
312 struct dnet_work_io
{
322 int net_thread_num
, net_thread_pos
;
323 struct dnet_net_io
*net
;
325 pthread_mutex_t recv_lock
;
326 struct list_head nonblocking_recv_list
;
327 struct list_head recv_list
;
328 pthread_cond_t recv_wait
;
331 int nonblocking_thread_num
;
332 struct dnet_work_io
*wio
;
/*
 * Event handlers with the signature expected by the `process` callback of a
 * network state.
 */
int dnet_state_accept_process(struct dnet_net_state *st, struct epoll_event *ev);
int dnet_state_net_process(struct dnet_net_state *st, struct epoll_event *ev);

/* Per-node IO subsystem setup/teardown. */
int dnet_io_init(struct dnet_node *n, struct dnet_config *cfg);
void dnet_io_exit(struct dnet_node *n);

void dnet_io_req_free(struct dnet_io_req *r);
344 pthread_mutex_t lock
[0];
/* Per-node lock table setup/teardown; @num is passed through to the implementation. */
void dnet_locks_destroy(struct dnet_node *n);
int dnet_locks_init(struct dnet_node *n, int num);

/*
 * Key-based locking primitives.
 * NOTE(review): presumably @key selects one of the node's locks - confirm.
 */
void dnet_oplock(struct dnet_node *n, struct dnet_id *key);
void dnet_opunlock(struct dnet_node *n, struct dnet_id *key);
int dnet_optrylock(struct dnet_node *n, struct dnet_id *key);
355 struct list_head check_entry
;
357 struct dnet_transform transform
;
359 pthread_mutex_t group_lock
;
372 struct dnet_addr addr
;
373 int sock_type
, proto
, family
;
375 pthread_mutex_t state_lock
;
376 struct list_head group_list
;
378 /* hosts client states, i.e. those who didn't join network */
379 struct list_head empty_state_list
;
381 /* hosts all states added to given node */
382 struct list_head storage_state_list
;
386 struct dnet_net_state
*st
;
390 struct dnet_log
*log
;
392 struct dnet_wait
*wait
;
393 struct timespec wait_ts
;
397 int check_in_progress
;
402 pthread_t monitor_tid
;
407 struct dnet_backend_callbacks
*cb
;
409 unsigned int notify_hash_size
;
410 struct dnet_notify_bucket
*notify_hash
;
412 pthread_mutex_t reconnect_lock
;
413 struct list_head reconnect_list
;
415 struct dnet_lock counters_lock
;
416 struct dnet_stat_count counters
[__DNET_CNTR_MAX
];
422 char cookie
[DNET_AUTH_COOKIE_SIZE
];
429 struct dnet_locks
*locks
;
432 static inline int dnet_counter_init(struct dnet_node
*n
)
434 memset(&n
->counters
, 0, __DNET_CNTR_MAX
* sizeof(struct dnet_stat_count
));
435 return dnet_lock_init(&n
->counters_lock
);
438 static inline void dnet_counter_destroy(struct dnet_node
*n
)
440 return dnet_lock_destroy(&n
->counters_lock
);
443 static inline void dnet_counter_inc(struct dnet_node
*n
, int counter
, int err
)
445 if (counter
>= __DNET_CNTR_MAX
)
446 counter
= DNET_CNTR_UNKNOWN
;
448 dnet_lock_lock(&n
->counters_lock
);
450 n
->counters
[counter
].count
++;
452 n
->counters
[counter
].err
++;
453 dnet_lock_unlock(&n
->counters_lock
);
455 dnet_log(n
, DNET_LOG_DSA
, "Incrementing counter: %d, err: %d, value is: %llu %llu.\n",
457 (unsigned long long)n
->counters
[counter
].count
,
458 (unsigned long long)n
->counters
[counter
].err
);
461 static inline void dnet_counter_set(struct dnet_node
*n
, int counter
, int err
, int64_t val
)
463 if (counter
>= __DNET_CNTR_MAX
)
464 counter
= DNET_CNTR_UNKNOWN
;
466 dnet_lock_lock(&n
->counters_lock
);
468 n
->counters
[counter
].count
= val
;
470 n
->counters
[counter
].err
= val
;
471 dnet_lock_unlock(&n
->counters_lock
);
474 static inline char *dnet_dump_node(struct dnet_node
*n
)
476 static char buf
[128];
478 return dnet_server_convert_dnet_addr_raw(&n
->addr
, buf
, sizeof(buf
));
/*
 * Command processing and raw network helpers. Signatures only; see the
 * implementations for return-value conventions.
 */
int dnet_process_cmd_raw(struct dnet_net_state *st, struct dnet_cmd *cmd, void *data);
int dnet_process_recv(struct dnet_net_state *st, struct dnet_io_req *r);

int dnet_recv(struct dnet_net_state *st, void *data, unsigned int size);
int dnet_sendfile(struct dnet_net_state *st, int fd, uint64_t *offset, uint64_t size);

int dnet_send_request(struct dnet_net_state *st, struct dnet_io_req *r);

/* Socket creation; @listening is passed through to the implementation. */
int dnet_socket_create(struct dnet_node *n, struct dnet_config *cfg,
		struct dnet_addr *addr, int listening);
int dnet_socket_create_addr(struct dnet_node *n, int sock_type, int proto, int family,
		struct sockaddr *sa, unsigned int salen, int listening);

void dnet_set_sockopt(int s);
void dnet_sock_close(int s);
/* Join state values for a network state. */
enum dnet_join_state {
	/* Node joined the network */
	DNET_JOIN = 1,
	/* State must be reconnected, when remote peer failed */
	DNET_WANT_RECONNECT,
};
/*
 * NOTE(review): the _nolock suffix suggests the caller must already hold the
 * relevant lock - confirm against the implementation.
 */
int dnet_state_join_nolock(struct dnet_net_state *st);
507 struct rb_node trans_entry
;
508 struct list_head trans_list_entry
;
510 struct timeval time
, start
;
512 struct dnet_net_state
*orig
; /* only for forward */
514 struct dnet_net_state
*st
;
515 uint64_t trans
, rcv_trans
;
520 int command
; /* main command this transaction carries */
523 int (* complete
)(struct dnet_net_state
*st
,
524 struct dnet_cmd
*cmd
,
/*
 * Transaction lifetime API. dnet_trans_destroy() is what dnet_trans_put()
 * calls when it releases a transaction.
 */
void dnet_trans_destroy(struct dnet_trans *t);
struct dnet_trans *dnet_trans_alloc(struct dnet_node *n, uint64_t size);
int dnet_trans_alloc_send_state(struct dnet_net_state *st, struct dnet_trans_control *ctl);
int dnet_trans_timer_setup(struct dnet_trans *t);
533 static inline struct dnet_trans
*dnet_trans_get(struct dnet_trans
*t
)
535 atomic_inc(&t
->refcnt
);
539 static inline void dnet_trans_put(struct dnet_trans
*t
)
541 if (t
&& atomic_dec_and_test(&t
->refcnt
))
542 dnet_trans_destroy(t
);
/*
 * Transaction tree management. The functions taking @root operate on an
 * rb_root supplied by the caller.
 * NOTE(review): the *_nolock variants presumably expect the caller to hold
 * the lock protecting that tree - confirm.
 */
int dnet_trans_insert_nolock(struct rb_root *root, struct dnet_trans *a);
void dnet_trans_remove(struct dnet_trans *t);
void dnet_trans_remove_nolock(struct rb_root *root, struct dnet_trans *t);
struct dnet_trans *dnet_trans_search(struct rb_root *root, uint64_t trans);

int dnet_trans_send(struct dnet_trans *t, struct dnet_io_req *req);

int dnet_trans_create_send_all(struct dnet_node *n, struct dnet_io_control *ctl);

int dnet_recv_list(struct dnet_node *n, struct dnet_net_state *st);

/* Low-level send helpers; each returns ssize_t. */
ssize_t dnet_send_fd(struct dnet_net_state *st, void *header, uint64_t hsize,
		int fd, uint64_t offset, uint64_t dsize, int close_on_exit);
ssize_t dnet_send_data(struct dnet_net_state *st, void *header, uint64_t hsize,
		void *data, uint64_t dsize);
ssize_t dnet_send(struct dnet_net_state *st, void *data, uint64_t size);
ssize_t dnet_send_nolock(struct dnet_net_state *st, void *data, uint64_t size);
562 struct dnet_io_completion
564 struct dnet_wait
*wait
;
569 struct dnet_addr_storage
571 int reconnect_time
, reconnect_time_max
;
572 struct list_head reconnect_entry
;
573 struct dnet_addr addr
;
574 unsigned int __join_state
;
/*
 * dnet_time_before() - returns true (1) if @t1 is earlier than @t2, else 0.
 *
 * Seconds are compared first and nanoseconds only break ties. The fields are
 * compared directly instead of casting their difference to long: the
 * subtraction can overflow for distant timestamps, and the cast truncates on
 * platforms where time_t is wider than long.
 */
static inline int dnet_time_before(struct timespec *t1, struct timespec *t2)
{
	if (t1->tv_sec != t2->tv_sec)
		return t1->tv_sec < t2->tv_sec;

	return t1->tv_nsec < t2->tv_nsec;
}

/* dnet_time_after(a, b): true if a is later than b. */
#define dnet_time_after(t2, t1) 	dnet_time_before(t1, t2)
/* Per-node check thread control and reconnect entry point (signatures only). */
int dnet_check_thread_start(struct dnet_node *n);
void dnet_check_thread_stop(struct dnet_node *n);
int dnet_try_reconnect(struct dnet_node *n);
/* Check type selectors */
#define DNET_CHECK_TYPE_COPIES_HISTORY		1
#define DNET_CHECK_TYPE_COPIES_FULL		2
#define DNET_CHECK_TYPE_MERGE			3
#define DNET_CHECK_TYPE_DELETE			4

/* Bulk check sizing constants */
#define DNET_BULK_IDS_SIZE			1000
#define DNET_BULK_CHECK_PING			100
#define DNET_BULK_STATES_ALLOC_STEP		10
#define DNET_BULK_META_UPD_SIZE			1000
608 struct dnet_raw_id id
;
609 struct dnet_meta_update last_update
;
610 } __attribute__ ((packed
));
612 struct dnet_bulk_state
614 struct dnet_addr addr
;
615 pthread_mutex_t state_lock
;
617 struct dnet_bulk_id
*ids
;
620 struct dnet_bulk_array
623 struct dnet_bulk_state
*states
;
627 static inline int dnet_compare_bulk_state(const void *k1
, const void *k2
)
629 const struct dnet_bulk_state
*st1
= (const struct dnet_bulk_state
*)k1
;
630 const struct dnet_bulk_state
*st2
= (const struct dnet_bulk_state
*)k2
;
632 if (st1
->addr
.addr_len
> st2
->addr
.addr_len
)
634 if (st1
->addr
.addr_len
< st2
->addr
.addr_len
)
636 return memcmp(st1
->addr
.addr
, st2
->addr
.addr
, st1
->addr
.addr_len
);
639 struct dnet_check_temp_db
{
640 struct eblob_backend
*b
;
641 struct eblob_log log
;
645 struct dnet_check_params
{
646 struct dnet_check_temp_db
*db
;
652 static inline struct dnet_check_temp_db
* dnet_check_temp_db_get(struct dnet_check_temp_db
*db
) {
653 atomic_inc(&db
->refcnt
);
/*
 * dnet_check_temp_db_put() - drop a reference on @db.
 *
 * When atomic_dec_and_test() on db->refcnt reports true, the backing eblob is
 * torn down: eblob_remove_blobs(db->b) followed by eblob_cleanup(db->b).
 *
 * NOTE(review): unlike dnet_state_put()/dnet_group_put()/dnet_trans_put(),
 * @db is dereferenced without a NULL check - callers must pass a valid
 * pointer.
 * NOTE(review): whatever follows eblob_cleanup() (e.g. releasing @db itself)
 * is not visible in this view - confirm before changing this function.
 */
657 static inline void dnet_check_temp_db_put(struct dnet_check_temp_db
*db
) {
658 if (atomic_dec_and_test(&db
->refcnt
)) {
659 eblob_remove_blobs(db
->b
);
660 eblob_cleanup(db
->b
);
665 int dnet_check(struct dnet_node
*n
, struct dnet_meta_container
*mc
, struct dnet_bulk_array
*bulk_array
, int check_copies
, struct dnet_check_params
*params
);
666 int dnet_db_list(struct dnet_net_state
*st
, struct dnet_cmd
*cmd
);
667 int dnet_cmd_bulk_check(struct dnet_net_state
*orig
, struct dnet_cmd
*cmd
, void *data
);
668 int dnet_request_bulk_check(struct dnet_node
*n
, struct dnet_bulk_state
*state
, struct dnet_check_params
*params
);
670 struct dnet_meta_update
* dnet_get_meta_update(struct dnet_node
*n
, struct dnet_meta_container
*mc
,
671 struct dnet_meta_update
*meta_update
);
673 int dnet_update_ts_metadata(struct eblob_backend
*b
, struct dnet_raw_id
*id
, uint64_t flags_set
, uint64_t flags_clear
);
675 int dnet_process_meta(struct dnet_net_state
*st
, struct dnet_cmd
*cmd
, struct dnet_io_attr
*io
);
676 void dnet_convert_metadata(struct dnet_node
*n __unused
, void *data
, int size
);
678 void dnet_monitor_exit(struct dnet_node
*n
);
679 int dnet_monitor_init(struct dnet_node
*n
, struct dnet_config
*cfg
);
681 int dnet_set_name(char *name
);
682 int dnet_ioprio_set(long pid
, int class_id
, int prio
);
683 int dnet_ioprio_get(long pid
);
687 uint64_t offset
, size
;
691 uint64_t mapped_size
;
/* Map/unmap the region described by a struct dnet_map_fd. */
int dnet_data_map(struct dnet_map_fd *map);
void dnet_data_unmap(struct dnet_map_fd *map);

/*
 * NOTE(review): presumably a blocking read that returns the data and reports
 * failure through @errp - confirm against the implementation.
 */
void *dnet_read_data_wait_raw(struct dnet_node *n, struct dnet_id *id,
		struct dnet_io_attr *io, int cmd, uint64_t cflags, int *errp);

/* SRW subsystem setup/teardown and raw exec command entry point. */
int dnet_srw_init(struct dnet_node *n, struct dnet_config *cfg);
void dnet_srw_cleanup(struct dnet_node *n);
int dnet_cmd_exec_raw(struct dnet_net_state *st, struct dnet_cmd *cmd,
		struct sph *header, const void *data);
708 #endif /* __DNET_ELLIPTICS_H */