Populate cache on READ command when DNET_IO_FLAGS_CACHE ioflag is set
[elliptics.git] / library / elliptics.h
blobc2992efe222952997ddd163123a8215cb0f8bfae
/*
 * 2008+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
16 #ifndef __DNET_ELLIPTICS_H
17 #define __DNET_ELLIPTICS_H
19 #include <sys/time.h>
20 #include <sys/socket.h>
21 #include <sys/epoll.h>
23 #include <errno.h>
24 #include <netdb.h>
25 #include <string.h>
26 #include <pthread.h>
27 #include <time.h>
29 #include <netinet/in.h>
30 #include <arpa/inet.h>
32 #include <eblob/blob.h>
34 #ifndef HAVE_UCHAR
35 typedef unsigned char u_char;
36 typedef unsigned short u_short;
37 #endif
39 #include "list.h"
41 #undef offsetof
42 #include "rbtree.h"
44 #include "atomic.h"
45 #include "lock.h"
47 #include "elliptics/packet.h"
48 #include "elliptics/interface.h"
50 #ifdef __cplusplus
51 extern "C" {
52 #endif
54 #ifndef __unused
55 #define __unused __attribute__ ((unused))
56 #endif
58 struct dnet_node;
59 struct dnet_group;
60 struct dnet_net_state;
/*
 * Logging helpers.
 *
 * dnet_log() calls dnet_log_raw() only when the node has a logger attached
 * (n->log is non-NULL) and that logger's log_level is at least @level.
 *
 * dnet_log_err() logs at DNET_LOG_ERROR and appends ": <strerror(errno)>
 * [<errno>].\n" to the caller's format string.
 *
 * @n and @level are parenthesized inside the expansion so that arbitrary
 * expressions (for example &node or cond ? a : b) can be passed safely.
 * Note that @n is still evaluated more than once, so it must not have
 * side effects.
 */
#define dnet_log(n, level, format, a...) \
	do { \
		if ((n)->log && ((n)->log->log_level >= (level))) \
			dnet_log_raw((n), (level), format, ##a); \
	} while (0)
#define dnet_log_err(n, f, a...) \
	dnet_log(n, DNET_LOG_ERROR, f ": %s [%d].\n", ##a, strerror(errno), errno)
65 struct dnet_io_req {
66 struct list_head req_entry;
68 struct dnet_net_state *st;
70 void *header;
71 size_t hsize;
73 void *data;
74 size_t dsize;
76 int close_on_exit;
77 int fd;
78 off_t local_offset;
79 size_t fsize;
/*
 * Currently executed network state machine:
 * receives and sends command and data.
 */

/* Reading a command */
#define DNET_IO_CMD		(1 << 0)

/* Attached data should be discarded */
#define DNET_IO_DROP		(1 << 1)

#define DNET_STATE_MAX_WEIGHT	(1024 * 10)
95 struct dnet_net_state
97 struct list_head state_entry;
98 struct list_head storage_state_entry;
100 struct dnet_node *n;
102 atomic_t refcnt;
103 int read_s, write_s;
105 int need_exit;
107 int stall;
109 int __join_state;
111 struct dnet_addr addr;
113 int (* process)(struct dnet_net_state *st, struct epoll_event *ev);
115 struct dnet_cmd rcv_cmd;
116 uint64_t rcv_offset;
117 uint64_t rcv_end;
118 unsigned int rcv_flags;
119 void *rcv_data;
121 int epoll_fd;
122 size_t send_offset;
123 pthread_mutex_t send_lock;
124 struct list_head send_list;
126 pthread_mutex_t trans_lock;
127 struct rb_root trans_root;
128 struct list_head trans_list;
131 int la;
132 unsigned long long free;
133 float weight;
134 long median_read_time;
136 struct dnet_idc *idc;
138 struct dnet_stat_count stat[__DNET_CMD_MAX];
141 struct dnet_idc;
142 struct dnet_state_id {
143 struct dnet_raw_id raw;
144 struct dnet_idc *idc;
147 struct dnet_idc {
148 struct dnet_net_state *st;
149 struct dnet_group *group;
150 int id_num;
151 struct dnet_state_id ids[];
154 int dnet_idc_create(struct dnet_net_state *st, int group_id, struct dnet_raw_id *ids, int id_num);
155 void dnet_idc_destroy_nolock(struct dnet_net_state *st);
157 struct dnet_net_state *dnet_state_create(struct dnet_node *n,
158 int group_id, struct dnet_raw_id *ids, int id_num,
159 struct dnet_addr *addr, int s, int *errp, int join,
160 int (* process)(struct dnet_net_state *st, struct epoll_event *ev));
162 void dnet_state_reset(struct dnet_net_state *st);
163 void dnet_state_remove_nolock(struct dnet_net_state *st);
165 struct dnet_net_state *dnet_state_search_by_addr(struct dnet_node *n, struct dnet_addr *addr);
166 struct dnet_net_state *dnet_state_get_first(struct dnet_node *n, struct dnet_id *id);
167 struct dnet_net_state *dnet_state_search_nolock(struct dnet_node *n, struct dnet_id *id);
168 struct dnet_net_state *dnet_node_state(struct dnet_node *n);
170 void dnet_node_cleanup_common_resources(struct dnet_node *n);
172 int dnet_search_range(struct dnet_node *n, struct dnet_id *id,
173 struct dnet_raw_id *start, struct dnet_raw_id *next);
175 int dnet_recv_route_list(struct dnet_net_state *st);
177 void dnet_state_destroy(struct dnet_net_state *st);
179 void dnet_schedule_command(struct dnet_net_state *st);
181 int dnet_schedule_send(struct dnet_net_state *st);
182 int dnet_schedule_recv(struct dnet_net_state *st);
184 void dnet_unschedule_send(struct dnet_net_state *st);
185 void dnet_unschedule_recv(struct dnet_net_state *st);
187 int dnet_setup_control_nolock(struct dnet_net_state *st);
189 int dnet_add_reconnect_state(struct dnet_node *n, struct dnet_addr *addr, unsigned int join_state);
191 static inline struct dnet_net_state *dnet_state_get(struct dnet_net_state *st)
193 atomic_inc(&st->refcnt);
194 return st;
196 static inline void dnet_state_put(struct dnet_net_state *st)
199 * State can be NULL here when we just want to kick IO thread.
201 if (st && atomic_dec_and_test(&st->refcnt))
202 dnet_state_destroy(st);
205 struct dnet_wait
207 pthread_cond_t wait;
208 pthread_mutex_t wait_lock;
209 int cond;
210 int status;
212 void *ret;
213 int size;
215 atomic_t refcnt;
/*
 * Block until @condition becomes true or the relative timeout @wts expires.
 *
 * @w:         pointer to an object with `wait` (pthread_cond_t) and
 *             `wait_lock` (pthread_mutex_t) members
 * @condition: expression re-evaluated with (w)->wait_lock held
 * @wts:       pointer to a struct timespec with the relative timeout
 *
 * Evaluates to 0 when @condition was observed true, otherwise to the
 * negated pthread_cond_timedwait() error code (e.g. -ETIMEDOUT).
 *
 * The absolute deadline is built from gettimeofday() plus @wts. The sum of
 * the current microseconds and the timeout nanoseconds can reach one second
 * or more, and pthread_cond_timedwait() rejects such a tv_nsec with EINVAL,
 * so the overflow is carried into tv_sec before waiting.
 */
#define dnet_wait_event(w, condition, wts)						\
({											\
	int __err = 0;									\
	struct timespec __ts;								\
	struct timeval __tv;								\
	gettimeofday(&__tv, NULL);							\
	__ts.tv_sec = __tv.tv_sec + (wts)->tv_sec;					\
	__ts.tv_nsec = __tv.tv_usec * 1000 + (wts)->tv_nsec;				\
	if (__ts.tv_nsec >= 1000000000L) {						\
		__ts.tv_sec += __ts.tv_nsec / 1000000000L;				\
		__ts.tv_nsec %= 1000000000L;						\
	}										\
	pthread_mutex_lock(&(w)->wait_lock);						\
	while (!(condition) && !__err)							\
		__err = pthread_cond_timedwait(&(w)->wait, &(w)->wait_lock, &__ts);	\
	pthread_mutex_unlock(&(w)->wait_lock);						\
	-__err;										\
})
/*
 * Execute @task with (w)->wait_lock held, then wake every thread blocked
 * on (w)->wait. The expression evaluates to the return value of the final
 * pthread_mutex_unlock() call.
 */
#define dnet_wakeup(w, task)					\
({								\
	pthread_mutex_lock(&(w)->wait_lock);			\
	task;							\
	pthread_cond_broadcast(&(w)->wait);			\
	pthread_mutex_unlock(&(w)->wait_lock);			\
})
241 struct dnet_wait *dnet_wait_alloc(int cond);
242 void dnet_wait_destroy(struct dnet_wait *w);
244 static inline struct dnet_wait *dnet_wait_get(struct dnet_wait *w)
246 atomic_inc(&w->refcnt);
247 return w;
250 static inline void dnet_wait_put(struct dnet_wait *w)
252 if (atomic_dec_and_test(&w->refcnt))
253 dnet_wait_destroy(w);
256 struct dnet_notify_bucket
258 struct list_head notify_list;
259 pthread_rwlock_t notify_lock;
262 int dnet_update_notify(struct dnet_net_state *st, struct dnet_cmd *cmd, void *data);
264 int dnet_notify_add(struct dnet_net_state *st, struct dnet_cmd *cmd);
265 int dnet_notify_remove(struct dnet_net_state *st, struct dnet_cmd *cmd);
267 int dnet_notify_init(struct dnet_node *n);
268 void dnet_notify_exit(struct dnet_node *n);
270 struct dnet_group
272 struct list_head group_entry;
274 unsigned int group_id;
276 struct list_head state_list;
278 atomic_t refcnt;
280 int id_num;
281 struct dnet_state_id *ids;
284 static inline struct dnet_group *dnet_group_get(struct dnet_group *g)
286 atomic_inc(&g->refcnt);
287 return g;
290 void dnet_group_destroy(struct dnet_group *g);
291 static inline void dnet_group_put(struct dnet_group *g)
293 if (g && atomic_dec_and_test(&g->refcnt))
294 dnet_group_destroy(g);
/*
 * Transformation callback together with its private context.
 * @priv is the first argument handed to @transform.
 */
struct dnet_transform
{
	void	*priv;

	int	(* transform)(void *priv,
			      const void *src, uint64_t size,
			      void *dst, unsigned int *dsize,
			      unsigned int flags);
};
305 int dnet_crypto_init(struct dnet_node *n, void *ns, int nsize);
306 void dnet_crypto_cleanup(struct dnet_node *n);
/* Network I/O context: an epoll descriptor, a thread ID and the owning node. */
struct dnet_net_io {
	int			epoll_fd;
	pthread_t		tid;
	struct dnet_node	*n;
};
/* Work pool modes; values are consecutive starting from 0. */
enum dnet_work_io_mode {
	DNET_WORK_IO_MODE_BLOCKING	= 0,
	DNET_WORK_IO_MODE_NONBLOCKING,
	DNET_WORK_IO_MODE_EXEC_BLOCKING,
};
struct dnet_work_pool;

/* One worker slot: its index, its thread ID and the pool it belongs to. */
struct dnet_work_io {
	int			thread_index;
	pthread_t		tid;
	struct dnet_work_pool	*pool;
};
327 struct dnet_work_pool {
328 struct dnet_node *n;
329 int mode;
330 int num;
331 struct list_head list;
332 pthread_mutex_t lock;
333 pthread_cond_t wait;
334 struct dnet_work_io wio[0];
/*
 * Top-level I/O state of a node: network threads plus three receive work
 * pools. Field roles are inferred from names; the pools presumably map to
 * the three enum dnet_work_io_mode values - TODO confirm.
 */
struct dnet_io {
	int			need_exit;

	int			net_thread_num;
	int			net_thread_pos;
	struct dnet_net_io	*net;

	struct dnet_work_pool	*recv_pool;
	struct dnet_work_pool	*recv_pool_nb;
	struct dnet_work_pool	*recv_pool_eblock;
};
348 int dnet_state_accept_process(struct dnet_net_state *st, struct epoll_event *ev);
349 int dnet_state_net_process(struct dnet_net_state *st, struct epoll_event *ev);
350 int dnet_io_init(struct dnet_node *n, struct dnet_config *cfg);
351 void dnet_io_exit(struct dnet_node *n);
353 void dnet_io_req_free(struct dnet_io_req *r);
/*
 * Array of mutexes stored inline after the header.
 *
 * @num is presumably the number of entries in @lock (see
 * dnet_locks_init(n, num)) - TODO confirm.
 *
 * @lock is a C99 flexible array member rather than a GNU zero-length
 * array, matching struct dnet_idc in this header; allocate the structure
 * as sizeof(struct dnet_locks) + num * sizeof(pthread_mutex_t).
 */
struct dnet_locks {
	int			num;
	pthread_mutex_t		lock[];
};
360 void dnet_locks_destroy(struct dnet_node *n);
361 int dnet_locks_init(struct dnet_node *n, int num);
362 void dnet_oplock(struct dnet_node *n, struct dnet_id *key);
363 void dnet_opunlock(struct dnet_node *n, struct dnet_id *key);
364 int dnet_optrylock(struct dnet_node *n, struct dnet_id *key);
366 struct dnet_node
368 struct list_head check_entry;
370 struct dnet_transform transform;
372 pthread_mutex_t group_lock;
373 int group_num;
374 int *groups;
376 int need_exit;
378 struct dnet_id id;
380 int flags;
381 int ro;
383 pthread_attr_t attr;
385 struct dnet_addr addr;
386 int sock_type, proto, family;
388 pthread_mutex_t state_lock;
389 struct list_head group_list;
391 /* hosts client states, i.e. those who didn't join network */
392 struct list_head empty_state_list;
394 /* hosts all states added to given node */
395 struct list_head storage_state_list;
397 atomic_t trans;
399 struct dnet_net_state *st;
401 int error;
403 struct dnet_log *log;
405 struct dnet_wait *wait;
406 struct timespec wait_ts;
408 struct dnet_io *io;
410 int check_in_progress;
411 long check_timeout;
412 pthread_t check_tid;
413 long stall_count;
415 pthread_t monitor_tid;
416 int monitor_fd;
418 char *temp_meta_env;
420 struct dnet_backend_callbacks *cb;
422 unsigned int notify_hash_size;
423 struct dnet_notify_bucket *notify_hash;
425 pthread_mutex_t reconnect_lock;
426 struct list_head reconnect_list;
428 struct dnet_lock counters_lock;
429 struct dnet_stat_count counters[__DNET_CNTR_MAX];
431 int bg_ionice_class;
432 int bg_ionice_prio;
433 int removal_delay;
435 char cookie[DNET_AUTH_COOKIE_SIZE];
437 void *srw;
439 int server_prio;
440 int client_prio;
442 struct dnet_locks *locks;
444 size_t cache_size;
445 void *cache;
448 static inline int dnet_counter_init(struct dnet_node *n)
450 memset(&n->counters, 0, __DNET_CNTR_MAX * sizeof(struct dnet_stat_count));
451 return dnet_lock_init(&n->counters_lock);
454 static inline void dnet_counter_destroy(struct dnet_node *n)
456 return dnet_lock_destroy(&n->counters_lock);
459 static inline void dnet_counter_inc(struct dnet_node *n, int counter, int err)
461 if (counter >= __DNET_CNTR_MAX)
462 counter = DNET_CNTR_UNKNOWN;
464 dnet_lock_lock(&n->counters_lock);
465 if (!err)
466 n->counters[counter].count++;
467 else
468 n->counters[counter].err++;
469 dnet_lock_unlock(&n->counters_lock);
471 dnet_log(n, DNET_LOG_DEBUG, "Incrementing counter: %d, err: %d, value is: %llu %llu.\n",
472 counter, err,
473 (unsigned long long)n->counters[counter].count,
474 (unsigned long long)n->counters[counter].err);
477 static inline void dnet_counter_set(struct dnet_node *n, int counter, int err, int64_t val)
479 if (counter >= __DNET_CNTR_MAX)
480 counter = DNET_CNTR_UNKNOWN;
482 dnet_lock_lock(&n->counters_lock);
483 if (!err)
484 n->counters[counter].count = val;
485 else
486 n->counters[counter].err = val;
487 dnet_lock_unlock(&n->counters_lock);
490 static inline char *dnet_dump_node(struct dnet_node *n)
492 static char buf[128];
494 return dnet_server_convert_dnet_addr_raw(&n->addr, buf, sizeof(buf));
497 struct dnet_trans;
498 int __attribute__((weak)) dnet_process_cmd_raw(struct dnet_net_state *st, struct dnet_cmd *cmd, void *data);
499 int dnet_process_recv(struct dnet_net_state *st, struct dnet_io_req *r);
501 int dnet_recv(struct dnet_net_state *st, void *data, unsigned int size);
502 int dnet_sendfile(struct dnet_net_state *st, int fd, uint64_t *offset, uint64_t size);
504 int dnet_send_request(struct dnet_net_state *st, struct dnet_io_req *r);
506 struct dnet_config;
507 int dnet_socket_create(struct dnet_node *n, struct dnet_config *cfg, struct dnet_addr *addr, int listening);
508 int dnet_socket_create_addr(struct dnet_node *n, int sock_type, int proto, int family,
509 struct sockaddr *sa, unsigned int salen, int listening);
511 void dnet_set_sockopt(int s);
512 void dnet_sock_close(int s);
/* Join state values; numbering starts at 1. */
enum dnet_join_state {
	/* Node joined the network */
	DNET_JOIN = 1,
	/* State must be reconnected, when remote peer failed */
	DNET_WANT_RECONNECT,
};
519 int __attribute__((weak)) dnet_state_join_nolock(struct dnet_net_state *st);
521 struct dnet_trans
523 struct rb_node trans_entry;
524 struct list_head trans_list_entry;
526 struct timeval time, start;
528 struct dnet_net_state *orig; /* only for forward */
530 struct dnet_net_state *st;
531 uint64_t trans, rcv_trans;
532 struct dnet_cmd cmd;
534 atomic_t refcnt;
536 int command; /* main command this transaction carries */
538 void *priv;
539 int (* complete)(struct dnet_net_state *st,
540 struct dnet_cmd *cmd,
541 void *priv);
544 void dnet_trans_destroy(struct dnet_trans *t);
545 struct dnet_trans *dnet_trans_alloc(struct dnet_node *n, uint64_t size);
546 int dnet_trans_alloc_send_state(struct dnet_net_state *st, struct dnet_trans_control *ctl);
547 int dnet_trans_timer_setup(struct dnet_trans *t);
549 static inline struct dnet_trans *dnet_trans_get(struct dnet_trans *t)
551 atomic_inc(&t->refcnt);
552 return t;
555 static inline void dnet_trans_put(struct dnet_trans *t)
557 if (t && atomic_dec_and_test(&t->refcnt))
558 dnet_trans_destroy(t);
561 int dnet_trans_insert_nolock(struct rb_root *root, struct dnet_trans *a);
562 void dnet_trans_remove(struct dnet_trans *t);
563 void dnet_trans_remove_nolock(struct rb_root *root, struct dnet_trans *t);
564 struct dnet_trans *dnet_trans_search(struct rb_root *root, uint64_t trans);
566 int dnet_trans_send(struct dnet_trans *t, struct dnet_io_req *req);
568 int dnet_trans_create_send_all(struct dnet_node *n, struct dnet_io_control *ctl);
570 int dnet_recv_list(struct dnet_node *n, struct dnet_net_state *st);
572 ssize_t dnet_send_fd(struct dnet_net_state *st, void *header, uint64_t hsize,
573 int fd, uint64_t offset, uint64_t dsize, int close_on_exit);
574 ssize_t dnet_send_data(struct dnet_net_state *st, void *header, uint64_t hsize, void *data, uint64_t dsize);
575 ssize_t dnet_send(struct dnet_net_state *st, void *data, uint64_t size);
576 ssize_t dnet_send_nolock(struct dnet_net_state *st, void *data, uint64_t size);
/* Completion context: a wait object, a file name and an offset. */
struct dnet_io_completion
{
	struct dnet_wait	*wait;
	char			*file;
	uint64_t		offset;
};
585 struct dnet_addr_storage
587 int reconnect_time, reconnect_time_max;
588 struct list_head reconnect_entry;
589 struct dnet_addr addr;
590 unsigned int __join_state;
/*
 * Returns true (1) if t1 is strictly earlier than t2, 0 otherwise.
 *
 * Seconds are compared first through a signed difference; nanoseconds are
 * only consulted when the seconds are equal.
 */
static inline int dnet_time_before(struct timespec *t1, struct timespec *t2)
{
	const long sec_delta = (long)(t1->tv_sec - t2->tv_sec);

	if (sec_delta < 0)
		return 1;

	if (sec_delta > 0)
		return 0;

	return (long)(t1->tv_nsec - t2->tv_nsec) < 0;
}
/* dnet_time_after(a, b): true if a is strictly later than b. */
#define dnet_time_after(t2, t1) 	dnet_time_before(t1, t2)
608 int dnet_check_thread_start(struct dnet_node *n);
609 void dnet_check_thread_stop(struct dnet_node *n);
610 int dnet_try_reconnect(struct dnet_node *n);
/* Check type identifiers. */
#define DNET_CHECK_TYPE_COPIES_HISTORY		1
#define DNET_CHECK_TYPE_COPIES_FULL		2
#define DNET_CHECK_TYPE_MERGE			3
#define DNET_CHECK_TYPE_DELETE			4

/* Bulk check tunables. */
#define DNET_BULK_IDS_SIZE			1000
#define DNET_BULK_CHECK_PING			100
#define DNET_BULK_STATES_ALLOC_STEP		10
#define DNET_BULK_META_UPD_SIZE			1000
622 struct dnet_bulk_id
624 struct dnet_raw_id id;
625 struct dnet_meta_update last_update;
626 } __attribute__ ((packed));
628 struct dnet_bulk_state
630 struct dnet_addr addr;
631 pthread_mutex_t state_lock;
632 int num;
633 struct dnet_bulk_id *ids;
636 struct dnet_bulk_array
638 int num;
639 struct dnet_bulk_state *states;
640 atomic_t refcnt;
643 static inline int dnet_compare_bulk_state(const void *k1, const void *k2)
645 const struct dnet_bulk_state *st1 = (const struct dnet_bulk_state *)k1;
646 const struct dnet_bulk_state *st2 = (const struct dnet_bulk_state *)k2;
648 if (st1->addr.addr_len > st2->addr.addr_len)
649 return 1;
650 if (st1->addr.addr_len < st2->addr.addr_len)
651 return -1;
652 return memcmp(st1->addr.addr, st2->addr.addr, st1->addr.addr_len);
655 struct dnet_check_temp_db {
656 struct eblob_backend *b;
657 struct eblob_log log;
658 atomic_t refcnt;
/*
 * Parameters of a check run: the temporary database plus a group list.
 * @group_num is presumably the number of entries in @groups - TODO confirm.
 */
struct dnet_check_params {
	struct dnet_check_temp_db	*db;
	int				group_num;
	int				*groups;
};
668 static inline struct dnet_check_temp_db * dnet_check_temp_db_get(struct dnet_check_temp_db *db) {
669 atomic_inc(&db->refcnt);
670 return db;
673 static inline void dnet_check_temp_db_put(struct dnet_check_temp_db *db) {
674 if (atomic_dec_and_test(&db->refcnt)) {
675 eblob_remove_blobs(db->b);
676 eblob_cleanup(db->b);
677 free(db);
681 int dnet_check(struct dnet_node *n, struct dnet_meta_container *mc, struct dnet_bulk_array *bulk_array, int check_copies, struct dnet_check_params *params);
682 int dnet_db_list(struct dnet_net_state *st, struct dnet_cmd *cmd);
683 int dnet_cmd_bulk_check(struct dnet_net_state *orig, struct dnet_cmd *cmd, void *data);
684 int dnet_request_bulk_check(struct dnet_node *n, struct dnet_bulk_state *state, struct dnet_check_params *params);
686 struct dnet_meta_update * dnet_get_meta_update(struct dnet_node *n, struct dnet_meta_container *mc,
687 struct dnet_meta_update *meta_update);
689 int dnet_update_ts_metadata(struct eblob_backend *b, struct dnet_raw_id *id, uint64_t flags_set, uint64_t flags_clear);
690 int dnet_update_ts_metadata_raw(struct dnet_meta_container *mc, uint64_t flags_set, uint64_t flags_clear);
692 int dnet_process_meta(struct dnet_net_state *st, struct dnet_cmd *cmd, struct dnet_io_attr *io);
693 void dnet_convert_metadata(struct dnet_node *n __unused, void *data, int size);
695 void dnet_monitor_exit(struct dnet_node *n);
696 int dnet_monitor_init(struct dnet_node *n, struct dnet_config *cfg);
698 int dnet_set_name(char *name);
699 int dnet_ioprio_set(long pid, int class_id, int prio);
700 int dnet_ioprio_get(long pid);
/*
 * Description of a file region and its mapping, used by dnet_data_map()
 * and dnet_data_unmap(). Which fields are inputs and which are filled in
 * by the mapping call is not visible here - TODO confirm.
 */
struct dnet_map_fd {
	int			fd;
	uint64_t		offset;
	uint64_t		size;

	void			*data;

	uint64_t		mapped_size;
	void			*mapped_data;
};
712 int dnet_data_map(struct dnet_map_fd *map);
713 void dnet_data_unmap(struct dnet_map_fd *map);
715 void *dnet_read_data_wait_raw(struct dnet_node *n, struct dnet_id *id, struct dnet_io_attr *io, int cmd, uint64_t cflags, int *errp);
717 int dnet_srw_init(struct dnet_node *n, struct dnet_config *cfg);
718 void dnet_srw_cleanup(struct dnet_node *n);
719 int dnet_cmd_exec_raw(struct dnet_net_state *st, struct dnet_cmd *cmd, struct sph *header, const void *data);
721 int dnet_cache_init(struct dnet_node *n);
722 void dnet_cache_cleanup(struct dnet_node *n);
723 int dnet_cmd_cache_io(struct dnet_net_state *st, struct dnet_cmd *cmd, struct dnet_io_attr *io, char *data);
725 int __attribute__((weak)) dnet_remove_local(struct dnet_node *n, struct dnet_id *id);
727 #ifdef __cplusplus
729 #endif
731 #endif /* __DNET_ELLIPTICS_H */