// SPDX-License-Identifier: GPL-2.0
/*
 * Simple benchmark program that uses the various features of io_uring
 * to provide fast random access to a device/file. It has various
 * options that control how we use io_uring, see the OPTIONS section
 * below. This uses the raw io_uring interface.
 *
 * Copyright (C) 2018-2019 Jens Axboe
 */
#define _GNU_SOURCE		/* O_DIRECT, O_NOATIME, *rand48_r() */

#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/resource.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <linux/fs.h>
#include <linux/io_uring.h>
#define min(a, b)		(((a) < (b)) ? (a) : (b))
struct io_sq_ring {
	unsigned *head;
	unsigned *tail;
	unsigned *ring_mask;
	unsigned *ring_entries;
	unsigned *flags;
	unsigned *array;
};

struct io_cq_ring {
	unsigned *head;
	unsigned *tail;
	unsigned *ring_mask;
	unsigned *ring_entries;
	struct io_uring_cqe *cqes;
};
#define DEPTH			128

#define BATCH_SUBMIT		32
#define BATCH_COMPLETE		32

#define BS			4096

#define MAX_FDS			16
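
/*
 * The ring code below needs memory barriers around the shared head and
 * tail indices. The original build takes these from fio's arch headers;
 * as a stand-in sketch, full fences via a GCC builtin are assumed here
 * (stronger than strictly needed on x86, but portable).
 */
#define read_barrier()		__sync_synchronize()
#define write_barrier()		__sync_synchronize()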
static unsigned sq_ring_mask, cq_ring_mask;
struct file {
	unsigned long max_blocks;
	unsigned pending_ios;
	int real_fd;
	int fixed_fd;
};
struct submitter {
	pthread_t thread;
	int ring_fd;
	struct drand48_data rand;
	struct io_sq_ring sq_ring;
	struct io_uring_sqe *sqes;
	struct iovec iovecs[DEPTH];
	struct io_cq_ring cq_ring;
	int inflight;
	unsigned long reaps;
	unsigned long done;
	unsigned long calls;
	volatile int finish;

	__s32 *fds;

	struct file files[MAX_FDS];
	unsigned nr_files;
	unsigned cur_file;
};
static struct submitter submitters[1];
static volatile int finish;
/*
 * OPTIONS: Set these to test the various features of io_uring.
 */
static int polled = 1;		/* use IO polling */
static int fixedbufs = 1;	/* use fixed user buffers */
static int register_files = 1;	/* use fixed files */
static int buffered = 0;	/* use buffered IO, not O_DIRECT */
static int sq_thread_poll = 0;	/* use kernel submission/poller thread */
static int sq_thread_cpu = -1;	/* pin above thread to this CPU */
static int do_nop = 0;		/* no-op SQ ring commands */
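
/*
 * Raw system call wrappers, sketched here since this file bypasses
 * liburing. This assumes the __NR_io_uring_* numbers from current
 * kernel headers; the trailing _NSIG / 8 argument is the sigset size
 * the io_uring_enter syscall expects.
 */
static int io_uring_setup(unsigned entries, struct io_uring_params *p)
{
	return syscall(__NR_io_uring_setup, entries, p);
}

static int io_uring_enter(int fd, unsigned int to_submit,
			  unsigned int min_complete, unsigned int flags,
			  sigset_t *sig)
{
	return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
			flags, sig, _NSIG / 8);
}

static int io_uring_register(int fd, unsigned int opcode, const void *arg,
			     unsigned int nr_args)
{
	return syscall(__NR_io_uring_register, fd, opcode, arg, nr_args);
}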
static int io_uring_register_buffers(struct submitter *s)
{
	if (do_nop)
		return 0;

	return io_uring_register(s->ring_fd, IORING_REGISTER_BUFFERS, s->iovecs,
					DEPTH);
}
static int io_uring_register_files(struct submitter *s)
{
	unsigned i;

	if (do_nop)
		return 0;

	s->fds = calloc(s->nr_files, sizeof(__s32));
	for (i = 0; i < s->nr_files; i++) {
		s->fds[i] = s->files[i].real_fd;
		s->files[i].fixed_fd = i;
	}

	return io_uring_register(s->ring_fd, IORING_REGISTER_FILES, s->fds,
					s->nr_files);
}
static int gettid(void)
{
	return syscall(__NR_gettid);
}
static unsigned file_depth(struct submitter *s)
{
	return (DEPTH + s->nr_files - 1) / s->nr_files;
}
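
/*
 * Fill in one sqe at the given ring index: pick the next file (round
 * robin once a file hits its share of the queue depth), choose a random
 * block-aligned offset, and prep either a fixed-buffer read or a readv.
 */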
static void init_io(struct submitter *s, unsigned index)
{
	struct io_uring_sqe *sqe = &s->sqes[index];
	unsigned long offset;
	struct file *f;
	long r;

	if (do_nop) {
		sqe->opcode = IORING_OP_NOP;
		return;
	}

	if (s->nr_files == 1) {
		f = &s->files[0];
	} else {
		f = &s->files[s->cur_file];
		if (f->pending_ios >= file_depth(s)) {
			s->cur_file++;
			if (s->cur_file == s->nr_files)
				s->cur_file = 0;
			f = &s->files[s->cur_file];
		}
	}
	f->pending_ios++;

	lrand48_r(&s->rand, &r);
	offset = (r % (f->max_blocks - 1)) * BS;

	if (register_files) {
		sqe->flags = IOSQE_FIXED_FILE;
		sqe->fd = f->fixed_fd;
	} else {
		sqe->flags = 0;
		sqe->fd = f->real_fd;
	}
	if (fixedbufs) {
		sqe->opcode = IORING_OP_READ_FIXED;
		sqe->addr = (unsigned long) s->iovecs[index].iov_base;
		sqe->len = BS;
		sqe->buf_index = index;
	} else {
		sqe->opcode = IORING_OP_READV;
		sqe->addr = (unsigned long) &s->iovecs[index];
		sqe->len = 1;
		sqe->buf_index = 0;
	}
	sqe->ioprio = 0;
	sqe->off = offset;
	sqe->user_data = (unsigned long) f;
}
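
/*
 * Prep up to max_ios new requests: advance a private tail, init one sqe
 * per slot, then publish the new tail in a single store so the kernel
 * sees the whole batch at once.
 */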
static int prep_more_ios(struct submitter *s, unsigned max_ios)
{
	struct io_sq_ring *ring = &s->sq_ring;
	unsigned index, tail, next_tail, prepped = 0;

	next_tail = tail = *ring->tail;
	do {
		next_tail++;
		read_barrier();
		if (next_tail == *ring->head)
			break;

		index = tail & sq_ring_mask;
		init_io(s, index);
		ring->array[index] = index;
		prepped++;
		tail = next_tail;
	} while (prepped < max_ios);

	if (*ring->tail != tail) {
		/* order tail store with writes to sqes above */
		write_barrier();
		*ring->tail = tail;
		write_barrier();
	}
	return prepped;
}
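
/*
 * Size a target: block devices report bytes via BLKGETSIZE64, regular
 * files via fstat(); either way, convert to a count of BS-sized blocks.
 */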
static int get_file_size(struct file *f)
{
	struct stat st;

	if (fstat(f->real_fd, &st) < 0)
		return -1;
	if (S_ISBLK(st.st_mode)) {
		unsigned long long bytes;

		if (ioctl(f->real_fd, BLKGETSIZE64, &bytes) != 0)
			return -1;

		f->max_blocks = bytes / BS;
		return 0;
	} else if (S_ISREG(st.st_mode)) {
		f->max_blocks = st.st_size / BS;
		return 0;
	}

	return -1;
}
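
/*
 * Drain the CQ ring: walk [head, tail), check each completion for a
 * full BS-sized read, then publish the new head in one store.
 */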
static int reap_events(struct submitter *s)
{
	struct io_cq_ring *ring = &s->cq_ring;
	struct io_uring_cqe *cqe;
	unsigned head, reaped = 0;

	head = *ring->head;
	do {
		struct file *f;

		read_barrier();
		if (head == *ring->tail)
			break;
		cqe = &ring->cqes[head & cq_ring_mask];
		if (!do_nop) {
			f = (struct file *) (uintptr_t) cqe->user_data;
			f->pending_ios--;
			if (cqe->res != BS) {
				printf("io: unexpected ret=%d\n", cqe->res);
				if (polled && cqe->res == -EOPNOTSUPP)
					printf("Your filesystem doesn't support poll\n");
				return -1;
			}
		}
		reaped++;
		head++;
	} while (1);

	s->inflight -= reaped;
	*ring->head = head;
	write_barrier();
	return reaped;
}
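
/*
 * Main benchmark loop, one thread per submitter: prep a batch, submit
 * it (or let the SQPOLL thread pick it up), then reap completions and
 * update the counters that the stats loop in main() reads.
 */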
static void *submitter_fn(void *data)
{
	struct submitter *s = data;
	struct io_sq_ring *ring = &s->sq_ring;
	int ret, prepped;

	printf("submitter=%d\n", gettid());

	srand48_r(pthread_self(), &s->rand);

	prepped = 0;
	do {
		int to_wait, to_submit, this_reap, to_prep;

		if (!prepped && s->inflight < DEPTH) {
			to_prep = min(DEPTH - s->inflight, BATCH_SUBMIT);
			prepped = prep_more_ios(s, to_prep);
		}
		s->inflight += prepped;
submit_more:
		to_submit = prepped;
submit:
		if (to_submit && (s->inflight + to_submit <= DEPTH))
			to_wait = 0;
		else
			to_wait = min(s->inflight + to_submit, BATCH_COMPLETE);

		/*
		 * Only need to call io_uring_enter if we're not using SQ thread
		 * poll, or if IORING_SQ_NEED_WAKEUP is set.
		 */
		if (!sq_thread_poll || (*ring->flags & IORING_SQ_NEED_WAKEUP)) {
			unsigned flags = 0;

			if (to_wait)
				flags = IORING_ENTER_GETEVENTS;
			if ((*ring->flags & IORING_SQ_NEED_WAKEUP))
				flags |= IORING_ENTER_SQ_WAKEUP;
			ret = io_uring_enter(s->ring_fd, to_submit, to_wait,
						flags, NULL);
			s->calls++;
		} else {
			/* for SQPOLL, the kernel submits on our behalf */
			ret = to_submit;
		}

		/*
		 * For non SQ thread poll, we already got the events we needed
		 * through the io_uring_enter() above. For SQ thread poll, we
		 * need to loop here until we find enough events.
		 */
		this_reap = 0;
		do {
			int r;

			r = reap_events(s);
			if (r == -1) {
				s->finish = 1;
				break;
			} else if (r > 0)
				this_reap += r;
		} while (sq_thread_poll && this_reap < to_wait);
		s->reaps += this_reap;

		if (ret >= 0) {
			if (!ret) {
				to_submit = 0;
				if (s->inflight)
					goto submit;
				continue;
			} else if (ret < to_submit) {
				int diff = to_submit - ret;

				s->done += ret;
				prepped -= diff;
				goto submit_more;
			}
			s->done += ret;
			prepped = 0;
			continue;
		} else if (ret < 0) {
			if (errno == EAGAIN) {
				if (s->finish)
					break;
				if (this_reap)
					goto submit;
				to_submit = 0;
				goto submit;
			}
			printf("io_submit: %s\n", strerror(errno));
			break;
		}
	} while (!s->finish);

	finish = 1;
	return NULL;
}
static void sig_int(int sig)
{
	printf("Exiting on signal %d\n", sig);
	submitters[0].finish = 1;
	finish = 1;
}
static void arm_sig_int(void)
{
	struct sigaction act;

	memset(&act, 0, sizeof(act));
	act.sa_handler = sig_int;
	act.sa_flags = SA_RESTART;
	sigaction(SIGINT, &act, NULL);
}
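
/*
 * Create the ring and map its three regions: the SQ ring (indices plus
 * the index->sqe array), the sqe array itself, and the CQ ring with the
 * cqes inline. Offsets come from the io_uring_params the kernel fills.
 */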
static int setup_ring(struct submitter *s)
{
	struct io_sq_ring *sring = &s->sq_ring;
	struct io_cq_ring *cring = &s->cq_ring;
	struct io_uring_params p;
	int ret, fd;
	void *ptr;

	memset(&p, 0, sizeof(p));

	if (polled && !do_nop)
		p.flags |= IORING_SETUP_IOPOLL;
	if (sq_thread_poll) {
		p.flags |= IORING_SETUP_SQPOLL;
		if (sq_thread_cpu != -1) {
			p.flags |= IORING_SETUP_SQ_AFF;
			p.sq_thread_cpu = sq_thread_cpu;
		}
	}

	fd = io_uring_setup(DEPTH, &p);
	if (fd < 0) {
		perror("io_uring_setup");
		return 1;
	}
	s->ring_fd = fd;

	if (fixedbufs) {
		ret = io_uring_register_buffers(s);
		if (ret < 0) {
			perror("io_uring_register_buffers");
			return 1;
		}
	}

	if (register_files) {
		ret = io_uring_register_files(s);
		if (ret < 0) {
			perror("io_uring_register_files");
			return 1;
		}
	}

	ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32),
			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
			IORING_OFF_SQ_RING);
	printf("sq_ring ptr = 0x%p\n", ptr);
	sring->head = ptr + p.sq_off.head;
	sring->tail = ptr + p.sq_off.tail;
	sring->ring_mask = ptr + p.sq_off.ring_mask;
	sring->ring_entries = ptr + p.sq_off.ring_entries;
	sring->flags = ptr + p.sq_off.flags;
	sring->array = ptr + p.sq_off.array;
	sq_ring_mask = *sring->ring_mask;

	s->sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
			IORING_OFF_SQES);
	printf("sqes ptr = 0x%p\n", s->sqes);

	ptr = mmap(0, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe),
			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
			IORING_OFF_CQ_RING);
	printf("cq_ring ptr = 0x%p\n", ptr);
	cring->head = ptr + p.cq_off.head;
	cring->tail = ptr + p.cq_off.tail;
	cring->ring_mask = ptr + p.cq_off.ring_mask;
	cring->ring_entries = ptr + p.cq_off.ring_entries;
	cring->cqes = ptr + p.cq_off.cqes;
	cq_ring_mask = *cring->ring_mask;
	return 0;
}
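
/* Format the per-file pending IO counts into buf for the stats line. */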
static void file_depths(char *buf)
{
	struct submitter *s = &submitters[0];
	unsigned i;
	char *p;

	buf[0] = '\0';
	p = buf;
	for (i = 0; i < s->nr_files; i++) {
		struct file *f = &s->files[i];

		if (i + 1 == s->nr_files)
			p += sprintf(p, "%d", f->pending_ios);
		else
			p += sprintf(p, "%d, ", f->pending_ios);
	}
}
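
/*
 * Setup order matters: open the files, bump RLIMIT_MEMLOCK (fixed
 * buffers get pinned), allocate aligned buffers, create the ring, then
 * spawn the submitter and print stats once a second until interrupted.
 */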
int main(int argc, char *argv[])
{
	struct submitter *s = &submitters[0];
	unsigned long done, calls, reap;
	int err, i, flags, fd;
	char *fdepths;
	void *ret;

	if (!do_nop && argc < 2) {
		printf("%s: filename\n", argv[0]);
		return 1;
	}

	flags = O_RDONLY | O_NOATIME;
	if (!buffered)
		flags |= O_DIRECT;

	i = 1;
	while (!do_nop && i < argc) {
		struct file *f;

		if (s->nr_files == MAX_FDS) {
			printf("Max number of files (%d) reached\n", MAX_FDS);
			break;
		}
		fd = open(argv[i], flags);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		f = &s->files[s->nr_files];
		f->real_fd = fd;
		if (get_file_size(f)) {
			printf("failed getting size of device/file\n");
			return 1;
		}
		if (f->max_blocks <= 1) {
			printf("Zero file/device size?\n");
			return 1;
		}
		f->max_blocks--;

		printf("Added file %s\n", argv[i]);
		s->nr_files++;
		i++;
	}

	if (fixedbufs) {
		struct rlimit rlim;

		rlim.rlim_cur = RLIM_INFINITY;
		rlim.rlim_max = RLIM_INFINITY;
		if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
			perror("setrlimit");
			return 1;
		}
	}

	arm_sig_int();

	for (i = 0; i < DEPTH; i++) {
		void *buf;

		if (posix_memalign(&buf, BS, BS)) {
			printf("failed alloc\n");
			return 1;
		}
		s->iovecs[i].iov_base = buf;
		s->iovecs[i].iov_len = BS;
	}

	err = setup_ring(s);
	if (err) {
		printf("ring setup failed: %s, %d\n", strerror(errno), err);
		return 1;
	}
	printf("polled=%d, fixedbufs=%d, buffered=%d", polled, fixedbufs, buffered);
	printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH,
		*s->sq_ring.ring_entries, *s->cq_ring.ring_entries);

	pthread_create(&s->thread, NULL, submitter_fn, s);

	fdepths = malloc(8 * s->nr_files);
	reap = calls = done = 0;
	do {
		unsigned long this_done = 0;
		unsigned long this_reap = 0;
		unsigned long this_call = 0;
		unsigned long rpc = 0, ipc = 0;

		sleep(1);
		this_done += s->done;
		this_call += s->calls;
		this_reap += s->reaps;
		if (this_call - calls) {
			rpc = (this_done - done) / (this_call - calls);
			ipc = (this_reap - reap) / (this_call - calls);
		}
		file_depths(fdepths);
		printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s)\n",
				this_done - done, rpc, ipc, s->inflight,
				fdepths);
		done = this_done;
		calls = this_call;
		reap = this_reap;
	} while (!finish);

	pthread_join(s->thread, &ret);
	close(s->ring_fd);
	free(fdepths);
	return 0;
}