2 * Copyright (C) 2016 Red Hat, Inc.
3 * Author: Michael S. Tsirkin <mst@redhat.com>
4 * This work is licensed under the terms of the GNU GPL, version 2.
6 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
7 * signalling, unconditionally.
/**
 * need_event - decide whether the peer has to be signalled.
 * @event: Event - peer requested an event after this entry is written.
 * @next:  Next  - where the next entry will be written.
 * @prev:  Prev  - "Next" value when the event was triggered previously.
 *
 * Every subtraction is truncated to unsigned short, so the comparison is
 * carried out modulo 2^16 and stays correct when the indices wrap around.
 *
 * Return: true when @event lies in the half-open window [@prev, @next),
 * false otherwise (including the empty window @next == @prev).
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
/*
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
37 unsigned long long addr
;
40 /* how much padding is needed to avoid false cache sharing */
41 #define HOST_GUEST_PADDING 0x80
45 unsigned short kick_index
;
46 unsigned char reserved0
[HOST_GUEST_PADDING
- 2];
47 unsigned short call_index
;
48 unsigned char reserved1
[HOST_GUEST_PADDING
- 2];
52 void *buf
; /* descriptor is writeable, we can't get buf from there */
61 unsigned last_used_idx
;
63 unsigned kicked_avail_idx
;
64 unsigned char reserved
[HOST_GUEST_PADDING
- 12];
68 /* we do not need to track last avail index
69 * unless we have more than one in flight.
72 unsigned called_used_idx
;
73 unsigned char reserved
[HOST_GUEST_PADDING
- 4];
76 /* implemented by ring */
82 ret
= posix_memalign((void **)&ring
, 0x1000, ring_size
* sizeof *ring
);
84 perror("Unable to allocate ring buffer.\n");
87 event
= malloc(sizeof *event
);
89 perror("Unable to allocate event buffer.\n");
92 memset(event
, 0, sizeof *event
);
94 guest
.kicked_avail_idx
= -1;
95 guest
.last_used_idx
= 0;
97 host
.called_used_idx
= -1;
98 for (i
= 0; i
< ring_size
; ++i
) {
104 guest
.num_free
= ring_size
;
105 data
= malloc(ring_size
* sizeof *data
);
107 perror("Unable to allocate data buffer.\n");
110 memset(data
, 0, ring_size
* sizeof *data
);
114 int add_inbuf(unsigned len
, void *buf
, void *datap
)
116 unsigned head
, index
;
122 head
= (ring_size
- 1) & (guest
.avail_idx
++);
/* Start with a write. On MESI architectures this helps
 * avoid a shared state with the consumer that is polling this descriptor.
 */
127 ring
[head
].addr
= (unsigned long)(void*)buf
;
128 ring
[head
].len
= len
;
/* The read below might bypass the write above. That is OK because it's
 * just an optimization. If this happens, we will get the cache line in a
 * shared state, which is unfortunate, but probably not worth it to
 * add an explicit full barrier to avoid this.
 */
135 index
= ring
[head
].index
;
136 data
[index
].buf
= buf
;
137 data
[index
].data
= datap
;
138 /* Barrier A (for pairing) */
140 ring
[head
].flags
= DESC_HW
;
145 void *get_buf(unsigned *lenp
, void **bufp
)
147 unsigned head
= (ring_size
- 1) & guest
.last_used_idx
;
151 if (ring
[head
].flags
& DESC_HW
)
153 /* Barrier B (for pairing) */
155 *lenp
= ring
[head
].len
;
156 index
= ring
[head
].index
& (ring_size
- 1);
157 datap
= data
[index
].data
;
158 *bufp
= data
[index
].buf
;
159 data
[index
].buf
= NULL
;
160 data
[index
].data
= NULL
;
162 guest
.last_used_idx
++;
168 unsigned head
= (ring_size
- 1) & guest
.last_used_idx
;
170 while (ring
[head
].flags
& DESC_HW
)
176 /* Doing nothing to disable calls might cause
177 * extra interrupts, but reduces the number of cache misses.
183 unsigned head
= (ring_size
- 1) & guest
.last_used_idx
;
185 event
->call_index
= guest
.last_used_idx
;
186 /* Flush call index write */
187 /* Barrier D (for pairing) */
189 return ring
[head
].flags
& DESC_HW
;
192 void kick_available(void)
194 /* Flush in previous flags write */
195 /* Barrier C (for pairing) */
197 if (!need_event(event
->kick_index
,
199 guest
.kicked_avail_idx
))
202 guest
.kicked_avail_idx
= guest
.avail_idx
;
209 /* Doing nothing to disable kicks might cause
210 * extra interrupts, but reduces the number of cache misses.
216 unsigned head
= (ring_size
- 1) & host
.used_idx
;
218 event
->kick_index
= host
.used_idx
;
219 /* Barrier C (for pairing) */
221 return !(ring
[head
].flags
& DESC_HW
);
224 void poll_avail(void)
226 unsigned head
= (ring_size
- 1) & host
.used_idx
;
228 while (!(ring
[head
].flags
& DESC_HW
))
232 bool use_buf(unsigned *lenp
, void **bufp
)
234 unsigned head
= (ring_size
- 1) & host
.used_idx
;
236 if (!(ring
[head
].flags
& DESC_HW
))
239 /* make sure length read below is not speculated */
240 /* Barrier A (for pairing) */
243 /* simple in-order completion: we don't need
244 * to touch index at all. This also means we
245 * can just modify the descriptor in-place.
/* Make sure len is valid before flags.
 * Note: an alternative is to write len and flags in one access -
 * possible on 64-bit architectures, but wmb is free on Intel anyway,
 * so I have no way to test whether it's a gain.
 */
253 /* Barrier B (for pairing) */
255 ring
[head
].flags
= 0;
262 /* Flush in previous flags write */
263 /* Barrier D (for pairing) */
265 if (!need_event(event
->call_index
,
267 host
.called_used_idx
))
270 host
.called_used_idx
= host
.used_idx
;