// SPDX-License-Identifier: BSD-3-Clause
/*
 * Copyright (c) 2009-2012,2016,2023 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 */
#include <errno.h>
#include <emmintrin.h>
#include <linux/limits.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include "vmbus_bufring.h"

/*
 * Guarantees that operation reordering does not occur at compile time
 * for operations directly before and after the barrier.
 */
#define rte_compiler_barrier()	({ asm volatile ("" : : : "memory"); })

#define VMBUS_RQST_ERROR	0xFFFFFFFFFFFFFFFF
/* Round val up to the next multiple of align (align must be a power of two) */
#define ALIGN(val, align) \
	((typeof(val))(((val) + ((typeof(val))((align) - 1))) & ~((typeof(val))((align) - 1))))

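/*
 * Worked example (illustration, not from the original file): with a
 * 16-byte packet header and a 10-byte payload, pktlen == 26 and
 * ALIGN(26, sizeof(uint64_t)) == 32, so 6 bytes of zero padding keep
 * channel packets quadword-aligned on the ring.
 */
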
void *vmbus_uio_map(int *fd, int size)
{
	void *map;

	map = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
	if (map == MAP_FAILED)
		return NULL;

	return map;
}

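/*
 * Illustrative sketch (an assumption, not part of the original file): how a
 * caller might map the ring pair from a uio_hv_generic device. The device
 * path, the per-ring size, and the TX-ring-first layout are assumptions;
 * the guard macro VMBUS_BUFRING_EXAMPLES is hypothetical.
 */
#ifdef VMBUS_BUFRING_EXAMPLES
#include <fcntl.h>
#include <unistd.h>

static int example_map_rings(struct vmbus_br *txbr)
{
	int fd = open("/dev/uio0", O_RDWR);	/* hypothetical device node */
	int ring_size = 512 * 1024;		/* assumed per-ring size */
	void *ring;

	if (fd < 0)
		return -1;

	/* Maps 2 * ring_size bytes: one TX and one RX ring */
	ring = vmbus_uio_map(&fd, ring_size);
	if (!ring) {
		close(fd);
		return -1;
	}

	/* Assumed layout: the TX bufring sits at the start of the mapping */
	vmbus_br_setup(txbr, ring, ring_size);
	return fd;
}
#endif /* VMBUS_BUFRING_EXAMPLES */
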
/* Increase bufring index by inc with wraparound */
static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
{
	idx += inc;
	if (idx >= sz)
		idx -= sz;

	return idx;
}

void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
{
	br->vbr = buf;
	br->windex = br->vbr->windex;
	br->dsize = blen - sizeof(struct vmbus_bufring);
}

static inline __always_inline void
rte_smp_mb(void)
{
	asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
}

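/*
 * Why rte_smp_mb() works: a LOCK-prefixed add of zero to a stack slot is a
 * full memory barrier on x86, ordering earlier loads and stores against
 * later ones just like mfence, but typically cheaper; adding zero leaves
 * the stack contents unchanged.
 */
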
/* Atomically set *dst to src if it equals exp; returns nonzero on success */
static inline int
rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
{
	uint8_t res;

	asm volatile("lock ; "
		     "cmpxchgl %[src], %[dst];"
		     "sete %[res];"
		     : [res] "=a" (res),	/* output */
		       [dst] "=m" (*dst)
		     : [src] "r" (src),		/* input */
		       "a" (exp),
		       "m" (*dst)
		     : "memory");		/* no-clobber list */

	return res;
}

/* Copy cplen bytes into the TX bufring at windex; returns the new index */
static inline uint32_t
vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
		  const void *src0, uint32_t cplen)
{
	uint8_t *br_data = tbr->vbr->data;
	uint32_t br_dsize = tbr->dsize;
	const uint8_t *src = src0;

	/* XXX use double mapping like Linux kernel? */
	if (cplen > br_dsize - windex) {
		uint32_t fraglen = br_dsize - windex;

		/* Wrap-around detected */
		memcpy(br_data + windex, src, fraglen);
		memcpy(br_data, src + fraglen, cplen - fraglen);
	} else {
		memcpy(br_data + windex, src, cplen);
	}

	return vmbus_br_idxinc(windex, cplen, br_dsize);
}

/*
 * Write a scattered channel packet to the TX bufring.
 *
 * The offset of this channel packet is written as a 64-bit value
 * immediately after the packet.
 *
 * The write goes through three stages:
 *  1. Reserve space in the ring buffer for the new data.
 *     The writer atomically advances the private write index.
 *  2. Copy the new data into the ring.
 *  3. Update the tail of the ring (visible to the host) that indicates
 *     the next read location. The writer updates write_index, waiting
 *     its turn so that concurrent writers publish in reservation order.
 */
static int
vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen)
{
	struct vmbus_bufring *vbr = tbr->vbr;
	uint32_t ring_size = tbr->dsize;
	uint32_t old_windex, next_windex, windex, total;
	uint64_t save_windex;
	int i;

	total = 0;
	for (i = 0; i < iovlen; i++)
		total += iov[i].iov_len;
	total += sizeof(save_windex);

	/* Reserve space in ring */
	do {
		uint32_t avail;

		/* Get current free location */
		old_windex = tbr->windex;

		/* Prevent compiler reordering this with calculation */
		rte_compiler_barrier();

		avail = vmbus_br_availwrite(tbr, old_windex);

		/* If not enough space in ring, then tell caller. */
		if (avail <= total)
			return -EAGAIN;

		next_windex = vmbus_br_idxinc(old_windex, total, ring_size);

		/* Atomic update of next write_index for other threads */
	} while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));

	/* Space from old..new is now reserved */
	windex = old_windex;
	for (i = 0; i < iovlen; i++)
		windex = vmbus_txbr_copyto(tbr, windex,
					   iov[i].iov_base, iov[i].iov_len);

	/* Set the offset of the current channel packet. */
	save_windex = ((uint64_t)old_windex) << 32;
	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
				   sizeof(save_windex));

	/* The region reserved should match the region used */
	if (windex != next_windex)
		return -EINVAL;

	/* Ensure that data is available before updating host index */
	rte_compiler_barrier();

	/* Check in our reservation: wait for our turn to update the host */
	while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
		_mm_pause();

	return 0;
}

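/*
 * The final cmpxchg loop above publishes reservations in order: vbr->windex
 * can only advance from old_windex to next_windex once every earlier writer
 * has published its region, so the host never observes a write index that
 * covers bytes still being copied.
 */
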
int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
			uint32_t dlen, uint32_t flags)
{
	struct vmbus_chanpkt pkt;
	unsigned int pktlen, pad_pktlen;
	const uint32_t hlen = sizeof(pkt);
	uint64_t pad = 0;
	struct iovec iov[3];
	int error;

	pktlen = hlen + dlen;
	pad_pktlen = ALIGN(pktlen, sizeof(uint64_t));

	pkt.hdr.type = type;
	pkt.hdr.flags = flags;
	pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
	pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
	pkt.hdr.xactid = VMBUS_RQST_ERROR;

	iov[0].iov_base = &pkt;
	iov[0].iov_len = hlen;
	iov[1].iov_base = data;
	iov[1].iov_len = dlen;
	iov[2].iov_base = &pad;
	iov[2].iov_len = pad_pktlen - pktlen;

	error = vmbus_txbr_write(txbr, iov, 3);

	return error;
}

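/*
 * Hedged usage sketch (added for illustration): sending an inband packet.
 * VMBUS_CHANPKT_TYPE_INBAND is assumed to be defined in vmbus_bufring.h,
 * and retrying on -EAGAIN (ring full) is one possible caller policy.
 */
#ifdef VMBUS_BUFRING_EXAMPLES
static int example_send(struct vmbus_br *txbr)
{
	uint8_t msg[32] = { 0 };	/* illustrative payload */
	int ret;

	do {
		ret = rte_vmbus_chan_send(txbr, VMBUS_CHANPKT_TYPE_INBAND,
					  msg, sizeof(msg), 0);
	} while (ret == -EAGAIN);	/* ring full: wait for host to drain */

	return ret;
}
#endif /* VMBUS_BUFRING_EXAMPLES */
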
/* Copy cplen bytes out of the RX bufring at rindex; returns the new index */
static inline uint32_t
vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
		    void *dst0, size_t cplen)
{
	const uint8_t *br_data = rbr->vbr->data;
	uint32_t br_dsize = rbr->dsize;
	uint8_t *dst = dst0;

	if (cplen > br_dsize - rindex) {
		uint32_t fraglen = br_dsize - rindex;

		/* Wrap-around detected. */
		memcpy(dst, br_data + rindex, fraglen);
		memcpy(dst + fraglen, br_data, cplen - fraglen);
	} else {
		memcpy(dst, br_data + rindex, cplen);
	}

	return vmbus_br_idxinc(rindex, cplen, br_dsize);
}

/* Copy data from the receive ring but don't change the read index */
static int
vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
{
	uint32_t avail;

	/*
	 * The requested data and the 64-bit channel packet
	 * offset should be there at least.
	 */
	avail = vmbus_br_availread(rbr);
	if (avail < dlen + sizeof(uint64_t))
		return -EAGAIN;

	vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);

	return 0;
}

/*
 * Copy data from the receive ring and advance the read index.
 * NOTE: we assume (dlen + skip) == sizeof(channel packet).
 */
static int
vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
{
	struct vmbus_bufring *vbr = rbr->vbr;
	uint32_t br_dsize = rbr->dsize;
	uint32_t rindex;

	if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
		return -EAGAIN;

	/* Record where the host was when we started the read (for debug) */
	rbr->windex = rbr->vbr->windex;

	/* Copy the channel packet from the RX bufring. */
	rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
	rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);

	/* Discard this channel packet's 64-bit offset, which is useless to us. */
	rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);

	/* Update the read index _after_ the channel packet is fetched. */
	rte_compiler_barrier();

	vbr->rindex = rindex;

	return 0;
}

int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr,
			    void *data, uint32_t *len)
{
	struct vmbus_chanpkt_hdr pkt;
	uint32_t dlen, bufferlen = *len;
	int error;

	error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt));
	if (error)
		return error;

	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN))
		/* XXX this channel is dead actually. */
		return -EIO;

	if (unlikely(pkt.hlen > pkt.tlen))
		return -EIO;

	/* Lengths are in quadwords */
	dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
	*len = dlen;

	/* If the caller's buffer is not large enough */
	if (unlikely(dlen > bufferlen))
		return -ENOBUFS;

	/* Read the whole packet, not skipping the header (raw receive) */
	error = vmbus_rxbr_read(rxbr, data, dlen, 0);
	if (error)
		return error;

	/* Return the number of bytes consumed (including the trailing offset) */
	return dlen + sizeof(uint64_t);
}
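
/*
 * Hedged usage sketch (added for illustration): draining the RX ring. The
 * buffer size and the bail-out policy are assumptions, not part of this
 * API; a real consumer would also rearm the channel interrupt via the UIO
 * file descriptor between polls.
 */
#ifdef VMBUS_BUFRING_EXAMPLES
static void example_recv_loop(struct vmbus_br *rxbr)
{
	uint8_t buf[4096];	/* assumed large enough for one packet */

	for (;;) {
		uint32_t len = sizeof(buf);
		int ret = rte_vmbus_chan_recv_raw(rxbr, buf, &len);

		if (ret == -EAGAIN)
			break;		/* ring empty */
		if (ret < 0)
			break;		/* -EIO / -ENOBUFS: give up */

		/* len now holds the packet length (header + data) */
	}
}
#endif /* VMBUS_BUFRING_EXAMPLES */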