// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Authors:	Mina Almasry <almasrymina@google.com>
 *			Willem de Bruijn <willemdebruijn.kernel@gmail.com>
 *			Kaiyuan Zhang <kaiyuanz@google.com>
 */

#include <linux/dma-buf.h>
#include <linux/ethtool_netlink.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <trace/events/page_pool.h>

#include "devmem.h"
#include "mp_dmabuf_devmem.h"
#include "page_pool_priv.h"
/* Device memory support */

/* Protected by rtnl_lock() */
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
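
/* Release the per-chunk bookkeeping attached to a genpool chunk: the
 * net_iov array covering the chunk and the owner struct itself.
 */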
static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
					       struct gen_pool_chunk *chunk,
					       void *not_used)
{
	struct dmabuf_genpool_chunk_owner *owner = chunk->owner;

	kvfree(owner->niovs);
	kfree(owner);
}
static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
	struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);

	return owner->base_dma_addr +
	       ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}
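
/* Final teardown, run once the last reference to the binding is dropped:
 * free the chunk owners, destroy the genpool (warning if allocations are
 * still outstanding), unmap and detach the dma-buf, and free the binding.
 */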
void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
{
	size_t size, avail;

	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);

	size = gen_pool_size(binding->chunk_pool);
	avail = gen_pool_avail(binding->chunk_pool);

	if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
		  size, avail))
		gen_pool_destroy(binding->chunk_pool);

	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  DMA_FROM_DEVICE);
	dma_buf_detach(binding->dmabuf, binding->attachment);
	dma_buf_put(binding->dmabuf);
	xa_destroy(&binding->bound_rxqs);
	kfree(binding);
}
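
/* Carve a PAGE_SIZE region out of the binding's genpool and return the
 * net_iov backing it, with its page_pool reference count reset.
 */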
struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct dmabuf_genpool_chunk_owner *owner;
	unsigned long dma_addr;
	struct net_iov *niov;
	ssize_t offset;
	ssize_t index;

	dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE,
					(void **)&owner);
	if (!dma_addr)
		return NULL;

	offset = dma_addr - owner->base_dma_addr;
	index = offset / PAGE_SIZE;
	niov = &owner->niovs[index];

	atomic_long_set(&niov->pp_ref_count, 0);

	return niov;
}
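
/* Return a previously allocated net_iov's PAGE_SIZE region to the genpool. */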
void net_devmem_free_dmabuf(struct net_iov *niov)
{
	struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
	unsigned long dma_addr = net_devmem_get_dma_addr(niov);

	if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
				       PAGE_SIZE)))
		return;

	gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE);
}
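
/* Undo a binding: detach it from every RX queue it was installed on,
 * restart those queues so they stop using the provider, remove the binding
 * from the global xarray and drop the reference taken at bind time.
 * The caller is expected to hold rtnl_lock().
 */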
void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int rxq_idx;

	if (binding->list.next)
		list_del(&binding->list);

	xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
		WARN_ON(rxq->mp_params.mp_priv != binding);

		rxq->mp_params.mp_priv = NULL;

		rxq_idx = get_netdev_rx_queue_index(rxq);

		WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
	}

	xa_erase(&net_devmem_dmabuf_bindings, binding->id);

	net_devmem_dmabuf_binding_put(binding);
}
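
/* Install an existing binding as the memory provider of one RX queue.
 * The queue must be in range, have tcp-data-split enabled with a zero
 * hds-thresh, and must not already be claimed by another provider or by
 * AF_XDP. The queue is restarted so the new provider takes effect.
 */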
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
				    struct net_devmem_dmabuf_binding *binding,
				    struct netlink_ext_ack *extack)
{
	struct netdev_rx_queue *rxq;
	u32 xa_idx;
	int err;

	if (rxq_idx >= dev->real_num_rx_queues) {
		NL_SET_ERR_MSG(extack, "rx queue index out of range");
		return -ERANGE;
	}

	if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
		NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
		return -EINVAL;
	}

	if (dev->cfg->hds_thresh) {
		NL_SET_ERR_MSG(extack, "hds-thresh is not zero");
		return -EINVAL;
	}

	rxq = __netif_get_rx_queue(dev, rxq_idx);
	if (rxq->mp_params.mp_priv) {
		NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
		return -EEXIST;
	}

#ifdef CONFIG_XDP_SOCKETS
	if (rxq->pool) {
		NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
		return -EBUSY;
	}
#endif

	err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
		       GFP_KERNEL);
	if (err)
		return err;

	rxq->mp_params.mp_priv = binding;

	err = netdev_rx_queue_restart(dev, rxq_idx);
	if (err)
		goto err_xa_erase;

	return 0;

err_xa_erase:
	rxq->mp_params.mp_priv = NULL;
	xa_erase(&binding->bound_rxqs, xa_idx);

	return err;
}
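
/* Create a binding from a dma-buf fd: attach and map the dma-buf for the
 * device, carve the resulting scatterlist into a genpool of PAGE_SIZE
 * chunks, and set up one net_iov per page so the page_pool can hand them
 * out as netmem. Returns the new binding or an ERR_PTR().
 */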
struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
		       struct netlink_ext_ack *extack)
{
	struct net_devmem_dmabuf_binding *binding;
	static u32 id_alloc_next;
	struct scatterlist *sg;
	struct dma_buf *dmabuf;
	unsigned int sg_idx, i;
	unsigned long virtual;
	int err;

	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf))
		return ERR_CAST(dmabuf);

	binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
			       dev_to_node(&dev->dev));
	if (!binding) {
		err = -ENOMEM;
		goto err_put_dmabuf;
	}

	binding->dev = dev;

	err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
			      binding, xa_limit_32b, &id_alloc_next,
			      GFP_KERNEL);
	if (err < 0)
		goto err_free_binding;

	xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);

	refcount_set(&binding->ref, 1);

	binding->dmabuf = dmabuf;

	binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
	if (IS_ERR(binding->attachment)) {
		err = PTR_ERR(binding->attachment);
		NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
		goto err_free_id;
	}

	binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
						       DMA_FROM_DEVICE);
	if (IS_ERR(binding->sgt)) {
		err = PTR_ERR(binding->sgt);
		NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
		goto err_detach;
	}

	/* For simplicity we expect to make PAGE_SIZE allocations, but the
	 * binding can be much more flexible than that. We may be able to
	 * allocate MTU sized chunks here. Leave that for future work...
	 */
	binding->chunk_pool =
		gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev));
	if (!binding->chunk_pool) {
		err = -ENOMEM;
		goto err_unmap;
	}

	virtual = 0;
	for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
		dma_addr_t dma_addr = sg_dma_address(sg);
		struct dmabuf_genpool_chunk_owner *owner;
		size_t len = sg_dma_len(sg);
		struct net_iov *niov;

		owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
				     dev_to_node(&dev->dev));
		if (!owner) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		owner->base_virtual = virtual;
		owner->base_dma_addr = dma_addr;
		owner->num_niovs = len / PAGE_SIZE;
		owner->binding = binding;

		err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
					 dma_addr, len, dev_to_node(&dev->dev),
					 owner);
		if (err) {
			kfree(owner);
			err = -EINVAL;
			goto err_free_chunks;
		}

		owner->niovs = kvmalloc_array(owner->num_niovs,
					      sizeof(*owner->niovs),
					      GFP_KERNEL);
		if (!owner->niovs) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		for (i = 0; i < owner->num_niovs; i++) {
			niov = &owner->niovs[i];
			niov->owner = owner;
			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
						      net_devmem_get_dma_addr(niov));
		}

		virtual += len;
	}

	return binding;

err_free_chunks:
	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);
	gen_pool_destroy(binding->chunk_pool);
err_unmap:
	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  DMA_FROM_DEVICE);
err_detach:
	dma_buf_detach(dmabuf, binding->attachment);
err_free_id:
	xa_erase(&net_devmem_dmabuf_bindings, binding->id);
err_free_binding:
	kfree(binding);
err_put_dmabuf:
	dma_buf_put(dmabuf);
	return ERR_PTR(err);
}
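
/* Called when the netdev goes away: remove each of its RX queues from the
 * binding installed on it, so the binding no longer references queues of an
 * unregistered device.
 */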
void dev_dmabuf_uninstall(struct net_device *dev)
{
	struct net_devmem_dmabuf_binding *binding;
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int i;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		binding = dev->_rx[i].mp_params.mp_priv;
		if (!binding)
			continue;

		xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
			if (rxq == &dev->_rx[i]) {
				xa_erase(&binding->bound_rxqs, xa_idx);
				break;
			}
	}
}
/*** "Dmabuf devmem memory provider" ***/
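
/* page_pool memory provider hook: check that the pool is compatible with
 * the dma-buf binding (order-0 only, no dma_sync) and take a reference on
 * the binding for the lifetime of the pool.
 */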
int mp_dmabuf_devmem_init(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	if (!binding)
		return -EINVAL;

	/* dma-buf dma addresses do not need and should not be used with
	 * dma_sync_for_cpu/device. Force disable dma_sync.
	 */
	pool->dma_sync = false;
	pool->dma_sync_for_cpu = false;

	if (pool->p.order != 0)
		return -E2BIG;

	net_devmem_dmabuf_binding_get(binding);
	return 0;
}
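
/* Allocate one netmem from the binding's genpool on behalf of the page_pool;
 * returns 0 when the dma-buf is exhausted.
 */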
netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
	struct net_iov *niov;
	netmem_ref netmem;

	niov = net_devmem_alloc_dmabuf(binding);
	if (!niov)
		return 0;

	netmem = net_iov_to_netmem(niov);

	page_pool_set_pp_info(pool, netmem);

	pool->pages_state_hold_cnt++;
	trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
	return netmem;
}
void mp_dmabuf_devmem_destroy(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	net_devmem_dmabuf_binding_put(binding);
}
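
/* Called when the page_pool releases a netmem: hand the backing net_iov back
 * to the genpool and report that the pool must not attempt to put_page() it.
 */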
bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
{
	long refcount = atomic_long_read(netmem_get_pp_ref_count_ref(netmem));

	if (WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
		return false;

	if (WARN_ON_ONCE(refcount != 1))
		return false;

	page_pool_clear_pp_info(netmem);

	net_devmem_free_dmabuf(netmem_to_net_iov(netmem));

	/* We don't want the page pool put_page()ing our net_iovs. */
	return false;
}