include/net/page_pool/helpers.h

   1 /* SPDX-License-Identifier: GPL-2.0
   2  *
   3  * page_pool/helpers.h
   4  *      Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
   5  *      Copyright (C) 2016 Red Hat, Inc.
   6  */
   7
   8 /**
   9  * DOC: page_pool allocator
  10  *
  11  * The page_pool allocator is optimized for recycling page or page fragment used
  12  * by skb packet and xdp frame.
  13  *
  14  * Basic use involves replacing any alloc_pages() calls with page_pool_alloc(),
  15  * which allocates memory with or without page splitting depending on the
  16  * requested memory size.
  17  *
  18  * If the driver knows that it always requires full pages or its allocations are
  19  * always smaller than half a page, it can use one of the more specific API
  20  * calls:
  21  *
  22  * 1. page_pool_alloc_pages(): allocate memory without page splitting when
  23  * driver knows that the memory it needs is always bigger than half of the page
  24  * allocated from page pool. There is no cache line dirtying for 'struct page'
  25  * when a page is recycled back to the page pool.
  26  *
  27  * 2. page_pool_alloc_frag(): allocate memory with page splitting when driver
  28  * knows that the memory it needs is always smaller than or equal to half of the
  29  * page allocated from page pool. Page splitting enables memory saving and thus
  30  * avoids TLB/cache misses for data access, but there is also some cost to
  31  * implement page splitting, mainly some cache line dirtying/bouncing for
  32  * 'struct page' and atomic operations on page->pp_ref_count.
  33  *
  34  * The API keeps track of in-flight pages in order to let API users know when
  35  * it is safe to free a page_pool object. The API users must call
  36  * page_pool_put_page() or page_pool_free_va() to free the page_pool object, or
  37  * attach the page_pool object to a page_pool-aware object like skbs marked with
  38  * skb_mark_for_recycle().
  39  *
  40  * page_pool_put_page() may be called multiple times on the same page if a page
  41  * is split into multiple fragments. For the last fragment, it will either
  42  * recycle the page, or in case of page->_refcount > 1, it will release the DMA
  43  * mapping and in-flight state accounting.
  44  *
  45  * dma_sync_single_range_for_device() is only called for the last fragment when
  46  * page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the
  47  * last freed fragment to do the sync_for_device operation for all fragments in
  48  * the same page when a page is split. The API user must setup pool->p.max_len
  49  * and pool->p.offset correctly and ensure that page_pool_put_page() is called
  50  * with dma_sync_size being -1 for fragment API.
  51  */
  52 #ifndef _NET_PAGE_POOL_HELPERS_H
  53 #define _NET_PAGE_POOL_HELPERS_H
  54
  55 #include <linux/dma-mapping.h>
  56
  57 #include <net/page_pool/types.h>
  58 #include <net/net_debug.h>
  59 #include <net/netmem.h>
  60
  61 #ifdef CONFIG_PAGE_POOL_STATS
  62 /* Deprecated driver-facing API, use netlink instead */
  63 int page_pool_ethtool_stats_get_count(void);
  64 u8 *page_pool_ethtool_stats_get_strings(u8 *data);
  65 u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats);
  66
  67 bool page_pool_get_stats(const struct page_pool *pool,
  68                          struct page_pool_stats *stats);
  69 #else
  70 static inline int page_pool_ethtool_stats_get_count(void)
  71 {
  72         return 0;
  73 }
  74
  75 static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data)
  76 {
  77         return data;
  78 }
  79
  80 static inline u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
  81 {
  82         return data;
  83 }
  84 #endif
  85
  86 /**
  87  * page_pool_dev_alloc_pages() - allocate a page.
  88  * @pool:       pool from which to allocate
  89  *
  90  * Get a page from the page allocator or page_pool caches.
  91  */
  92 static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
  93 {
  94         gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
  95
  96         return page_pool_alloc_pages(pool, gfp);
  97 }
  98
  99 /**
 100  * page_pool_dev_alloc_frag() - allocate a page fragment.
 101  * @pool: pool from which to allocate
 102  * @offset: offset to the allocated page
 103  * @size: requested size
 104  *
 105  * Get a page fragment from the page allocator or page_pool caches.
 106  *
 107  * Return:
 108  * Return allocated page fragment, otherwise return NULL.
 109  */
 110 static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
 111                                                     unsigned int *offset,
 112                                                     unsigned int size)
 113 {
 114         gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
 115
 116         return page_pool_alloc_frag(pool, offset, size, gfp);
 117 }
 118
 119 static inline struct page *page_pool_alloc(struct page_pool *pool,
 120                                            unsigned int *offset,
 121                                            unsigned int *size, gfp_t gfp)
 122 {
 123         unsigned int max_size = PAGE_SIZE << pool->p.order;
 124         struct page *page;
 125
 126         if ((*size << 1) > max_size) {
 127                 *size = max_size;
 128                 *offset = 0;
 129                 return page_pool_alloc_pages(pool, gfp);
 130         }
 131
 132         page = page_pool_alloc_frag(pool, offset, *size, gfp);
 133         if (unlikely(!page))
 134                 return NULL;
 135
 136         /* There is very likely not enough space for another fragment, so append
 137          * the remaining size to the current fragment to avoid truesize
 138          * underestimate problem.
 139          */
 140         if (pool->frag_offset + *size > max_size) {
 141                 *size = max_size - *offset;
 142                 pool->frag_offset = max_size;
 143         }
 144
 145         return page;
 146 }
 147
 148 /**
 149  * page_pool_dev_alloc() - allocate a page or a page fragment.
 150  * @pool: pool from which to allocate
 151  * @offset: offset to the allocated page
 152  * @size: in as the requested size, out as the allocated size
 153  *
 154  * Get a page or a page fragment from the page allocator or page_pool caches
 155  * depending on the requested size in order to allocate memory with least memory
 156  * utilization and performance penalty.
 157  *
 158  * Return:
 159  * Return allocated page or page fragment, otherwise return NULL.
 160  */
 161 static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
 162                                                unsigned int *offset,
 163                                                unsigned int *size)
 164 {
 165         gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
 166
 167         return page_pool_alloc(pool, offset, size, gfp);
 168 }
 169
 170 static inline void *page_pool_alloc_va(struct page_pool *pool,
 171                                        unsigned int *size, gfp_t gfp)
 172 {
 173         unsigned int offset;
 174         struct page *page;
 175
 176         /* Mask off __GFP_HIGHMEM to ensure we can use page_address() */
 177         page = page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM);
 178         if (unlikely(!page))
 179                 return NULL;
 180
 181         return page_address(page) + offset;
 182 }
 183
 184 /**
 185  * page_pool_dev_alloc_va() - allocate a page or a page fragment and return its
 186  *                            va.
 187  * @pool: pool from which to allocate
 188  * @size: in as the requested size, out as the allocated size
 189  *
 190  * This is just a thin wrapper around the page_pool_alloc() API, and
 191  * it returns va of the allocated page or page fragment.
 192  *
 193  * Return:
 194  * Return the va for the allocated page or page fragment, otherwise return NULL.
 195  */
 196 static inline void *page_pool_dev_alloc_va(struct page_pool *pool,
 197                                            unsigned int *size)
 198 {
 199         gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
 200
 201         return page_pool_alloc_va(pool, size, gfp);
 202 }
 203
 204 /**
 205  * page_pool_get_dma_dir() - Retrieve the stored DMA direction.
 206  * @pool:       pool from which page was allocated
 207  *
 208  * Get the stored dma direction. A driver might decide to store this locally
 209  * and avoid the extra cache line from page_pool to determine the direction.
 210  */
 211 static inline enum dma_data_direction
 212 page_pool_get_dma_dir(const struct page_pool *pool)
 213 {
 214         return pool->p.dma_dir;
 215 }
 216
 217 static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr)
 218 {
 219         atomic_long_set(netmem_get_pp_ref_count_ref(netmem), nr);
 220 }
 221
 222 /**
 223  * page_pool_fragment_page() - split a fresh page into fragments
 224  * @page:       page to split
 225  * @nr:         references to set
 226  *
 227  * pp_ref_count represents the number of outstanding references to the page,
 228  * which will be freed using page_pool APIs (rather than page allocator APIs
 229  * like put_page()). Such references are usually held by page_pool-aware
 230  * objects like skbs marked for page pool recycling.
 231  *
 232  * This helper allows the caller to take (set) multiple references to a
 233  * freshly allocated page. The page must be freshly allocated (have a
 234  * pp_ref_count of 1). This is commonly done by drivers and
 235  * "fragment allocators" to save atomic operations - either when they know
 236  * upfront how many references they will need; or to take MAX references and
 237  * return the unused ones with a single atomic dec(), instead of performing
 238  * multiple atomic inc() operations.
 239  */
 240 static inline void page_pool_fragment_page(struct page *page, long nr)
 241 {
 242         page_pool_fragment_netmem(page_to_netmem(page), nr);
 243 }
 244
 245 static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
 246 {
 247         atomic_long_t *pp_ref_count = netmem_get_pp_ref_count_ref(netmem);
 248         long ret;
 249
 250         /* If nr == pp_ref_count then we have cleared all remaining
 251          * references to the page:
 252          * 1. 'n == 1': no need to actually overwrite it.
 253          * 2. 'n != 1': overwrite it with one, which is the rare case
 254          *              for pp_ref_count draining.
 255          *
 256          * The main advantage to doing this is that not only we avoid a atomic
 257          * update, as an atomic_read is generally a much cheaper operation than
 258          * an atomic update, especially when dealing with a page that may be
 259          * referenced by only 2 or 3 users; but also unify the pp_ref_count
 260          * handling by ensuring all pages have partitioned into only 1 piece
 261          * initially, and only overwrite it when the page is partitioned into
 262          * more than one piece.
 263          */
 264         if (atomic_long_read(pp_ref_count) == nr) {
 265                 /* As we have ensured nr is always one for constant case using
 266                  * the BUILD_BUG_ON(), only need to handle the non-constant case
 267                  * here for pp_ref_count draining, which is a rare case.
 268                  */
 269                 BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
 270                 if (!__builtin_constant_p(nr))
 271                         atomic_long_set(pp_ref_count, 1);
 272
 273                 return 0;
 274         }
 275
 276         ret = atomic_long_sub_return(nr, pp_ref_count);
 277         WARN_ON(ret < 0);
 278
 279         /* We are the last user here too, reset pp_ref_count back to 1 to
 280          * ensure all pages have been partitioned into 1 piece initially,
 281          * this should be the rare case when the last two fragment users call
 282          * page_pool_unref_page() currently.
 283          */
 284         if (unlikely(!ret))
 285                 atomic_long_set(pp_ref_count, 1);
 286
 287         return ret;
 288 }
 289
 290 static inline long page_pool_unref_page(struct page *page, long nr)
 291 {
 292         return page_pool_unref_netmem(page_to_netmem(page), nr);
 293 }
 294
 295 static inline void page_pool_ref_netmem(netmem_ref netmem)
 296 {
 297         atomic_long_inc(&netmem_to_page(netmem)->pp_ref_count);
 298 }
 299
 300 static inline void page_pool_ref_page(struct page *page)
 301 {
 302         page_pool_ref_netmem(page_to_netmem(page));
 303 }
 304
 305 static inline bool page_pool_is_last_ref(netmem_ref netmem)
 306 {
 307         /* If page_pool_unref_page() returns 0, we were the last user */
 308         return page_pool_unref_netmem(netmem, 1) == 0;
 309 }
 310
 311 static inline void page_pool_put_netmem(struct page_pool *pool,
 312                                         netmem_ref netmem,
 313                                         unsigned int dma_sync_size,
 314                                         bool allow_direct)
 315 {
 316         /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
 317          * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
 318          */
 319 #ifdef CONFIG_PAGE_POOL
 320         if (!page_pool_is_last_ref(netmem))
 321                 return;
 322
 323         page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct);
 324 #endif
 325 }
 326
 327 /**
 328  * page_pool_put_page() - release a reference to a page pool page
 329  * @pool:       pool from which page was allocated
 330  * @page:       page to release a reference on
 331  * @dma_sync_size: how much of the page may have been touched by the device
 332  * @allow_direct: released by the consumer, allow lockless caching
 333  *
 334  * The outcome of this depends on the page refcnt. If the driver bumps
 335  * the refcnt > 1 this will unmap the page. If the page refcnt is 1
 336  * the allocator owns the page and will try to recycle it in one of the pool
 337  * caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for_device
 338  * using dma_sync_single_range_for_device().
 339  */
 340 static inline void page_pool_put_page(struct page_pool *pool,
 341                                       struct page *page,
 342                                       unsigned int dma_sync_size,
 343                                       bool allow_direct)
 344 {
 345         page_pool_put_netmem(pool, page_to_netmem(page), dma_sync_size,
 346                              allow_direct);
 347 }
 348
 349 static inline void page_pool_put_full_netmem(struct page_pool *pool,
 350                                              netmem_ref netmem,
 351                                              bool allow_direct)
 352 {
 353         page_pool_put_netmem(pool, netmem, -1, allow_direct);
 354 }
 355
 356 /**
 357  * page_pool_put_full_page() - release a reference on a page pool page
 358  * @pool:       pool from which page was allocated
 359  * @page:       page to release a reference on
 360  * @allow_direct: released by the consumer, allow lockless caching
 361  *
 362  * Similar to page_pool_put_page(), but will DMA sync the entire memory area
 363  * as configured in &page_pool_params.max_len.
 364  */
 365 static inline void page_pool_put_full_page(struct page_pool *pool,
 366                                            struct page *page, bool allow_direct)
 367 {
 368         page_pool_put_netmem(pool, page_to_netmem(page), -1, allow_direct);
 369 }
 370
 371 /**
 372  * page_pool_recycle_direct() - release a reference on a page pool page
 373  * @pool:       pool from which page was allocated
 374  * @page:       page to release a reference on
 375  *
 376  * Similar to page_pool_put_full_page() but caller must guarantee safe context
 377  * (e.g NAPI), since it will recycle the page directly into the pool fast cache.
 378  */
 379 static inline void page_pool_recycle_direct(struct page_pool *pool,
 380                                             struct page *page)
 381 {
 382         page_pool_put_full_page(pool, page, true);
 383 }
 384
 385 #define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA     \
 386                 (sizeof(dma_addr_t) > sizeof(unsigned long))
 387
 388 /**
 389  * page_pool_free_va() - free a va into the page_pool
 390  * @pool: pool from which va was allocated
 391  * @va: va to be freed
 392  * @allow_direct: freed by the consumer, allow lockless caching
 393  *
 394  * Free a va allocated from page_pool_allo_va().
 395  */
 396 static inline void page_pool_free_va(struct page_pool *pool, void *va,
 397                                      bool allow_direct)
 398 {
 399         page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct);
 400 }
 401
 402 static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
 403 {
 404         dma_addr_t ret = netmem_get_dma_addr(netmem);
 405
 406         if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
 407                 ret <<= PAGE_SHIFT;
 408
 409         return ret;
 410 }
 411
 412 /**
 413  * page_pool_get_dma_addr() - Retrieve the stored DMA address.
 414  * @page:       page allocated from a page pool
 415  *
 416  * Fetch the DMA address of the page. The page pool to which the page belongs
 417  * must had been created with PP_FLAG_DMA_MAP.
 418  */
 419 static inline dma_addr_t page_pool_get_dma_addr(const struct page *page)
 420 {
 421         return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page));
 422 }
 423
 424 /**
 425  * page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW
 426  * @pool: &page_pool the @page belongs to
 427  * @page: page to sync
 428  * @offset: offset from page start to "hard" start if using PP frags
 429  * @dma_sync_size: size of the data written to the page
 430  *
 431  * Can be used as a shorthand to sync Rx pages before accessing them in the
 432  * driver. Caller must ensure the pool was created with ``PP_FLAG_DMA_MAP``.
 433  * Note that this version performs DMA sync unconditionally, even if the
 434  * associated PP doesn't perform sync-for-device.
 435  */
 436 static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool,
 437                                               const struct page *page,
 438                                               u32 offset, u32 dma_sync_size)
 439 {
 440         dma_sync_single_range_for_cpu(pool->p.dev,
 441                                       page_pool_get_dma_addr(page),
 442                                       offset + pool->p.offset, dma_sync_size,
 443                                       page_pool_get_dma_dir(pool));
 444 }
 445
 446 static inline bool page_pool_put(struct page_pool *pool)
 447 {
 448         return refcount_dec_and_test(&pool->user_cnt);
 449 }
 450
 451 static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
 452 {
 453         if (unlikely(pool->p.nid != new_nid))
 454                 page_pool_update_nid(pool, new_nid);
 455 }
 456
 457 #endif /* _NET_PAGE_POOL_HELPERS_H */