/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */
#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
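/*
 * Slot maps dole out indices into a bitmap of in-use buffer slots.
 * ->c counts the free slots: it is -1 while no map is installed, and
 * mark_killed() drives it below -1 so sleepers can tell "nothing free"
 * apart from "shutting down".  Everything is serialized by ->q.lock,
 * the wait queue's own spinlock.
 */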
struct slot_map {
	int c;
	wait_queue_head_t q;
	int count;
	unsigned long *map;
};

static struct slot_map rw_map = {
	.c = -1,
	.q = __WAIT_QUEUE_HEAD_INITIALIZER(rw_map.q)
};
static struct slot_map readdir_map = {
	.c = -1,
	.q = __WAIT_QUEUE_HEAD_INITIALIZER(readdir_map.q)
};
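/*
 * Publish a fresh bitmap with every slot free and wake all sleepers,
 * in particular get() callers parked in wait_for_free() until a map
 * shows up.
 */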
static void install(struct slot_map *m, int count, unsigned long *map)
{
	spin_lock(&m->q.lock);
	m->c = m->count = count;
	m->map = map;
	wake_up_all_locked(&m->q);
	spin_unlock(&m->q.lock);
}
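/*
 * Begin shutdown: subtracting count + 1 drives ->c negative, so get()
 * callers fail, and each outstanding put() then walks ->c back toward
 * -1, the "dead and drained" state that run_down() waits for.
 */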
static void mark_killed(struct slot_map *m)
{
	spin_lock(&m->q.lock);
	m->c -= m->count + 1;
	spin_unlock(&m->q.lock);
}
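/*
 * After mark_killed(), wait (uninterruptibly) for ->c to climb back to
 * -1, i.e. for every outstanding slot to be put back, then retire the
 * bitmap.
 */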
static void run_down(struct slot_map *m)
{
	DEFINE_WAIT(wait);

	spin_lock(&m->q.lock);
	if (m->c != -1) {
		for (;;) {
			if (likely(list_empty(&wait.task_list)))
				__add_wait_queue_tail(&m->q, &wait);
			set_current_state(TASK_UNINTERRUPTIBLE);

			if (m->c == -1)
				break;

			spin_unlock(&m->q.lock);
			schedule();
			spin_lock(&m->q.lock);
		}
		__remove_wait_queue(&m->q, &wait);
		__set_current_state(TASK_RUNNING);
	}
	m->map = NULL;
	spin_unlock(&m->q.lock);
}
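/*
 * Free a slot.  The two wakeups match the two kinds of sleepers: a
 * 0 -> 1 transition means a get() caller can have the slot we just
 * released, and a -2 -> -1 transition means a dying map has drained
 * and run_down() can finish.
 */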
static void put(struct slot_map *m, int slot)
{
	int v;

	spin_lock(&m->q.lock);
	__clear_bit(slot, m->map);
	v = ++m->c;
	if (unlikely(v == 1))		/* no free slots -> one free slot */
		wake_up_locked(&m->q);
	else if (unlikely(v == -1))	/* finished dying */
		wake_up_all_locked(&m->q);
	spin_unlock(&m->q.lock);
}
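/*
 * Sleep until a slot frees up, a signal arrives, or the timeout runs
 * out.  Two timeouts are in play: slot_timeout_secs bounds the whole
 * wait, and while no map is installed (->c < 0) each individual sleep
 * is clamped to ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS so a missing
 * client-core is noticed promptly.  Called and returns with ->q.lock
 * held.
 */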
static int wait_for_free(struct slot_map *m)
{
	long left = slot_timeout_secs * HZ;
	DEFINE_WAIT(wait);

	do {
		long n = left, t;

		if (likely(list_empty(&wait.task_list)))
			__add_wait_queue_tail_exclusive(&m->q, &wait);
		set_current_state(TASK_INTERRUPTIBLE);

		if (m->c > 0)
			break;

		if (m->c < 0) {
			/* we are waiting for map to be installed */
			/* it had better be there soon, or we give up */
			if (n > ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ)
				n = ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ;
		}
		spin_unlock(&m->q.lock);
		t = schedule_timeout(n);
		spin_lock(&m->q.lock);
		if (unlikely(!t) && n != left && m->c < 0)
			left = t;
		else
			left = t + (left - n);
		if (unlikely(signal_pending(current)))
			left = -EINTR;
	} while (left > 0);

	if (!list_empty(&wait.task_list))
		list_del(&wait.task_list);
	else if (left <= 0 && waitqueue_active(&m->q))
		__wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL);
	__set_current_state(TASK_RUNNING);

	if (likely(left > 0))
		return 0;

	return left < 0 ? -EINTR : -ETIMEDOUT;
}
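/*
 * Allocate a slot: wait for one if need be, then claim the first clear
 * bit in the bitmap.  Returns the slot number, or -EINTR/-ETIMEDOUT
 * from wait_for_free() on failure.
 */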
static int get(struct slot_map *m)
{
	int res = 0;

	spin_lock(&m->q.lock);
	if (unlikely(m->c <= 0))
		res = wait_for_free(m);
	if (likely(!res)) {
		m->c--;
		res = find_first_zero_bit(m->map, m->count);
		__set_bit(res, m->map);
	}
	spin_unlock(&m->q.lock);
	return res;
}
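/*
 * The shared buffer handed in by the client-core is carved into
 * desc_count descriptors of desc_size bytes each (desc_size is a
 * page-multiple, and total_size == desc_count * desc_size; see the
 * sanity checks in orangefs_bufmap_initialize()).
 */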
/* used to describe mapped buffers */
struct orangefs_bufmap_desc {
	void *uaddr;			/* user space address pointer */
	struct page **page_array;	/* array of mapped pages */
	int array_count;		/* size of above arrays */
	struct list_head list_link;
};

static struct orangefs_bufmap {
	int total_size;
	int desc_size;
	int desc_shift;
	int desc_count;
	int page_count;

	struct page **page_array;
	struct orangefs_bufmap_desc *desc_array;

	/* array to track usage of buffer descriptors */
	unsigned long *buffer_index_array;

	/* array to track usage of buffer descriptors for readdir */
#define N DIV_ROUND_UP(ORANGEFS_READDIR_DEFAULT_DESC_COUNT, BITS_PER_LONG)
	unsigned long readdir_index_array[N];
#undef N
} *__orangefs_bufmap;
static DEFINE_SPINLOCK(orangefs_bufmap_lock);
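/* Serializes installing and tearing down __orangefs_bufmap above. */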
static void
orangefs_bufmap_unmap(struct orangefs_bufmap *bufmap)
{
	int i;

	for (i = 0; i < bufmap->page_count; i++)
		put_page(bufmap->page_array[i]);
}
static void
orangefs_bufmap_free(struct orangefs_bufmap *bufmap)
{
	kfree(bufmap->page_array);
	kfree(bufmap->desc_array);
	kfree(bufmap->buffer_index_array);
	kfree(bufmap);
}
/*
 * XXX: Can the size and shift change while the caller gives up the
 * XXX: lock between calling this and doing something useful?
 */

int orangefs_bufmap_size_query(void)
{
	struct orangefs_bufmap *bufmap;
	int size = 0;

	spin_lock(&orangefs_bufmap_lock);
	bufmap = __orangefs_bufmap;
	if (bufmap)
		size = bufmap->desc_size;
	spin_unlock(&orangefs_bufmap_lock);
	return size;
}
int orangefs_bufmap_shift_query(void)
{
	struct orangefs_bufmap *bufmap;
	int shift = 0;

	spin_lock(&orangefs_bufmap_lock);
	bufmap = __orangefs_bufmap;
	if (bufmap)
		shift = bufmap->desc_shift;
	spin_unlock(&orangefs_bufmap_lock);
	return shift;
}
static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
/*
 * orangefs_get_bufmap_init
 *
 * If bufmap_init is 1, then the shared memory system, including the
 * buffer_index_array, is available. Otherwise, it is not.
 *
 * returns the value of bufmap_init
 */
int orangefs_get_bufmap_init(void)
{
	return __orangefs_bufmap ? 1 : 0;
}
static struct orangefs_bufmap *
orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
{
	struct orangefs_bufmap *bufmap;

	bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
	if (!bufmap)
		goto out;

	bufmap->total_size = user_desc->total_size;
	bufmap->desc_count = user_desc->count;
	bufmap->desc_size = user_desc->size;
	bufmap->desc_shift = ilog2(bufmap->desc_size);

	bufmap->buffer_index_array =
		kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL);
	if (!bufmap->buffer_index_array) {
		gossip_err("orangefs: could not allocate %d buffer indices\n",
			   bufmap->desc_count);
		goto out_free_bufmap;
	}

	bufmap->desc_array =
		kcalloc(bufmap->desc_count, sizeof(struct orangefs_bufmap_desc),
			GFP_KERNEL);
	if (!bufmap->desc_array) {
		gossip_err("orangefs: could not allocate %d descriptors\n",
			   bufmap->desc_count);
		goto out_free_index_array;
	}

	bufmap->page_count = bufmap->total_size / PAGE_SIZE;

	/* allocate storage to track our page mappings */
	bufmap->page_array =
		kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
	if (!bufmap->page_array)
		goto out_free_desc_array;

	return bufmap;

out_free_desc_array:
	kfree(bufmap->desc_array);
out_free_index_array:
	kfree(bufmap->buffer_index_array);
out_free_bufmap:
	kfree(bufmap);
out:
	return NULL;
}
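/*
 * Pin the client-core's buffer in memory with get_user_pages_fast()
 * (the 1 asks for writable pages) and point each descriptor at its
 * pages_per_desc-page slice of the resulting page array.
 */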
static int
orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
		struct ORANGEFS_dev_map_desc *user_desc)
{
	int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
	int offset = 0, ret, i;

	/* map the pages */
	ret = get_user_pages_fast((unsigned long)user_desc->ptr,
				  bufmap->page_count, 1, bufmap->page_array);

	if (ret < 0)
		return ret;

	if (ret != bufmap->page_count) {
		gossip_err("orangefs error: asked for %d pages, only got %d.\n",
			   bufmap->page_count, ret);

		for (i = 0; i < ret; i++) {
			SetPageError(bufmap->page_array[i]);
			put_page(bufmap->page_array[i]);
		}
		return -ENOMEM;
	}

	/*
	 * ideally we want to get kernel space pointers for each page, but
	 * we can't kmap that many pages at once if highmem is being used.
	 * so instead, we just kmap/kunmap the page address each time the
	 * kaddr is needed.
	 */
	for (i = 0; i < bufmap->page_count; i++)
		flush_dcache_page(bufmap->page_array[i]);

	/* build a list of available descriptors */
	for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
		bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
		bufmap->desc_array[i].array_count = pages_per_desc;
		bufmap->desc_array[i].uaddr =
		    (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
		offset += pages_per_desc;
	}

	return 0;
}
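/*
 * Worked example (illustrative numbers only): a 4 MB buffer split into
 * four descriptors gives desc_size = 1 MB; with 4 KB pages that is
 * pages_per_desc = 256, so descriptor i covers page_array[i * 256]
 * through page_array[i * 256 + 255] in the loop above.
 */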
/*
 * orangefs_bufmap_initialize()
 *
 * initializes the mapped buffer interface
 *
 * returns 0 on success, -errno on failure
 */
int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc)
{
	struct orangefs_bufmap *bufmap;
	int ret = -EINVAL;

	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "orangefs_bufmap_initialize: called (ptr ("
		     "%p) sz (%d) cnt(%d).\n",
		     user_desc->ptr,
		     user_desc->size,
		     user_desc->count);

	/*
	 * sanity check alignment and size of buffer that caller wants to
	 * work with
	 */
	if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
	    (unsigned long)user_desc->ptr) {
		gossip_err("orangefs error: memory alignment (front). %p\n",
			   user_desc->ptr);
		goto out;
	}

	if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
	    != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
		gossip_err("orangefs error: memory alignment (back).(%p + %d)\n",
			   user_desc->ptr,
			   user_desc->total_size);
		goto out;
	}

	if (user_desc->total_size != (user_desc->size * user_desc->count)) {
		gossip_err("orangefs error: user provided an oddly sized buffer: (%d, %d, %d)\n",
			   user_desc->total_size,
			   user_desc->size,
			   user_desc->count);
		goto out;
	}

	if ((user_desc->size % PAGE_SIZE) != 0) {
		gossip_err("orangefs error: bufmap size not page size divisible (%d).\n",
			   user_desc->size);
		goto out;
	}

	ret = -ENOMEM;
	bufmap = orangefs_bufmap_alloc(user_desc);
	if (!bufmap)
		goto out;

	ret = orangefs_bufmap_map(bufmap, user_desc);
	if (ret)
		goto out_free_bufmap;

	spin_lock(&orangefs_bufmap_lock);
	if (__orangefs_bufmap) {
		spin_unlock(&orangefs_bufmap_lock);
		gossip_err("orangefs: error: bufmap already initialized.\n");
		ret = -EALREADY;
		goto out_unmap_bufmap;
	}
	__orangefs_bufmap = bufmap;
	install(&rw_map,
		bufmap->desc_count,
		bufmap->buffer_index_array);
	install(&readdir_map,
		ORANGEFS_READDIR_DEFAULT_DESC_COUNT,
		bufmap->readdir_index_array);
	spin_unlock(&orangefs_bufmap_lock);

	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "orangefs_bufmap_initialize: exiting normally\n");
	return 0;

out_unmap_bufmap:
	orangefs_bufmap_unmap(bufmap);
out_free_bufmap:
	orangefs_bufmap_free(bufmap);
out:
	return ret;
}
/*
 * orangefs_bufmap_finalize()
 *
 * shuts down the mapped buffer interface and releases any resources
 * associated with it
 *
 * no return value
 */
void orangefs_bufmap_finalize(void)
{
	struct orangefs_bufmap *bufmap = __orangefs_bufmap;

	if (!bufmap)
		return;
	gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs_bufmap_finalize: called\n");
	mark_killed(&rw_map);
	mark_killed(&readdir_map);
	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "orangefs_bufmap_finalize: exiting normally\n");
}
void orangefs_bufmap_run_down(void)
{
	struct orangefs_bufmap *bufmap = __orangefs_bufmap;

	if (!bufmap)
		return;
	run_down(&rw_map);
	run_down(&readdir_map);
	spin_lock(&orangefs_bufmap_lock);
	__orangefs_bufmap = NULL;
	spin_unlock(&orangefs_bufmap_lock);
	orangefs_bufmap_unmap(bufmap);
	orangefs_bufmap_free(bufmap);
}
/*
 * orangefs_bufmap_get()
 *
 * gets a free mapped buffer descriptor, will sleep until one becomes
 * available if necessary
 *
 * returns slot on success, -errno on failure
 */
int orangefs_bufmap_get(void)
{
	return get(&rw_map);
}
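/*
 * Typical call pattern (a sketch, not lifted from a real caller):
 *
 *	int slot = orangefs_bufmap_get();
 *	if (slot < 0)
 *		return slot;
 *	...stage I/O through the slot's shared buffer...
 *	orangefs_bufmap_put(slot);
 */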
/*
 * orangefs_bufmap_put()
 *
 * returns a mapped buffer descriptor to the collection
 *
 * no return value
 */
void orangefs_bufmap_put(int buffer_index)
{
	put(&rw_map, buffer_index);
}
/*
 * orangefs_readdir_index_get()
 *
 * gets a free descriptor, will sleep until one becomes
 * available if necessary.
 * Although the readdir buffers are not mapped into kernel space
 * we could do that at a later point in time. Regardless, these
 * indices are used by the client-core.
 *
 * returns slot on success, -errno on failure
 */
int orangefs_readdir_index_get(void)
{
	return get(&readdir_map);
}

void orangefs_readdir_index_put(int buffer_index)
{
	put(&readdir_map, buffer_index);
}
/*
 * we've been handed an iovec, we need to copy it to
 * the shared memory descriptor at "buffer_index".
 */
int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter,
				    int buffer_index,
				    size_t size)
{
	struct orangefs_bufmap_desc *to;
	int i;

	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "%s: buffer_index:%d: size:%zu:\n",
		     __func__, buffer_index, size);

	to = &__orangefs_bufmap->desc_array[buffer_index];
	for (i = 0; size; i++) {
		struct page *page = to->page_array[i];
		size_t n = size;

		if (n > PAGE_SIZE)
			n = PAGE_SIZE;
		n = copy_page_from_iter(page, 0, n, iter);
		if (!n)
			return -EFAULT;
		size -= n;
	}
	return 0;
}
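/*
 * Note that only a zero-byte copy above is treated as a fault; a short
 * copy_page_from_iter() just leaves the remainder for the next pass
 * around the loop.
 */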
/*
 * we've been handed an iovec, we need to fill it from
 * the shared memory descriptor at "buffer_index".
 */
int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter,
				  int buffer_index,
				  size_t size)
{
	struct orangefs_bufmap_desc *from;
	int i;

	from = &__orangefs_bufmap->desc_array[buffer_index];
	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "%s: buffer_index:%d: size:%zu:\n",
		     __func__, buffer_index, size);

	for (i = 0; size; i++) {
		struct page *page = from->page_array[i];
		size_t n = size;

		if (n > PAGE_SIZE)
			n = PAGE_SIZE;
		n = copy_page_to_iter(page, 0, n, iter);
		if (!n)
			return -EFAULT;
		size -= n;
	}
	return 0;
}