2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
6 * This file contains code imported from the OFED rds source file message.c
7 * Oracle elects to have and use the contents of message.c under and governed
8 * by the OpenIB.org BSD license (see below for full license text). However,
9 * the following notice accompanied the original version of this file:
13 * Copyright (c) 2006 Oracle. All rights reserved.
15 * This software is available to you under a choice of one of two
16 * licenses. You may choose to be licensed under the terms of the GNU
17 * General Public License (GPL) Version 2, available from the file
18 * COPYING in the main directory of this source tree, or the
19 * OpenIB.org BSD license below:
21 * Redistribution and use in source and binary forms, with or
22 * without modification, are permitted provided that the following
25 * - Redistributions of source code must retain the above
26 * copyright notice, this list of conditions and the following
29 * - Redistributions in binary form must reproduce the above
30 * copyright notice, this list of conditions and the following
31 * disclaimer in the documentation and/or other materials
32 * provided with the distribution.
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
45 #include <sys/containerof.h>
47 #include <sys/ib/clients/rdsv3/rdsv3.h>
48 #include <sys/ib/clients/rdsv3/rdma.h>
49 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
52 static unsigned int rdsv3_exthdr_size
[__RDSV3_EXTHDR_MAX
] = {
53 [RDSV3_EXTHDR_NONE
] = 0,
54 [RDSV3_EXTHDR_VERSION
] = sizeof (struct rdsv3_ext_header_version
),
55 [RDSV3_EXTHDR_RDMA
] = sizeof (struct rdsv3_ext_header_rdma
),
56 [RDSV3_EXTHDR_RDMA_DEST
] = sizeof (struct rdsv3_ext_header_rdma_dest
),
59 static unsigned int rdsv3_exthdr_size
[__RDSV3_EXTHDR_MAX
] = {
61 sizeof (struct rdsv3_ext_header_version
),
62 sizeof (struct rdsv3_ext_header_rdma
),
63 sizeof (struct rdsv3_ext_header_rdma_dest
),
68 rdsv3_message_addref(struct rdsv3_message
*rm
)
70 RDSV3_DPRINTF5("rdsv3_message_addref", "addref rm %p ref %d",
71 rm
, atomic_get(&rm
->m_refcount
));
72 atomic_inc_32(&rm
->m_refcount
);
76 * This relies on dma_map_sg() not touching sg[].page during merging.
79 rdsv3_message_purge(struct rdsv3_message
*rm
)
83 RDSV3_DPRINTF4("rdsv3_message_purge", "Enter(rm: %p)", rm
);
85 if (test_bit(RDSV3_MSG_PAGEVEC
, &rm
->m_flags
))
88 for (i
= 0; i
< rm
->m_nents
; i
++) {
89 RDSV3_DPRINTF5("rdsv3_message_purge", "putting data page %p\n",
90 (void *)rdsv3_sg_page(&rm
->m_sg
[i
]));
91 /* XXX will have to put_page for page refs */
92 kmem_free(rdsv3_sg_page(&rm
->m_sg
[i
]),
93 rdsv3_sg_len(&rm
->m_sg
[i
]));
97 rdsv3_rdma_free_op(rm
->m_rdma_op
);
99 struct rdsv3_mr
*mr
= rm
->m_rdma_mr
;
100 if (mr
->r_refcount
== 0) {
101 RDSV3_DPRINTF4("rdsv3_message_purge ASSERT 0",
102 "rm %p mr %p", rm
, mr
);
105 if (mr
->r_refcount
== 0xdeadbeef) {
106 RDSV3_DPRINTF4("rdsv3_message_purge ASSERT deadbeef",
107 "rm %p mr %p", rm
, mr
);
110 if (atomic_dec_and_test(&mr
->r_refcount
)) {
111 rm
->m_rdma_mr
= NULL
;
112 __rdsv3_put_mr_final(mr
);
116 RDSV3_DPRINTF4("rdsv3_message_purge", "Return(rm: %p)", rm
);
121 rdsv3_message_put(struct rdsv3_message
*rm
)
123 RDSV3_DPRINTF5("rdsv3_message_put",
124 "put rm %p ref %d\n", rm
, atomic_get(&rm
->m_refcount
));
126 if (atomic_dec_and_test(&rm
->m_refcount
)) {
127 ASSERT(!list_link_active(&rm
->m_sock_item
));
128 ASSERT(!list_link_active(&rm
->m_conn_item
));
129 rdsv3_message_purge(rm
);
131 kmem_free(rm
, sizeof (struct rdsv3_message
) +
132 (rm
->m_nents
* sizeof (struct rdsv3_scatterlist
)));
137 rdsv3_message_inc_free(struct rdsv3_incoming
*inc
)
139 struct rdsv3_message
*rm
=
140 __containerof(inc
, struct rdsv3_message
, m_inc
);
141 rdsv3_message_put(rm
);
145 rdsv3_message_populate_header(struct rdsv3_header
*hdr
, uint16_be_t sport
,
146 uint16_be_t dport
, uint64_t seq
)
149 hdr
->h_sport
= sport
;
150 hdr
->h_dport
= dport
;
151 hdr
->h_sequence
= htonll(seq
);
152 hdr
->h_exthdr
[0] = RDSV3_EXTHDR_NONE
;
156 rdsv3_message_add_extension(struct rdsv3_header
*hdr
,
157 unsigned int type
, const void *data
, unsigned int len
)
159 unsigned int ext_len
= sizeof (uint8_t) + len
;
162 RDSV3_DPRINTF4("rdsv3_message_add_extension", "Enter");
164 /* For now, refuse to add more than one extension header */
165 if (hdr
->h_exthdr
[0] != RDSV3_EXTHDR_NONE
)
168 if (type
>= __RDSV3_EXTHDR_MAX
||
169 len
!= rdsv3_exthdr_size
[type
])
172 if (ext_len
>= RDSV3_HEADER_EXT_SPACE
)
177 (void) memcpy(dst
, data
, len
);
179 dst
[len
] = RDSV3_EXTHDR_NONE
;
181 RDSV3_DPRINTF4("rdsv3_message_add_extension", "Return");
186 * If a message has extension headers, retrieve them here.
189 * unsigned int pos = 0;
192 * buflen = sizeof(buffer);
193 * type = rdsv3_message_next_extension(hdr, &pos, buffer, &buflen);
194 * if (type == RDSV3_EXTHDR_NONE)
200 rdsv3_message_next_extension(struct rdsv3_header
*hdr
,
201 unsigned int *pos
, void *buf
, unsigned int *buflen
)
203 unsigned int offset
, ext_type
, ext_len
;
204 uint8_t *src
= hdr
->h_exthdr
;
206 RDSV3_DPRINTF4("rdsv3_message_next_extension", "Enter");
209 if (offset
>= RDSV3_HEADER_EXT_SPACE
)
213 * Get the extension type and length. For now, the
214 * length is implied by the extension type.
216 ext_type
= src
[offset
++];
218 if (ext_type
== RDSV3_EXTHDR_NONE
|| ext_type
>= __RDSV3_EXTHDR_MAX
)
220 ext_len
= rdsv3_exthdr_size
[ext_type
];
221 if (offset
+ ext_len
> RDSV3_HEADER_EXT_SPACE
)
224 *pos
= offset
+ ext_len
;
225 if (ext_len
< *buflen
)
227 (void) memcpy(buf
, src
+ offset
, *buflen
);
231 *pos
= RDSV3_HEADER_EXT_SPACE
;
233 return (RDSV3_EXTHDR_NONE
);
237 rdsv3_message_add_version_extension(struct rdsv3_header
*hdr
,
238 unsigned int version
)
240 struct rdsv3_ext_header_version ext_hdr
;
242 ext_hdr
.h_version
= htonl(version
);
243 return (rdsv3_message_add_extension(hdr
, RDSV3_EXTHDR_VERSION
,
244 &ext_hdr
, sizeof (ext_hdr
)));
248 rdsv3_message_get_version_extension(struct rdsv3_header
*hdr
,
249 unsigned int *version
)
251 struct rdsv3_ext_header_version ext_hdr
;
252 unsigned int pos
= 0, len
= sizeof (ext_hdr
);
254 RDSV3_DPRINTF4("rdsv3_message_get_version_extension", "Enter");
257 * We assume the version extension is the only one present
259 if (rdsv3_message_next_extension(hdr
, &pos
, &ext_hdr
, &len
) !=
260 RDSV3_EXTHDR_VERSION
)
262 *version
= ntohl(ext_hdr
.h_version
);
267 rdsv3_message_add_rdma_dest_extension(struct rdsv3_header
*hdr
, uint32_t r_key
,
270 struct rdsv3_ext_header_rdma_dest ext_hdr
;
272 ext_hdr
.h_rdma_rkey
= htonl(r_key
);
273 ext_hdr
.h_rdma_offset
= htonl(offset
);
274 return (rdsv3_message_add_extension(hdr
, RDSV3_EXTHDR_RDMA_DEST
,
275 &ext_hdr
, sizeof (ext_hdr
)));
278 struct rdsv3_message
*
279 rdsv3_message_alloc(unsigned int nents
, int gfp
)
281 struct rdsv3_message
*rm
;
283 RDSV3_DPRINTF4("rdsv3_message_alloc", "Enter(nents: %d)", nents
);
285 rm
= kmem_zalloc(sizeof (struct rdsv3_message
) +
286 (nents
* sizeof (struct rdsv3_scatterlist
)), gfp
);
291 list_link_init(&rm
->m_sock_item
);
292 list_link_init(&rm
->m_conn_item
);
293 mutex_init(&rm
->m_rs_lock
, NULL
, MUTEX_DRIVER
, NULL
);
294 rdsv3_init_waitqueue(&rm
->m_flush_wait
);
296 RDSV3_DPRINTF4("rdsv3_message_alloc", "Return(rm: %p)", rm
);
301 struct rdsv3_message
*
302 rdsv3_message_map_pages(unsigned long *page_addrs
, unsigned int total_len
)
304 struct rdsv3_message
*rm
;
307 RDSV3_DPRINTF4("rdsv3_message_map_pages", "Enter(len: %d)", total_len
);
310 rm
= rdsv3_message_alloc(ceil(total_len
, PAGE_SIZE
), KM_NOSLEEP
);
315 return (ERR_PTR(-ENOMEM
));
317 set_bit(RDSV3_MSG_PAGEVEC
, &rm
->m_flags
);
318 rm
->m_inc
.i_hdr
.h_len
= htonl(total_len
);
320 rm
->m_nents
= ceil(total_len
, PAGE_SIZE
);
325 for (i
= 0; i
< rm
->m_nents
; ++i
) {
326 rdsv3_sg_set_page(&rm
->m_sg
[i
],
334 struct rdsv3_message
*
335 rdsv3_message_copy_from_user(struct uio
*uiop
,
338 struct rdsv3_message
*rm
;
339 struct rdsv3_scatterlist
*sg
;
342 RDSV3_DPRINTF4("rdsv3_message_copy_from_user", "Enter: %d", total_len
);
345 rm
= rdsv3_message_alloc(ceil(total_len
, PAGE_SIZE
), KM_NOSLEEP
);
354 rm
->m_inc
.i_hdr
.h_len
= htonl(total_len
);
357 * now allocate and copy in the data payload.
362 if (rdsv3_sg_page(sg
) == NULL
) {
363 ret
= rdsv3_page_remainder_alloc(sg
, total_len
, 0);
369 ret
= uiomove(rdsv3_sg_page(sg
), rdsv3_sg_len(sg
), UIO_WRITE
,
372 RDSV3_DPRINTF2("rdsv3_message_copy_from_user",
378 total_len
-= rdsv3_sg_len(sg
);
385 rdsv3_message_put(rm
);
392 rdsv3_message_inc_copy_to_user(struct rdsv3_incoming
*inc
,
393 uio_t
*uiop
, size_t size
)
395 struct rdsv3_message
*rm
;
396 struct rdsv3_scatterlist
*sg
;
397 unsigned long to_copy
;
398 unsigned long vec_off
;
403 rm
= __containerof(inc
, struct rdsv3_message
, m_inc
);
404 len
= ntohl(rm
->m_inc
.i_hdr
.h_len
);
406 RDSV3_DPRINTF4("rdsv3_message_inc_copy_to_user",
407 "Enter(rm: %p, len: %d)", rm
, len
);
413 while (copied
< size
&& copied
< len
) {
415 to_copy
= min(len
- copied
, sg
->length
- vec_off
);
416 to_copy
= min(size
- copied
, to_copy
);
418 RDSV3_DPRINTF5("rdsv3_message_inc_copy_to_user",
419 "copying %lu bytes to user iov %p from sg [%p, %u] + %lu\n",
421 rdsv3_sg_page(sg
), sg
->length
, vec_off
);
423 ret
= uiomove(rdsv3_sg_page(sg
), to_copy
, UIO_READ
, uiop
);
430 if (vec_off
== sg
->length
) {
440 * If the message is still on the send queue, wait until the transport
441 * is done with it. This is particularly important for RDMA operations.
445 rdsv3_message_wait(struct rdsv3_message
*rm
)
447 rdsv3_wait_event(&rm
->m_flush_wait
,
448 !test_bit(RDSV3_MSG_MAPPED
, &rm
->m_flags
));
452 rdsv3_message_unmapped(struct rdsv3_message
*rm
)
454 clear_bit(RDSV3_MSG_MAPPED
, &rm
->m_flags
);
455 rdsv3_wake_up_all(&rm
->m_flush_wait
);