/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
25 #include <gpxe/if_arp.h>
26 #include <gpxe/iobuf.h>
27 #include <gpxe/netdevice.h>
28 #include <gpxe/infiniband.h>
29 #include <gpxe/ipoib.h>
/** IPoIB MTU
 *
 * Also the size in bytes of every receive I/O buffer allocated by
 * ipoib_refill_recv().
 */
#define IPOIB_MTU 2048

/** Number of IPoIB data send work queue entries */
#define IPOIB_DATA_NUM_SEND_WQES 2

/** Number of IPoIB data receive work queue entries */
#define IPOIB_DATA_NUM_RECV_WQES 4

/** Number of IPoIB data completion entries */
#define IPOIB_DATA_NUM_CQES 8

/** Number of IPoIB metadata send work queue entries */
#define IPOIB_META_NUM_SEND_WQES 2

/** Number of IPoIB metadata receive work queue entries */
#define IPOIB_META_NUM_RECV_WQES 2

/** Number of IPoIB metadata completion entries */
#define IPOIB_META_NUM_CQES 8
/** An IPoIB queue set
 *
 * Bundles one completion queue with the queue pair that uses it for
 * both send and receive completions, plus the bookkeeping needed to
 * keep the receive ring topped up.
 */
struct ipoib_queue_set {
	/** Completion queue */
	struct ib_completion_queue *cq;
	/** Queue pair */
	struct ib_queue_pair *qp;
	/** Receive work queue fill level */
	unsigned int recv_fill;
	/** Receive work queue maximum fill level */
	unsigned int recv_max_fill;
};
69 /** An IPoIB device */
72 struct net_device
*netdev
;
73 /** Underlying Infiniband device */
74 struct ib_device
*ibdev
;
76 struct ipoib_queue_set data
;
78 struct ipoib_queue_set meta
;
80 struct ib_gid broadcast_gid
;
82 unsigned int broadcast_lid
;
83 /** Joined to broadcast group */
86 unsigned long data_qkey
;
90 * IPoIB path cache entry
92 * This serves a similar role to the ARP cache for Ethernet. (ARP
93 * *is* used on IPoIB; we have two caches to maintain.)
95 struct ipoib_cached_path
{
96 /** Destination GID */
98 /** Destination LID */
/** Number of IPoIB path cache entries */
#define IPOIB_NUM_CACHED_PATHS 2

/** IPoIB path cache */
static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS];

/** Oldest IPoIB path cache entry index
 *
 * This is the slot that ipoib_recv_path_record() overwrites next; the
 * index wraps back to zero on reaching IPOIB_NUM_CACHED_PATHS.
 */
static unsigned int ipoib_path_cache_idx = 0;

/** TID half used to identify get path record replies */
#define IPOIB_TID_GET_PATH_REC 0x11111111UL

/** TID half used to identify multicast member record replies */
#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL

/** IPoIB metadata TID
 *
 * Placed in the second TID word of each metadata request and
 * incremented after every request sent.
 */
static uint32_t ipoib_meta_tid = 0;
124 /** IPv4 broadcast GID */
125 static const struct ib_gid ipv4_broadcast_gid
= {
126 { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
127 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }
/** Maximum time we will wait for the broadcast join to succeed
 *
 * Used as the iteration count of the polling loop in
 * ipoib_join_broadcast_group().
 */
#define IPOIB_JOIN_MAX_DELAY_MS 1000
/****************************************************************************
 *
 * IPoIB link layer
 *
 ****************************************************************************
 */
140 /** Broadcast QPN used in IPoIB MAC addresses
142 * This is a guaranteed invalid real QPN
144 #define IPOIB_BROADCAST_QPN 0xffffffffUL
146 /** Broadcast IPoIB address */
147 static struct ipoib_mac ipoib_broadcast
= {
148 .qpn
= ntohl ( IPOIB_BROADCAST_QPN
),
152 * Transmit IPoIB packet
154 * @v iobuf I/O buffer
155 * @v netdev Network device
156 * @v net_protocol Network-layer protocol
157 * @v ll_dest Link-layer destination address
159 * Prepends the IPoIB link-layer header and transmits the packet.
161 static int ipoib_tx ( struct io_buffer
*iobuf
, struct net_device
*netdev
,
162 struct net_protocol
*net_protocol
,
163 const void *ll_dest
) {
164 struct ipoib_hdr
*ipoib_hdr
=
165 iob_push ( iobuf
, sizeof ( *ipoib_hdr
) );
167 /* Build IPoIB header */
168 memcpy ( &ipoib_hdr
->pseudo
.peer
, ll_dest
,
169 sizeof ( ipoib_hdr
->pseudo
.peer
) );
170 ipoib_hdr
->real
.proto
= net_protocol
->net_proto
;
171 ipoib_hdr
->real
.reserved
= 0;
173 /* Hand off to network device */
174 return netdev_tx ( netdev
, iobuf
);
178 * Process received IPoIB packet
180 * @v iobuf I/O buffer
181 * @v netdev Network device
183 * Strips off the IPoIB link-layer header and passes up to the
184 * network-layer protocol.
186 static int ipoib_rx ( struct io_buffer
*iobuf
, struct net_device
*netdev
) {
187 struct ipoib_hdr
*ipoib_hdr
= iobuf
->data
;
190 if ( iob_len ( iobuf
) < sizeof ( *ipoib_hdr
) ) {
191 DBG ( "IPoIB packet too short for link-layer header\n" );
192 DBG_HD ( iobuf
->data
, iob_len ( iobuf
) );
197 /* Strip off IPoIB header */
198 iob_pull ( iobuf
, sizeof ( *ipoib_hdr
) );
200 /* Hand off to network-layer protocol */
201 return net_rx ( iobuf
, netdev
, ipoib_hdr
->real
.proto
,
202 &ipoib_hdr
->pseudo
.peer
);
206 * Transcribe IPoIB address
208 * @v ll_addr Link-layer address
209 * @ret string Link-layer address in human-readable format
211 const char * ipoib_ntoa ( const void *ll_addr
) {
213 const struct ipoib_mac
*mac
= ll_addr
;
215 snprintf ( buf
, sizeof ( buf
), "%08lx:%08lx:%08lx:%08lx:%08lx",
216 htonl ( mac
->qpn
), htonl ( mac
->gid
.u
.dwords
[0] ),
217 htonl ( mac
->gid
.u
.dwords
[1] ),
218 htonl ( mac
->gid
.u
.dwords
[2] ),
219 htonl ( mac
->gid
.u
.dwords
[3] ) );
223 /** IPoIB protocol */
224 struct ll_protocol ipoib_protocol __ll_protocol
= {
226 .ll_proto
= htons ( ARPHRD_INFINIBAND
),
227 .ll_addr_len
= IPOIB_ALEN
,
228 .ll_header_len
= IPOIB_HLEN
,
229 .ll_broadcast
= ( uint8_t * ) &ipoib_broadcast
,
/****************************************************************************
 *
 * IPoIB network device
 *
 ****************************************************************************
 */
245 * @v ipoib IPoIB device
248 static void ipoib_destroy_qset ( struct ipoib_device
*ipoib
,
249 struct ipoib_queue_set
*qset
) {
250 struct ib_device
*ibdev
= ipoib
->ibdev
;
253 ib_destroy_qp ( ibdev
, qset
->qp
);
255 ib_destroy_cq ( ibdev
, qset
->cq
);
256 memset ( qset
, 0, sizeof ( *qset
) );
262 * @v ipoib IPoIB device
264 * @ret rc Return status code
266 static int ipoib_create_qset ( struct ipoib_device
*ipoib
,
267 struct ipoib_queue_set
*qset
,
268 unsigned int num_cqes
,
269 unsigned int num_send_wqes
,
270 unsigned int num_recv_wqes
,
271 unsigned long qkey
) {
272 struct ib_device
*ibdev
= ipoib
->ibdev
;
275 /* Store queue parameters */
276 qset
->recv_max_fill
= num_recv_wqes
;
278 /* Allocate completion queue */
279 qset
->cq
= ib_create_cq ( ibdev
, num_cqes
);
281 DBGC ( ipoib
, "IPoIB %p could not allocate completion queue\n",
287 /* Allocate queue pair */
288 qset
->qp
= ib_create_qp ( ibdev
, num_send_wqes
, qset
->cq
,
289 num_recv_wqes
, qset
->cq
, qkey
);
291 DBGC ( ipoib
, "IPoIB %p could not allocate queue pair\n",
296 ib_qp_set_ownerdata ( qset
->qp
, ipoib
->netdev
);
301 ipoib_destroy_qset ( ipoib
, qset
);
306 * Find path cache entry by GID
309 * @ret entry Path cache entry, or NULL
311 static struct ipoib_cached_path
*
312 ipoib_find_cached_path ( struct ib_gid
*gid
) {
313 struct ipoib_cached_path
*path
;
316 for ( i
= 0 ; i
< IPOIB_NUM_CACHED_PATHS
; i
++ ) {
317 path
= &ipoib_path_cache
[i
];
318 if ( memcmp ( &path
->gid
, gid
, sizeof ( *gid
) ) == 0 )
321 DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx cache miss\n",
322 htonl ( gid
->u
.dwords
[0] ), htonl ( gid
->u
.dwords
[1] ),
323 htonl ( gid
->u
.dwords
[2] ), htonl ( gid
->u
.dwords
[3] ) );
328 * Transmit path record request
330 * @v ipoib IPoIB device
331 * @v gid Destination GID
332 * @ret rc Return status code
334 static int ipoib_get_path_record ( struct ipoib_device
*ipoib
,
335 struct ib_gid
*gid
) {
336 struct ib_device
*ibdev
= ipoib
->ibdev
;
337 struct io_buffer
*iobuf
;
338 struct ib_mad_path_record
*path_record
;
339 struct ib_address_vector av
;
342 /* Allocate I/O buffer */
343 iobuf
= alloc_iob ( sizeof ( *path_record
) );
346 iob_put ( iobuf
, sizeof ( *path_record
) );
347 path_record
= iobuf
->data
;
348 memset ( path_record
, 0, sizeof ( *path_record
) );
350 /* Construct path record request */
351 path_record
->mad_hdr
.base_version
= IB_MGMT_BASE_VERSION
;
352 path_record
->mad_hdr
.mgmt_class
= IB_MGMT_CLASS_SUBN_ADM
;
353 path_record
->mad_hdr
.class_version
= 2;
354 path_record
->mad_hdr
.method
= IB_MGMT_METHOD_GET
;
355 path_record
->mad_hdr
.attr_id
= htons ( IB_SA_ATTR_PATH_REC
);
356 path_record
->mad_hdr
.tid
[0] = IPOIB_TID_GET_PATH_REC
;
357 path_record
->mad_hdr
.tid
[1] = ipoib_meta_tid
++;
358 path_record
->sa_hdr
.comp_mask
[1] =
359 htonl ( IB_SA_PATH_REC_DGID
| IB_SA_PATH_REC_SGID
);
360 memcpy ( &path_record
->dgid
, gid
, sizeof ( path_record
->dgid
) );
361 memcpy ( &path_record
->sgid
, &ibdev
->port_gid
,
362 sizeof ( path_record
->sgid
) );
364 /* Construct address vector */
365 memset ( &av
, 0, sizeof ( av
) );
366 av
.dlid
= ibdev
->sm_lid
;
367 av
.dest_qp
= IB_SA_QPN
;
368 av
.qkey
= IB_GLOBAL_QKEY
;
370 /* Post send request */
371 if ( ( rc
= ib_post_send ( ibdev
, ipoib
->meta
.qp
, &av
,
373 DBGC ( ipoib
, "IPoIB %p could not send get path record: %s\n",
374 ipoib
, strerror ( rc
) );
383 * Transmit multicast group membership request
385 * @v ipoib IPoIB device
386 * @v gid Multicast GID
387 * @v join Join (rather than leave) group
388 * @ret rc Return status code
390 static int ipoib_mc_member_record ( struct ipoib_device
*ipoib
,
391 struct ib_gid
*gid
, int join
) {
392 struct ib_device
*ibdev
= ipoib
->ibdev
;
393 struct io_buffer
*iobuf
;
394 struct ib_mad_mc_member_record
*mc_member_record
;
395 struct ib_address_vector av
;
398 /* Allocate I/O buffer */
399 iobuf
= alloc_iob ( sizeof ( *mc_member_record
) );
402 iob_put ( iobuf
, sizeof ( *mc_member_record
) );
403 mc_member_record
= iobuf
->data
;
404 memset ( mc_member_record
, 0, sizeof ( *mc_member_record
) );
406 /* Construct path record request */
407 mc_member_record
->mad_hdr
.base_version
= IB_MGMT_BASE_VERSION
;
408 mc_member_record
->mad_hdr
.mgmt_class
= IB_MGMT_CLASS_SUBN_ADM
;
409 mc_member_record
->mad_hdr
.class_version
= 2;
410 mc_member_record
->mad_hdr
.method
=
411 ( join
? IB_MGMT_METHOD_SET
: IB_MGMT_METHOD_DELETE
);
412 mc_member_record
->mad_hdr
.attr_id
= htons ( IB_SA_ATTR_MC_MEMBER_REC
);
413 mc_member_record
->mad_hdr
.tid
[0] = IPOIB_TID_MC_MEMBER_REC
;
414 mc_member_record
->mad_hdr
.tid
[1] = ipoib_meta_tid
++;
415 mc_member_record
->sa_hdr
.comp_mask
[1] =
416 htonl ( IB_SA_MCMEMBER_REC_MGID
| IB_SA_MCMEMBER_REC_PORT_GID
|
417 IB_SA_MCMEMBER_REC_JOIN_STATE
);
418 mc_member_record
->scope__join_state
= 1;
419 memcpy ( &mc_member_record
->mgid
, gid
,
420 sizeof ( mc_member_record
->mgid
) );
421 memcpy ( &mc_member_record
->port_gid
, &ibdev
->port_gid
,
422 sizeof ( mc_member_record
->port_gid
) );
424 /* Construct address vector */
425 memset ( &av
, 0, sizeof ( av
) );
426 av
.dlid
= ibdev
->sm_lid
;
427 av
.dest_qp
= IB_SA_QPN
;
428 av
.qkey
= IB_GLOBAL_QKEY
;
430 /* Post send request */
431 if ( ( rc
= ib_post_send ( ibdev
, ipoib
->meta
.qp
, &av
,
433 DBGC ( ipoib
, "IPoIB %p could not send get path record: %s\n",
434 ipoib
, strerror ( rc
) );
443 * Transmit packet via IPoIB network device
445 * @v netdev Network device
446 * @v iobuf I/O buffer
447 * @ret rc Return status code
449 static int ipoib_transmit ( struct net_device
*netdev
,
450 struct io_buffer
*iobuf
) {
451 struct ipoib_device
*ipoib
= netdev
->priv
;
452 struct ib_device
*ibdev
= ipoib
->ibdev
;
453 struct ipoib_pseudo_hdr
*ipoib_pshdr
= iobuf
->data
;
454 struct ib_address_vector av
;
456 struct ipoib_cached_path
*path
;
460 if ( iob_len ( iobuf
) < sizeof ( *ipoib_pshdr
) ) {
461 DBGC ( ipoib
, "IPoIB %p buffer too short\n", ipoib
);
464 iob_pull ( iobuf
, ( sizeof ( *ipoib_pshdr
) ) );
466 /* Construct address vector */
467 memset ( &av
, 0, sizeof ( av
) );
468 av
.qkey
= IB_GLOBAL_QKEY
;
470 if ( ipoib_pshdr
->peer
.qpn
== htonl ( IPOIB_BROADCAST_QPN
) ) {
471 /* Broadcast address */
472 av
.dest_qp
= IB_BROADCAST_QPN
;
473 av
.dlid
= ipoib
->broadcast_lid
;
474 gid
= &ipoib
->broadcast_gid
;
476 /* Unicast - look in path cache */
477 path
= ipoib_find_cached_path ( &ipoib_pshdr
->peer
.gid
);
479 /* No path entry - get path record */
480 rc
= ipoib_get_path_record ( ipoib
,
481 &ipoib_pshdr
->peer
.gid
);
482 netdev_tx_complete ( netdev
, iobuf
);
485 av
.dest_qp
= ntohl ( ipoib_pshdr
->peer
.qpn
);
486 av
.dlid
= path
->dlid
;
487 av
.rate
= path
->rate
;
489 gid
= &ipoib_pshdr
->peer
.gid
;
491 memcpy ( &av
.gid
, gid
, sizeof ( av
.gid
) );
493 return ib_post_send ( ibdev
, ipoib
->data
.qp
, &av
, iobuf
);
497 * Handle IPoIB data send completion
499 * @v ibdev Infiniband device
501 * @v completion Completion
502 * @v iobuf I/O buffer
504 static void ipoib_data_complete_send ( struct ib_device
*ibdev __unused
,
505 struct ib_queue_pair
*qp
,
506 struct ib_completion
*completion
,
507 struct io_buffer
*iobuf
) {
508 struct net_device
*netdev
= ib_qp_get_ownerdata ( qp
);
510 netdev_tx_complete_err ( netdev
, iobuf
,
511 ( completion
->syndrome
? -EIO
: 0 ) );
515 * Handle IPoIB data receive completion
517 * @v ibdev Infiniband device
519 * @v completion Completion
520 * @v iobuf I/O buffer
522 static void ipoib_data_complete_recv ( struct ib_device
*ibdev __unused
,
523 struct ib_queue_pair
*qp
,
524 struct ib_completion
*completion
,
525 struct io_buffer
*iobuf
) {
526 struct net_device
*netdev
= ib_qp_get_ownerdata ( qp
);
527 struct ipoib_device
*ipoib
= netdev
->priv
;
528 struct ipoib_pseudo_hdr
*ipoib_pshdr
;
530 if ( completion
->syndrome
) {
531 netdev_rx_err ( netdev
, iobuf
, -EIO
);
535 iob_put ( iobuf
, completion
->len
);
536 if ( iob_len ( iobuf
) < sizeof ( struct ib_global_route_header
) ) {
537 DBGC ( ipoib
, "IPoIB %p received data packet too short to "
538 "contain GRH\n", ipoib
);
539 DBGC_HD ( ipoib
, iobuf
->data
, iob_len ( iobuf
) );
540 netdev_rx_err ( netdev
, iobuf
, -EIO
);
543 iob_pull ( iobuf
, sizeof ( struct ib_global_route_header
) );
545 if ( iob_len ( iobuf
) < sizeof ( struct ipoib_real_hdr
) ) {
546 DBGC ( ipoib
, "IPoIB %p received data packet too short to "
547 "contain IPoIB header\n", ipoib
);
548 DBGC_HD ( ipoib
, iobuf
->data
, iob_len ( iobuf
) );
549 netdev_rx_err ( netdev
, iobuf
, -EIO
);
553 ipoib_pshdr
= iob_push ( iobuf
, sizeof ( *ipoib_pshdr
) );
554 /* FIXME: fill in a MAC address for the sake of AoE! */
556 netdev_rx ( netdev
, iobuf
);
559 ipoib
->data
.recv_fill
--;
563 * Handle IPoIB metadata send completion
565 * @v ibdev Infiniband device
567 * @v completion Completion
568 * @v iobuf I/O buffer
570 static void ipoib_meta_complete_send ( struct ib_device
*ibdev __unused
,
571 struct ib_queue_pair
*qp
,
572 struct ib_completion
*completion
,
573 struct io_buffer
*iobuf
) {
574 struct net_device
*netdev
= ib_qp_get_ownerdata ( qp
);
575 struct ipoib_device
*ipoib
= netdev
->priv
;
577 if ( completion
->syndrome
) {
578 DBGC ( ipoib
, "IPoIB %p metadata TX completion error %x\n",
579 ipoib
, completion
->syndrome
);
585 * Handle received IPoIB path record
587 * @v ipoib IPoIB device
588 * @v path_record Path record
590 static void ipoib_recv_path_record ( struct ipoib_device
*ipoib __unused
,
591 struct ib_mad_path_record
*path_record
) {
592 struct ipoib_cached_path
*path
;
594 /* Update path cache entry */
595 path
= &ipoib_path_cache
[ipoib_path_cache_idx
];
596 memcpy ( &path
->gid
, &path_record
->dgid
, sizeof ( path
->gid
) );
597 path
->dlid
= ntohs ( path_record
->dlid
);
598 path
->sl
= ( path_record
->reserved__sl
& 0x0f );
599 path
->rate
= ( path_record
->rate_selector__rate
& 0x3f );
601 DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n",
602 htonl ( path
->gid
.u
.dwords
[0] ), htonl ( path
->gid
.u
.dwords
[1] ),
603 htonl ( path
->gid
.u
.dwords
[2] ), htonl ( path
->gid
.u
.dwords
[3] ),
604 path
->dlid
, path
->sl
, path
->rate
);
606 /* Update path cache index */
607 ipoib_path_cache_idx
++;
608 if ( ipoib_path_cache_idx
== IPOIB_NUM_CACHED_PATHS
)
609 ipoib_path_cache_idx
= 0;
613 * Handle received IPoIB multicast membership record
615 * @v ipoib IPoIB device
616 * @v mc_member_record Multicast membership record
618 static void ipoib_recv_mc_member_record ( struct ipoib_device
*ipoib
,
619 struct ib_mad_mc_member_record
*mc_member_record
) {
620 /* Record parameters */
621 ipoib
->broadcast_joined
=
622 ( mc_member_record
->scope__join_state
& 0x0f );
623 ipoib
->data_qkey
= ntohl ( mc_member_record
->qkey
);
624 ipoib
->broadcast_lid
= ntohs ( mc_member_record
->mlid
);
625 DBGC ( ipoib
, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n",
626 ipoib
, ( ipoib
->broadcast_joined
? "joined" : "left" ),
627 ipoib
->data_qkey
, ipoib
->broadcast_lid
);
631 * Handle IPoIB metadata receive completion
633 * @v ibdev Infiniband device
635 * @v completion Completion
636 * @v iobuf I/O buffer
638 static void ipoib_meta_complete_recv ( struct ib_device
*ibdev __unused
,
639 struct ib_queue_pair
*qp
,
640 struct ib_completion
*completion
,
641 struct io_buffer
*iobuf
) {
642 struct net_device
*netdev
= ib_qp_get_ownerdata ( qp
);
643 struct ipoib_device
*ipoib
= netdev
->priv
;
646 if ( completion
->syndrome
) {
647 DBGC ( ipoib
, "IPoIB %p metadata RX completion error %x\n",
648 ipoib
, completion
->syndrome
);
652 iob_put ( iobuf
, completion
->len
);
653 if ( iob_len ( iobuf
) < sizeof ( struct ib_global_route_header
) ) {
654 DBGC ( ipoib
, "IPoIB %p received metadata packet too short "
655 "to contain GRH\n", ipoib
);
656 DBGC_HD ( ipoib
, iobuf
->data
, iob_len ( iobuf
) );
659 iob_pull ( iobuf
, sizeof ( struct ib_global_route_header
) );
660 if ( iob_len ( iobuf
) < sizeof ( *mad
) ) {
661 DBGC ( ipoib
, "IPoIB %p received metadata packet too short "
662 "to contain reply\n", ipoib
);
663 DBGC_HD ( ipoib
, iobuf
->data
, iob_len ( iobuf
) );
668 if ( mad
->mad_hdr
.status
!= 0 ) {
669 DBGC ( ipoib
, "IPoIB %p metadata RX err status %04x\n",
670 ipoib
, ntohs ( mad
->mad_hdr
.status
) );
674 switch ( mad
->mad_hdr
.tid
[0] ) {
675 case IPOIB_TID_GET_PATH_REC
:
676 ipoib_recv_path_record ( ipoib
, &mad
->path_record
);
678 case IPOIB_TID_MC_MEMBER_REC
:
679 ipoib_recv_mc_member_record ( ipoib
, &mad
->mc_member_record
);
682 DBGC ( ipoib
, "IPoIB %p unwanted response:\n",
684 DBGC_HD ( ipoib
, mad
, sizeof ( *mad
) );
689 ipoib
->meta
.recv_fill
--;
694 * Refill IPoIB receive ring
696 * @v ipoib IPoIB device
698 static void ipoib_refill_recv ( struct ipoib_device
*ipoib
,
699 struct ipoib_queue_set
*qset
) {
700 struct ib_device
*ibdev
= ipoib
->ibdev
;
701 struct io_buffer
*iobuf
;
704 while ( qset
->recv_fill
< qset
->recv_max_fill
) {
705 iobuf
= alloc_iob ( IPOIB_MTU
);
708 if ( ( rc
= ib_post_recv ( ibdev
, qset
->qp
, iobuf
) ) != 0 ) {
717 * Poll IPoIB network device
719 * @v netdev Network device
721 static void ipoib_poll ( struct net_device
*netdev
) {
722 struct ipoib_device
*ipoib
= netdev
->priv
;
723 struct ib_device
*ibdev
= ipoib
->ibdev
;
725 ib_poll_cq ( ibdev
, ipoib
->meta
.cq
, ipoib_meta_complete_send
,
726 ipoib_meta_complete_recv
);
727 ib_poll_cq ( ibdev
, ipoib
->data
.cq
, ipoib_data_complete_send
,
728 ipoib_data_complete_recv
);
729 ipoib_refill_recv ( ipoib
, &ipoib
->meta
);
730 ipoib_refill_recv ( ipoib
, &ipoib
->data
);
734 * Enable/disable interrupts on IPoIB network device
736 * @v netdev Network device
737 * @v enable Interrupts should be enabled
739 static void ipoib_irq ( struct net_device
*netdev __unused
,
740 int enable __unused
) {
741 /* No implementation */
745 * Open IPoIB network device
747 * @v netdev Network device
748 * @ret rc Return status code
750 static int ipoib_open ( struct net_device
*netdev
) {
751 struct ipoib_device
*ipoib
= netdev
->priv
;
752 struct ib_device
*ibdev
= ipoib
->ibdev
;
755 /* Attach to broadcast multicast GID */
756 if ( ( rc
= ib_mcast_attach ( ibdev
, ipoib
->data
.qp
,
757 &ipoib
->broadcast_gid
) ) != 0 ) {
758 DBG ( "Could not attach to broadcast GID: %s\n",
763 /* Fill receive rings */
764 ipoib_refill_recv ( ipoib
, &ipoib
->meta
);
765 ipoib_refill_recv ( ipoib
, &ipoib
->data
);
771 * Close IPoIB network device
773 * @v netdev Network device
775 static void ipoib_close ( struct net_device
*netdev
) {
776 struct ipoib_device
*ipoib
= netdev
->priv
;
777 struct ib_device
*ibdev
= ipoib
->ibdev
;
779 /* Detach from broadcast multicast GID */
780 ib_mcast_detach ( ibdev
, ipoib
->data
.qp
, &ipoib
->broadcast_gid
);
782 /* FIXME: should probably flush the receive ring */
785 /** IPoIB network device operations */
786 static struct net_device_operations ipoib_operations
= {
788 .close
= ipoib_close
,
789 .transmit
= ipoib_transmit
,
795 * Join IPoIB broadcast group
797 * @v ipoib IPoIB device
798 * @ret rc Return status code
800 static int ipoib_join_broadcast_group ( struct ipoib_device
*ipoib
) {
801 struct ib_device
*ibdev
= ipoib
->ibdev
;
802 unsigned int delay_ms
;
805 /* Make sure we have some receive descriptors */
806 ipoib_refill_recv ( ipoib
, &ipoib
->meta
);
808 /* Send join request */
809 if ( ( rc
= ipoib_mc_member_record ( ipoib
, &ipoib
->broadcast_gid
,
811 DBGC ( ipoib
, "IPoIB %p could not send broadcast join: %s\n",
812 ipoib
, strerror ( rc
) );
816 /* Wait for join to complete. Ideally we wouldn't delay for
817 * this long, but we need the queue key before we can set up
818 * the data queue pair, which we need before we can know the
821 for ( delay_ms
= IPOIB_JOIN_MAX_DELAY_MS
; delay_ms
; delay_ms
-- ) {
823 ib_poll_cq ( ibdev
, ipoib
->meta
.cq
, ipoib_meta_complete_send
,
824 ipoib_meta_complete_recv
);
825 ipoib_refill_recv ( ipoib
, &ipoib
->meta
);
826 if ( ipoib
->broadcast_joined
)
829 DBGC ( ipoib
, "IPoIB %p timed out waiting for broadcast join\n",
838 * @v ibdev Infiniband device
839 * @ret rc Return status code
841 int ipoib_probe ( struct ib_device
*ibdev
) {
842 struct net_device
*netdev
;
843 struct ipoib_device
*ipoib
;
844 struct ipoib_mac
*mac
;
847 /* Allocate network device */
848 netdev
= alloc_ipoibdev ( sizeof ( *ipoib
) );
851 netdev_init ( netdev
, &ipoib_operations
);
852 ipoib
= netdev
->priv
;
853 ib_set_ownerdata ( ibdev
, netdev
);
854 netdev
->dev
= ibdev
->dev
;
855 memset ( ipoib
, 0, sizeof ( *ipoib
) );
856 ipoib
->netdev
= netdev
;
857 ipoib
->ibdev
= ibdev
;
859 /* Calculate broadcast GID */
860 memcpy ( &ipoib
->broadcast_gid
, &ipv4_broadcast_gid
,
861 sizeof ( ipoib
->broadcast_gid
) );
862 ipoib
->broadcast_gid
.u
.words
[2] = htons ( ibdev
->pkey
);
864 /* Allocate metadata queue set */
865 if ( ( rc
= ipoib_create_qset ( ipoib
, &ipoib
->meta
,
867 IPOIB_META_NUM_SEND_WQES
,
868 IPOIB_META_NUM_RECV_WQES
,
869 IB_GLOBAL_QKEY
) ) != 0 ) {
870 DBGC ( ipoib
, "IPoIB %p could not allocate metadata QP: %s\n",
871 ipoib
, strerror ( rc
) );
872 goto err_create_meta_qset
;
875 /* Join broadcast group */
876 if ( ( rc
= ipoib_join_broadcast_group ( ipoib
) ) != 0 ) {
877 DBGC ( ipoib
, "IPoIB %p could not join broadcast group: %s\n",
878 ipoib
, strerror ( rc
) );
879 goto err_join_broadcast_group
;
882 /* Allocate data queue set */
883 if ( ( rc
= ipoib_create_qset ( ipoib
, &ipoib
->data
,
885 IPOIB_DATA_NUM_SEND_WQES
,
886 IPOIB_DATA_NUM_RECV_WQES
,
887 ipoib
->data_qkey
) ) != 0 ) {
888 DBGC ( ipoib
, "IPoIB %p could not allocate data QP: %s\n",
889 ipoib
, strerror ( rc
) );
890 goto err_create_data_qset
;
893 /* Construct MAC address */
894 mac
= ( ( struct ipoib_mac
* ) netdev
->ll_addr
);
895 mac
->qpn
= htonl ( ipoib
->data
.qp
->qpn
);
896 memcpy ( &mac
->gid
, &ibdev
->port_gid
, sizeof ( mac
->gid
) );
898 /* Register network device */
899 if ( ( rc
= register_netdev ( netdev
) ) != 0 )
900 goto err_register_netdev
;
905 ipoib_destroy_qset ( ipoib
, &ipoib
->data
);
906 err_join_broadcast_group
:
907 err_create_data_qset
:
908 ipoib_destroy_qset ( ipoib
, &ipoib
->meta
);
909 err_create_meta_qset
:
910 netdev_nullify ( netdev
);
911 netdev_put ( netdev
);
916 * Remove IPoIB device
918 * @v ibdev Infiniband device
920 void ipoib_remove ( struct ib_device
*ibdev
) {
921 struct net_device
*netdev
= ib_get_ownerdata ( ibdev
);
922 struct ipoib_device
*ipoib
= netdev
->priv
;
924 unregister_netdev ( netdev
);
925 ipoib_destroy_qset ( ipoib
, &ipoib
->data
);
926 ipoib_destroy_qset ( ipoib
, &ipoib
->meta
);
927 netdev_nullify ( netdev
);
928 netdev_put ( netdev
);