Add errfile definition for new e1000.c
[gpxe.git] / src / drivers / net / ipoib.c
blob784c072096d8dfda106651858ff88ca3c529206e
1 /*
2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 #include <stdint.h>
20 #include <stdio.h>
21 #include <unistd.h>
22 #include <string.h>
23 #include <byteswap.h>
24 #include <errno.h>
25 #include "timer.h"
26 #include <gpxe/if_arp.h>
27 #include <gpxe/iobuf.h>
28 #include <gpxe/netdevice.h>
29 #include <gpxe/infiniband.h>
30 #include <gpxe/ipoib.h>
32 /** @file
34 * IP over Infiniband
/** IPoIB MTU; also the size of every receive buffer we allocate */
#define IPOIB_MTU 2048

/** Data queue pair: number of send work queue entries */
#define IPOIB_DATA_NUM_SEND_WQES 2

/** Data queue pair: number of receive work queue entries */
#define IPOIB_DATA_NUM_RECV_WQES 4

/** Data completion queue: number of completion entries */
#define IPOIB_DATA_NUM_CQES 8

/** Metadata queue pair: number of send work queue entries */
#define IPOIB_META_NUM_SEND_WQES 2

/** Metadata queue pair: number of receive work queue entries */
#define IPOIB_META_NUM_RECV_WQES 2

/** Metadata completion queue: number of completion entries */
#define IPOIB_META_NUM_CQES 8
/**
 * An IPoIB queue set
 *
 * Bundles one completion queue with the queue pair that uses it for
 * both send and receive completions, plus receive-ring bookkeeping.
 */
struct ipoib_queue_set {
	/** Completion queue */
	struct ib_completion_queue *cq;
	/** Queue pair */
	struct ib_queue_pair *qp;
	/** Number of receive buffers currently posted */
	unsigned int recv_fill;
	/** Number of receive buffers we try to keep posted */
	unsigned int recv_max_fill;
};
70 /** An IPoIB device */
71 struct ipoib_device {
72 /** Network device */
73 struct net_device *netdev;
74 /** Underlying Infiniband device */
75 struct ib_device *ibdev;
76 /** Data queue set */
77 struct ipoib_queue_set data;
78 /** Data queue set */
79 struct ipoib_queue_set meta;
80 /** Broadcast GID */
81 struct ib_gid broadcast_gid;
82 /** Broadcast LID */
83 unsigned int broadcast_lid;
84 /** Joined to broadcast group */
85 int broadcast_joined;
86 /** Data queue key */
87 unsigned long data_qkey;
90 /**
91 * IPoIB path cache entry
93 * This serves a similar role to the ARP cache for Ethernet. (ARP
94 * *is* used on IPoIB; we have two caches to maintain.)
96 struct ipoib_cached_path {
97 /** Destination GID */
98 struct ib_gid gid;
99 /** Destination LID */
100 unsigned int dlid;
101 /** Service level */
102 unsigned int sl;
103 /** Rate */
104 unsigned int rate;
107 /** Number of IPoIB path cache entries */
108 #define IPOIB_NUM_CACHED_PATHS 2
110 /** IPoIB path cache */
111 static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS];
113 /** Oldest IPoIB path cache entry index */
114 static unsigned int ipoib_path_cache_idx = 0;
/** TID half marking a request whose reply is a path record */
#define IPOIB_TID_GET_PATH_REC 0x11111111UL

/** TID half marking a request whose reply is a multicast member record */
#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL

/** IPoIB metadata TID counter
 *
 * Supplies the other TID half; incremented for every request sent.
 * Starts at zero by virtue of static storage duration.
 */
static uint32_t ipoib_meta_tid;
125 /** IPv4 broadcast GID */
126 static const struct ib_gid ipv4_broadcast_gid = {
127 { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
128 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }
/** Upper bound, in milliseconds, on the wait for the broadcast join reply */
#define IPOIB_JOIN_MAX_DELAY_MS 1000
134 /****************************************************************************
136 * IPoIB link layer
138 ****************************************************************************
141 /** Broadcast QPN used in IPoIB MAC addresses
143 * This is a guaranteed invalid real QPN
145 #define IPOIB_BROADCAST_QPN 0xffffffffUL
147 /** Broadcast IPoIB address */
148 static struct ipoib_mac ipoib_broadcast = {
149 .qpn = ntohl ( IPOIB_BROADCAST_QPN ),
153 * Transmit IPoIB packet
155 * @v iobuf I/O buffer
156 * @v netdev Network device
157 * @v net_protocol Network-layer protocol
158 * @v ll_dest Link-layer destination address
160 * Prepends the IPoIB link-layer header and transmits the packet.
162 static int ipoib_tx ( struct io_buffer *iobuf, struct net_device *netdev,
163 struct net_protocol *net_protocol,
164 const void *ll_dest ) {
165 struct ipoib_hdr *ipoib_hdr =
166 iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
168 /* Build IPoIB header */
169 memcpy ( &ipoib_hdr->pseudo.peer, ll_dest,
170 sizeof ( ipoib_hdr->pseudo.peer ) );
171 ipoib_hdr->real.proto = net_protocol->net_proto;
172 ipoib_hdr->real.reserved = 0;
174 /* Hand off to network device */
175 return netdev_tx ( netdev, iobuf );
179 * Process received IPoIB packet
181 * @v iobuf I/O buffer
182 * @v netdev Network device
184 * Strips off the IPoIB link-layer header and passes up to the
185 * network-layer protocol.
187 static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) {
188 struct ipoib_hdr *ipoib_hdr = iobuf->data;
190 /* Sanity check */
191 if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
192 DBG ( "IPoIB packet too short for link-layer header\n" );
193 DBG_HD ( iobuf->data, iob_len ( iobuf ) );
194 free_iob ( iobuf );
195 return -EINVAL;
198 /* Strip off IPoIB header */
199 iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
201 /* Hand off to network-layer protocol */
202 return net_rx ( iobuf, netdev, ipoib_hdr->real.proto,
203 &ipoib_hdr->pseudo.peer );
207 * Transcribe IPoIB address
209 * @v ll_addr Link-layer address
210 * @ret string Link-layer address in human-readable format
212 const char * ipoib_ntoa ( const void *ll_addr ) {
213 static char buf[45];
214 const struct ipoib_mac *mac = ll_addr;
216 snprintf ( buf, sizeof ( buf ), "%08lx:%08lx:%08lx:%08lx:%08lx",
217 htonl ( mac->qpn ), htonl ( mac->gid.u.dwords[0] ),
218 htonl ( mac->gid.u.dwords[1] ),
219 htonl ( mac->gid.u.dwords[2] ),
220 htonl ( mac->gid.u.dwords[3] ) );
221 return buf;
224 /** IPoIB protocol */
225 struct ll_protocol ipoib_protocol __ll_protocol = {
226 .name = "IPoIB",
227 .ll_proto = htons ( ARPHRD_INFINIBAND ),
228 .ll_addr_len = IPOIB_ALEN,
229 .ll_header_len = IPOIB_HLEN,
230 .ll_broadcast = ( uint8_t * ) &ipoib_broadcast,
231 .tx = ipoib_tx,
232 .rx = ipoib_rx,
233 .ntoa = ipoib_ntoa,
236 /****************************************************************************
238 * IPoIB network device
240 ****************************************************************************
244 * Destroy queue set
246 * @v ipoib IPoIB device
247 * @v qset Queue set
249 static void ipoib_destroy_qset ( struct ipoib_device *ipoib,
250 struct ipoib_queue_set *qset ) {
251 struct ib_device *ibdev = ipoib->ibdev;
253 if ( qset->qp )
254 ib_destroy_qp ( ibdev, qset->qp );
255 if ( qset->cq )
256 ib_destroy_cq ( ibdev, qset->cq );
257 memset ( qset, 0, sizeof ( *qset ) );
261 * Create queue set
263 * @v ipoib IPoIB device
264 * @v qset Queue set
265 * @ret rc Return status code
267 static int ipoib_create_qset ( struct ipoib_device *ipoib,
268 struct ipoib_queue_set *qset,
269 unsigned int num_cqes,
270 unsigned int num_send_wqes,
271 unsigned int num_recv_wqes,
272 unsigned long qkey ) {
273 struct ib_device *ibdev = ipoib->ibdev;
274 int rc;
276 /* Store queue parameters */
277 qset->recv_max_fill = num_recv_wqes;
279 /* Allocate completion queue */
280 qset->cq = ib_create_cq ( ibdev, num_cqes );
281 if ( ! qset->cq ) {
282 DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
283 ipoib );
284 rc = -ENOMEM;
285 goto err;
288 /* Allocate queue pair */
289 qset->qp = ib_create_qp ( ibdev, num_send_wqes, qset->cq,
290 num_recv_wqes, qset->cq, qkey );
291 if ( ! qset->qp ) {
292 DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
293 ipoib );
294 rc = -ENOMEM;
295 goto err;
297 qset->qp->owner_priv = ipoib->netdev;
299 return 0;
301 err:
302 ipoib_destroy_qset ( ipoib, qset );
303 return rc;
307 * Find path cache entry by GID
309 * @v gid GID
310 * @ret entry Path cache entry, or NULL
312 static struct ipoib_cached_path *
313 ipoib_find_cached_path ( struct ib_gid *gid ) {
314 struct ipoib_cached_path *path;
315 unsigned int i;
317 for ( i = 0 ; i < IPOIB_NUM_CACHED_PATHS ; i++ ) {
318 path = &ipoib_path_cache[i];
319 if ( memcmp ( &path->gid, gid, sizeof ( *gid ) ) == 0 )
320 return path;
322 DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx cache miss\n",
323 htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ),
324 htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ) );
325 return NULL;
329 * Transmit path record request
331 * @v ipoib IPoIB device
332 * @v gid Destination GID
333 * @ret rc Return status code
335 static int ipoib_get_path_record ( struct ipoib_device *ipoib,
336 struct ib_gid *gid ) {
337 struct ib_device *ibdev = ipoib->ibdev;
338 struct io_buffer *iobuf;
339 struct ib_mad_path_record *path_record;
340 struct ib_address_vector av;
341 int rc;
343 /* Allocate I/O buffer */
344 iobuf = alloc_iob ( sizeof ( *path_record ) );
345 if ( ! iobuf )
346 return -ENOMEM;
347 iob_put ( iobuf, sizeof ( *path_record ) );
348 path_record = iobuf->data;
349 memset ( path_record, 0, sizeof ( *path_record ) );
351 /* Construct path record request */
352 path_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
353 path_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
354 path_record->mad_hdr.class_version = 2;
355 path_record->mad_hdr.method = IB_MGMT_METHOD_GET;
356 path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC );
357 path_record->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC;
358 path_record->mad_hdr.tid[1] = ipoib_meta_tid++;
359 path_record->sa_hdr.comp_mask[1] =
360 htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID );
361 memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) );
362 memcpy ( &path_record->sgid, &ibdev->port_gid,
363 sizeof ( path_record->sgid ) );
365 /* Construct address vector */
366 memset ( &av, 0, sizeof ( av ) );
367 av.dlid = ibdev->sm_lid;
368 av.dest_qp = IB_SA_QPN;
369 av.qkey = IB_GLOBAL_QKEY;
371 /* Post send request */
372 if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
373 iobuf ) ) != 0 ) {
374 DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
375 ipoib, strerror ( rc ) );
376 free_iob ( iobuf );
377 return rc;
380 return 0;
384 * Transmit multicast group membership request
386 * @v ipoib IPoIB device
387 * @v gid Multicast GID
388 * @v join Join (rather than leave) group
389 * @ret rc Return status code
391 static int ipoib_mc_member_record ( struct ipoib_device *ipoib,
392 struct ib_gid *gid, int join ) {
393 struct ib_device *ibdev = ipoib->ibdev;
394 struct io_buffer *iobuf;
395 struct ib_mad_mc_member_record *mc_member_record;
396 struct ib_address_vector av;
397 int rc;
399 /* Allocate I/O buffer */
400 iobuf = alloc_iob ( sizeof ( *mc_member_record ) );
401 if ( ! iobuf )
402 return -ENOMEM;
403 iob_put ( iobuf, sizeof ( *mc_member_record ) );
404 mc_member_record = iobuf->data;
405 memset ( mc_member_record, 0, sizeof ( *mc_member_record ) );
407 /* Construct path record request */
408 mc_member_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
409 mc_member_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
410 mc_member_record->mad_hdr.class_version = 2;
411 mc_member_record->mad_hdr.method =
412 ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE );
413 mc_member_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
414 mc_member_record->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC;
415 mc_member_record->mad_hdr.tid[1] = ipoib_meta_tid++;
416 mc_member_record->sa_hdr.comp_mask[1] =
417 htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
418 IB_SA_MCMEMBER_REC_JOIN_STATE );
419 mc_member_record->scope__join_state = 1;
420 memcpy ( &mc_member_record->mgid, gid,
421 sizeof ( mc_member_record->mgid ) );
422 memcpy ( &mc_member_record->port_gid, &ibdev->port_gid,
423 sizeof ( mc_member_record->port_gid ) );
425 /* Construct address vector */
426 memset ( &av, 0, sizeof ( av ) );
427 av.dlid = ibdev->sm_lid;
428 av.dest_qp = IB_SA_QPN;
429 av.qkey = IB_GLOBAL_QKEY;
431 /* Post send request */
432 if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
433 iobuf ) ) != 0 ) {
434 DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
435 ipoib, strerror ( rc ) );
436 free_iob ( iobuf );
437 return rc;
440 return 0;
444 * Transmit packet via IPoIB network device
446 * @v netdev Network device
447 * @v iobuf I/O buffer
448 * @ret rc Return status code
450 static int ipoib_transmit ( struct net_device *netdev,
451 struct io_buffer *iobuf ) {
452 struct ipoib_device *ipoib = netdev->priv;
453 struct ib_device *ibdev = ipoib->ibdev;
454 struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data;
455 struct ib_address_vector av;
456 struct ib_gid *gid;
457 struct ipoib_cached_path *path;
458 int rc;
460 /* Sanity check */
461 if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) {
462 DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
463 return -EINVAL;
465 iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) );
467 /* Construct address vector */
468 memset ( &av, 0, sizeof ( av ) );
469 av.qkey = IB_GLOBAL_QKEY;
470 av.gid_present = 1;
471 if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) {
472 /* Broadcast address */
473 av.dest_qp = IB_BROADCAST_QPN;
474 av.dlid = ipoib->broadcast_lid;
475 gid = &ipoib->broadcast_gid;
476 } else {
477 /* Unicast - look in path cache */
478 path = ipoib_find_cached_path ( &ipoib_pshdr->peer.gid );
479 if ( ! path ) {
480 /* No path entry - get path record */
481 rc = ipoib_get_path_record ( ipoib,
482 &ipoib_pshdr->peer.gid );
483 netdev_tx_complete ( netdev, iobuf );
484 return rc;
486 av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn );
487 av.dlid = path->dlid;
488 av.rate = path->rate;
489 av.sl = path->sl;
490 gid = &ipoib_pshdr->peer.gid;
492 memcpy ( &av.gid, gid, sizeof ( av.gid ) );
494 return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf );
498 * Handle IPoIB data send completion
500 * @v ibdev Infiniband device
501 * @v qp Queue pair
502 * @v completion Completion
503 * @v iobuf I/O buffer
505 static void ipoib_data_complete_send ( struct ib_device *ibdev __unused,
506 struct ib_queue_pair *qp,
507 struct ib_completion *completion,
508 struct io_buffer *iobuf ) {
509 struct net_device *netdev = qp->owner_priv;
511 netdev_tx_complete_err ( netdev, iobuf,
512 ( completion->syndrome ? -EIO : 0 ) );
516 * Handle IPoIB data receive completion
518 * @v ibdev Infiniband device
519 * @v qp Queue pair
520 * @v completion Completion
521 * @v iobuf I/O buffer
523 static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused,
524 struct ib_queue_pair *qp,
525 struct ib_completion *completion,
526 struct io_buffer *iobuf ) {
527 struct net_device *netdev = qp->owner_priv;
528 struct ipoib_device *ipoib = netdev->priv;
529 struct ipoib_pseudo_hdr *ipoib_pshdr;
531 if ( completion->syndrome ) {
532 netdev_rx_err ( netdev, iobuf, -EIO );
533 goto done;
536 iob_put ( iobuf, completion->len );
537 if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) {
538 DBGC ( ipoib, "IPoIB %p received data packet too short to "
539 "contain GRH\n", ipoib );
540 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
541 netdev_rx_err ( netdev, iobuf, -EIO );
542 goto done;
544 iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) );
546 if ( iob_len ( iobuf ) < sizeof ( struct ipoib_real_hdr ) ) {
547 DBGC ( ipoib, "IPoIB %p received data packet too short to "
548 "contain IPoIB header\n", ipoib );
549 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
550 netdev_rx_err ( netdev, iobuf, -EIO );
551 goto done;
554 ipoib_pshdr = iob_push ( iobuf, sizeof ( *ipoib_pshdr ) );
555 /* FIXME: fill in a MAC address for the sake of AoE! */
557 netdev_rx ( netdev, iobuf );
559 done:
560 ipoib->data.recv_fill--;
564 * Handle IPoIB metadata send completion
566 * @v ibdev Infiniband device
567 * @v qp Queue pair
568 * @v completion Completion
569 * @v iobuf I/O buffer
571 static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused,
572 struct ib_queue_pair *qp,
573 struct ib_completion *completion,
574 struct io_buffer *iobuf ) {
575 struct net_device *netdev = qp->owner_priv;
576 struct ipoib_device *ipoib = netdev->priv;
578 if ( completion->syndrome ) {
579 DBGC ( ipoib, "IPoIB %p metadata TX completion error %x\n",
580 ipoib, completion->syndrome );
582 free_iob ( iobuf );
586 * Handle received IPoIB path record
588 * @v ipoib IPoIB device
589 * @v path_record Path record
591 static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused,
592 struct ib_mad_path_record *path_record ) {
593 struct ipoib_cached_path *path;
595 /* Update path cache entry */
596 path = &ipoib_path_cache[ipoib_path_cache_idx];
597 memcpy ( &path->gid, &path_record->dgid, sizeof ( path->gid ) );
598 path->dlid = ntohs ( path_record->dlid );
599 path->sl = ( path_record->reserved__sl & 0x0f );
600 path->rate = ( path_record->rate_selector__rate & 0x3f );
602 DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n",
603 htonl ( path->gid.u.dwords[0] ), htonl ( path->gid.u.dwords[1] ),
604 htonl ( path->gid.u.dwords[2] ), htonl ( path->gid.u.dwords[3] ),
605 path->dlid, path->sl, path->rate );
607 /* Update path cache index */
608 ipoib_path_cache_idx++;
609 if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS )
610 ipoib_path_cache_idx = 0;
614 * Handle received IPoIB multicast membership record
616 * @v ipoib IPoIB device
617 * @v mc_member_record Multicast membership record
619 static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
620 struct ib_mad_mc_member_record *mc_member_record ) {
621 /* Record parameters */
622 ipoib->broadcast_joined =
623 ( mc_member_record->scope__join_state & 0x0f );
624 ipoib->data_qkey = ntohl ( mc_member_record->qkey );
625 ipoib->broadcast_lid = ntohs ( mc_member_record->mlid );
626 DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n",
627 ipoib, ( ipoib->broadcast_joined ? "joined" : "left" ),
628 ipoib->data_qkey, ipoib->broadcast_lid );
632 * Handle IPoIB metadata receive completion
634 * @v ibdev Infiniband device
635 * @v qp Queue pair
636 * @v completion Completion
637 * @v iobuf I/O buffer
639 static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused,
640 struct ib_queue_pair *qp,
641 struct ib_completion *completion,
642 struct io_buffer *iobuf ) {
643 struct net_device *netdev = qp->owner_priv;
644 struct ipoib_device *ipoib = netdev->priv;
645 union ib_mad *mad;
647 if ( completion->syndrome ) {
648 DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n",
649 ipoib, completion->syndrome );
650 goto done;
653 iob_put ( iobuf, completion->len );
654 if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) {
655 DBGC ( ipoib, "IPoIB %p received metadata packet too short "
656 "to contain GRH\n", ipoib );
657 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
658 goto done;
660 iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) );
661 if ( iob_len ( iobuf ) < sizeof ( *mad ) ) {
662 DBGC ( ipoib, "IPoIB %p received metadata packet too short "
663 "to contain reply\n", ipoib );
664 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
665 goto done;
667 mad = iobuf->data;
669 if ( mad->mad_hdr.status != 0 ) {
670 DBGC ( ipoib, "IPoIB %p metadata RX err status %04x\n",
671 ipoib, ntohs ( mad->mad_hdr.status ) );
672 goto done;
675 switch ( mad->mad_hdr.tid[0] ) {
676 case IPOIB_TID_GET_PATH_REC:
677 ipoib_recv_path_record ( ipoib, &mad->path_record );
678 break;
679 case IPOIB_TID_MC_MEMBER_REC:
680 ipoib_recv_mc_member_record ( ipoib, &mad->mc_member_record );
681 break;
682 default:
683 DBGC ( ipoib, "IPoIB %p unwanted response:\n",
684 ipoib );
685 DBGC_HD ( ipoib, mad, sizeof ( *mad ) );
686 break;
689 done:
690 ipoib->meta.recv_fill--;
691 free_iob ( iobuf );
695 * Refill IPoIB receive ring
697 * @v ipoib IPoIB device
699 static void ipoib_refill_recv ( struct ipoib_device *ipoib,
700 struct ipoib_queue_set *qset ) {
701 struct ib_device *ibdev = ipoib->ibdev;
702 struct io_buffer *iobuf;
703 int rc;
705 while ( qset->recv_fill < qset->recv_max_fill ) {
706 iobuf = alloc_iob ( IPOIB_MTU );
707 if ( ! iobuf )
708 break;
709 if ( ( rc = ib_post_recv ( ibdev, qset->qp, iobuf ) ) != 0 ) {
710 free_iob ( iobuf );
711 break;
713 qset->recv_fill++;
718 * Poll IPoIB network device
720 * @v netdev Network device
722 static void ipoib_poll ( struct net_device *netdev ) {
723 struct ipoib_device *ipoib = netdev->priv;
724 struct ib_device *ibdev = ipoib->ibdev;
726 ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
727 ipoib_meta_complete_recv );
728 ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send,
729 ipoib_data_complete_recv );
730 ipoib_refill_recv ( ipoib, &ipoib->meta );
731 ipoib_refill_recv ( ipoib, &ipoib->data );
735 * Enable/disable interrupts on IPoIB network device
737 * @v netdev Network device
738 * @v enable Interrupts should be enabled
740 static void ipoib_irq ( struct net_device *netdev __unused,
741 int enable __unused ) {
742 /* No implementation */
746 * Open IPoIB network device
748 * @v netdev Network device
749 * @ret rc Return status code
751 static int ipoib_open ( struct net_device *netdev ) {
752 struct ipoib_device *ipoib = netdev->priv;
753 struct ib_device *ibdev = ipoib->ibdev;
754 int rc;
756 /* Attach to broadcast multicast GID */
757 if ( ( rc = ib_mcast_attach ( ibdev, ipoib->data.qp,
758 &ipoib->broadcast_gid ) ) != 0 ) {
759 DBG ( "Could not attach to broadcast GID: %s\n",
760 strerror ( rc ) );
761 return rc;
764 /* Fill receive rings */
765 ipoib_refill_recv ( ipoib, &ipoib->meta );
766 ipoib_refill_recv ( ipoib, &ipoib->data );
768 return 0;
772 * Close IPoIB network device
774 * @v netdev Network device
776 static void ipoib_close ( struct net_device *netdev ) {
777 struct ipoib_device *ipoib = netdev->priv;
778 struct ib_device *ibdev = ipoib->ibdev;
780 /* Detach from broadcast multicast GID */
781 ib_mcast_detach ( ibdev, ipoib->data.qp, &ipoib->broadcast_gid );
783 /* FIXME: should probably flush the receive ring */
786 /** IPoIB network device operations */
787 static struct net_device_operations ipoib_operations = {
788 .open = ipoib_open,
789 .close = ipoib_close,
790 .transmit = ipoib_transmit,
791 .poll = ipoib_poll,
792 .irq = ipoib_irq,
796 * Join IPoIB broadcast group
798 * @v ipoib IPoIB device
799 * @ret rc Return status code
801 static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
802 struct ib_device *ibdev = ipoib->ibdev;
803 unsigned int delay_ms;
804 int rc;
806 /* Make sure we have some receive descriptors */
807 ipoib_refill_recv ( ipoib, &ipoib->meta );
809 /* Send join request */
810 if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
811 1 ) ) != 0 ) {
812 DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
813 ipoib, strerror ( rc ) );
814 return rc;
817 /* Wait for join to complete. Ideally we wouldn't delay for
818 * this long, but we need the queue key before we can set up
819 * the data queue pair, which we need before we can know the
820 * MAC address.
822 for ( delay_ms = IPOIB_JOIN_MAX_DELAY_MS ; delay_ms ; delay_ms-- ) {
823 mdelay ( 1 );
824 ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
825 ipoib_meta_complete_recv );
826 ipoib_refill_recv ( ipoib, &ipoib->meta );
827 if ( ipoib->broadcast_joined )
828 return 0;
830 DBGC ( ipoib, "IPoIB %p timed out waiting for broadcast join\n",
831 ipoib );
833 return -ETIMEDOUT;
837 * Probe IPoIB device
839 * @v ibdev Infiniband device
840 * @ret rc Return status code
842 int ipoib_probe ( struct ib_device *ibdev ) {
843 struct net_device *netdev;
844 struct ipoib_device *ipoib;
845 struct ipoib_mac *mac;
846 int rc;
848 /* Allocate network device */
849 netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
850 if ( ! netdev )
851 return -ENOMEM;
852 netdev_init ( netdev, &ipoib_operations );
853 ipoib = netdev->priv;
854 ib_set_ownerdata ( ibdev, netdev );
855 netdev->dev = ibdev->dev;
856 memset ( ipoib, 0, sizeof ( *ipoib ) );
857 ipoib->netdev = netdev;
858 ipoib->ibdev = ibdev;
860 /* Calculate broadcast GID */
861 memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
862 sizeof ( ipoib->broadcast_gid ) );
863 ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
865 /* Allocate metadata queue set */
866 if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
867 IPOIB_META_NUM_CQES,
868 IPOIB_META_NUM_SEND_WQES,
869 IPOIB_META_NUM_RECV_WQES,
870 IB_GLOBAL_QKEY ) ) != 0 ) {
871 DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
872 ipoib, strerror ( rc ) );
873 goto err_create_meta_qset;
876 /* Join broadcast group */
877 if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
878 DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
879 ipoib, strerror ( rc ) );
880 goto err_join_broadcast_group;
883 /* Allocate data queue set */
884 if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
885 IPOIB_DATA_NUM_CQES,
886 IPOIB_DATA_NUM_SEND_WQES,
887 IPOIB_DATA_NUM_RECV_WQES,
888 ipoib->data_qkey ) ) != 0 ) {
889 DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
890 ipoib, strerror ( rc ) );
891 goto err_create_data_qset;
894 /* Construct MAC address */
895 mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
896 mac->qpn = htonl ( ipoib->data.qp->qpn );
897 memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
899 /* Register network device */
900 if ( ( rc = register_netdev ( netdev ) ) != 0 )
901 goto err_register_netdev;
903 return 0;
905 err_register_netdev:
906 ipoib_destroy_qset ( ipoib, &ipoib->data );
907 err_join_broadcast_group:
908 err_create_data_qset:
909 ipoib_destroy_qset ( ipoib, &ipoib->meta );
910 err_create_meta_qset:
911 netdev_nullify ( netdev );
912 netdev_put ( netdev );
913 return rc;
917 * Remove IPoIB device
919 * @v ibdev Infiniband device
921 void ipoib_remove ( struct ib_device *ibdev ) {
922 struct net_device *netdev = ib_get_ownerdata ( ibdev );
923 struct ipoib_device *ipoib = netdev->priv;
925 unregister_netdev ( netdev );
926 ipoib_destroy_qset ( ipoib, &ipoib->data );
927 ipoib_destroy_qset ( ipoib, &ipoib->meta );
928 netdev_nullify ( netdev );
929 netdev_put ( netdev );