2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005 Intel Corporation. All rights reserved.
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 #include <linux/mutex.h>
37 #include <linux/inetdevice.h>
38 #include <linux/slab.h>
39 #include <linux/workqueue.h>
40 #include <linux/module.h>
42 #include <net/neighbour.h>
43 #include <net/route.h>
44 #include <net/netevent.h>
45 #include <net/addrconf.h>
46 #include <net/ip6_route.h>
47 #include <rdma/ib_addr.h>
50 MODULE_AUTHOR("Sean Hefty");
51 MODULE_DESCRIPTION("IB Address Translation");
52 MODULE_LICENSE("Dual BSD/GPL");
55 struct list_head list
;
56 struct sockaddr_storage src_addr
;
57 struct sockaddr_storage dst_addr
;
58 struct rdma_dev_addr
*addr
;
59 struct rdma_addr_client
*client
;
61 void (*callback
)(int status
, struct sockaddr
*src_addr
,
62 struct rdma_dev_addr
*addr
, void *context
);
63 unsigned long timeout
;
/* Work handler; declared ahead of its definition so 'work' can name it. */
static void process_req(struct work_struct *work);

/* NOTE(review): presumably serialises access to req_list - confirm. */
static DEFINE_MUTEX(lock);

/* Outstanding struct addr_req entries; queue_req() inserts by timeout. */
static LIST_HEAD(req_list);

/* Delayed work item that runs process_req(); (re)armed by set_timeout(). */
static DECLARE_DELAYED_WORK(work, process_req);

/* Workqueue that 'work' is queued on; created in addr_init(). */
static struct workqueue_struct *addr_wq;
74 int rdma_addr_size(struct sockaddr
*addr
)
76 switch (addr
->sa_family
) {
78 return sizeof(struct sockaddr_in
);
80 return sizeof(struct sockaddr_in6
);
82 return sizeof(struct sockaddr_ib
);
87 EXPORT_SYMBOL(rdma_addr_size
);
89 static struct rdma_addr_client self
;
91 void rdma_addr_register_client(struct rdma_addr_client
*client
)
93 atomic_set(&client
->refcount
, 1);
94 init_completion(&client
->comp
);
96 EXPORT_SYMBOL(rdma_addr_register_client
);
98 static inline void put_client(struct rdma_addr_client
*client
)
100 if (atomic_dec_and_test(&client
->refcount
))
101 complete(&client
->comp
);
104 void rdma_addr_unregister_client(struct rdma_addr_client
*client
)
107 wait_for_completion(&client
->comp
);
109 EXPORT_SYMBOL(rdma_addr_unregister_client
);
111 int rdma_copy_addr(struct rdma_dev_addr
*dev_addr
, struct net_device
*dev
,
112 const unsigned char *dst_dev_addr
)
114 dev_addr
->dev_type
= dev
->type
;
115 memcpy(dev_addr
->src_dev_addr
, dev
->dev_addr
, MAX_ADDR_LEN
);
116 memcpy(dev_addr
->broadcast
, dev
->broadcast
, MAX_ADDR_LEN
);
118 memcpy(dev_addr
->dst_dev_addr
, dst_dev_addr
, MAX_ADDR_LEN
);
119 dev_addr
->bound_dev_if
= dev
->ifindex
;
122 EXPORT_SYMBOL(rdma_copy_addr
);
124 int rdma_translate_ip(const struct sockaddr
*addr
,
125 struct rdma_dev_addr
*dev_addr
,
128 struct net_device
*dev
;
129 int ret
= -EADDRNOTAVAIL
;
131 if (dev_addr
->bound_dev_if
) {
132 dev
= dev_get_by_index(dev_addr
->net
, dev_addr
->bound_dev_if
);
135 ret
= rdma_copy_addr(dev_addr
, dev
, NULL
);
140 switch (addr
->sa_family
) {
142 dev
= ip_dev_find(dev_addr
->net
,
143 ((const struct sockaddr_in
*)addr
)->sin_addr
.s_addr
);
148 ret
= rdma_copy_addr(dev_addr
, dev
, NULL
);
150 *vlan_id
= rdma_vlan_dev_vlan_id(dev
);
153 #if IS_ENABLED(CONFIG_IPV6)
156 for_each_netdev_rcu(dev_addr
->net
, dev
) {
157 if (ipv6_chk_addr(dev_addr
->net
,
158 &((const struct sockaddr_in6
*)addr
)->sin6_addr
,
160 ret
= rdma_copy_addr(dev_addr
, dev
, NULL
);
162 *vlan_id
= rdma_vlan_dev_vlan_id(dev
);
172 EXPORT_SYMBOL(rdma_translate_ip
);
174 static void set_timeout(unsigned long time
)
178 delay
= time
- jiffies
;
182 mod_delayed_work(addr_wq
, &work
, delay
);
185 static void queue_req(struct addr_req
*req
)
187 struct addr_req
*temp_req
;
190 list_for_each_entry_reverse(temp_req
, &req_list
, list
) {
191 if (time_after_eq(req
->timeout
, temp_req
->timeout
))
195 list_add(&req
->list
, &temp_req
->list
);
197 if (req_list
.next
== &req
->list
)
198 set_timeout(req
->timeout
);
202 static int dst_fetch_ha(struct dst_entry
*dst
, struct rdma_dev_addr
*dev_addr
,
208 n
= dst_neigh_lookup(dst
, daddr
);
211 if (!n
|| !(n
->nud_state
& NUD_VALID
)) {
213 neigh_event_send(n
, NULL
);
216 ret
= rdma_copy_addr(dev_addr
, dst
->dev
, n
->ha
);
226 static int addr4_resolve(struct sockaddr_in
*src_in
,
227 const struct sockaddr_in
*dst_in
,
228 struct rdma_dev_addr
*addr
,
231 __be32 src_ip
= src_in
->sin_addr
.s_addr
;
232 __be32 dst_ip
= dst_in
->sin_addr
.s_addr
;
237 memset(&fl4
, 0, sizeof(fl4
));
240 fl4
.flowi4_oif
= addr
->bound_dev_if
;
241 rt
= ip_route_output_key(addr
->net
, &fl4
);
246 src_in
->sin_family
= AF_INET
;
247 src_in
->sin_addr
.s_addr
= fl4
.saddr
;
/* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
 * routable) and we could set the network type accordingly.
 */
252 if (rt
->rt_uses_gateway
)
253 addr
->network
= RDMA_NETWORK_IPV4
;
255 addr
->hoplimit
= ip4_dst_hoplimit(&rt
->dst
);
263 #if IS_ENABLED(CONFIG_IPV6)
264 static int addr6_resolve(struct sockaddr_in6
*src_in
,
265 const struct sockaddr_in6
*dst_in
,
266 struct rdma_dev_addr
*addr
,
267 struct dst_entry
**pdst
)
270 struct dst_entry
*dst
;
274 memset(&fl6
, 0, sizeof fl6
);
275 fl6
.daddr
= dst_in
->sin6_addr
;
276 fl6
.saddr
= src_in
->sin6_addr
;
277 fl6
.flowi6_oif
= addr
->bound_dev_if
;
279 dst
= ip6_route_output(addr
->net
, NULL
, &fl6
);
280 if ((ret
= dst
->error
))
283 rt
= (struct rt6_info
*)dst
;
284 if (ipv6_addr_any(&fl6
.saddr
)) {
285 ret
= ipv6_dev_get_saddr(addr
->net
, ip6_dst_idev(dst
)->dev
,
286 &fl6
.daddr
, 0, &fl6
.saddr
);
290 src_in
->sin6_family
= AF_INET6
;
291 src_in
->sin6_addr
= fl6
.saddr
;
/* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
 * routable) and we could set the network type accordingly.
 */
297 if (rt
->rt6i_flags
& RTF_GATEWAY
)
298 addr
->network
= RDMA_NETWORK_IPV6
;
300 addr
->hoplimit
= ip6_dst_hoplimit(dst
);
/*
 * Variant of addr6_resolve() that performs no lookup: every argument is
 * ignored and -EADDRNOTAVAIL is returned unconditionally.
 * NOTE(review): presumably the fallback built when CONFIG_IPV6 is
 * disabled - confirm against the surrounding preprocessor conditionals.
 */
static int addr6_resolve(struct sockaddr_in6 *src_in,
			 const struct sockaddr_in6 *dst_in,
			 struct rdma_dev_addr *addr,
			 struct dst_entry **pdst)
{
	return -EADDRNOTAVAIL;
}
318 static int addr_resolve_neigh(struct dst_entry
*dst
,
319 const struct sockaddr
*dst_in
,
320 struct rdma_dev_addr
*addr
)
322 if (dst
->dev
->flags
& IFF_LOOPBACK
) {
325 ret
= rdma_translate_ip(dst_in
, addr
, NULL
);
327 memcpy(addr
->dst_dev_addr
, addr
->src_dev_addr
,
333 /* If the device doesn't do ARP internally */
334 if (!(dst
->dev
->flags
& IFF_NOARP
)) {
335 const struct sockaddr_in
*dst_in4
=
336 (const struct sockaddr_in
*)dst_in
;
337 const struct sockaddr_in6
*dst_in6
=
338 (const struct sockaddr_in6
*)dst_in
;
340 return dst_fetch_ha(dst
, addr
,
341 dst_in
->sa_family
== AF_INET
?
342 (const void *)&dst_in4
->sin_addr
.s_addr
:
343 (const void *)&dst_in6
->sin6_addr
);
346 return rdma_copy_addr(addr
, dst
->dev
, NULL
);
349 static int addr_resolve(struct sockaddr
*src_in
,
350 const struct sockaddr
*dst_in
,
351 struct rdma_dev_addr
*addr
,
354 struct net_device
*ndev
;
355 struct dst_entry
*dst
;
358 if (src_in
->sa_family
== AF_INET
) {
359 struct rtable
*rt
= NULL
;
360 const struct sockaddr_in
*dst_in4
=
361 (const struct sockaddr_in
*)dst_in
;
363 ret
= addr4_resolve((struct sockaddr_in
*)src_in
,
369 ret
= addr_resolve_neigh(&rt
->dst
, dst_in
, addr
);
376 const struct sockaddr_in6
*dst_in6
=
377 (const struct sockaddr_in6
*)dst_in
;
379 ret
= addr6_resolve((struct sockaddr_in6
*)src_in
,
386 ret
= addr_resolve_neigh(dst
, dst_in
, addr
);
394 addr
->bound_dev_if
= ndev
->ifindex
;
395 addr
->net
= dev_net(ndev
);
401 static void process_req(struct work_struct
*work
)
403 struct addr_req
*req
, *temp_req
;
404 struct sockaddr
*src_in
, *dst_in
;
405 struct list_head done_list
;
407 INIT_LIST_HEAD(&done_list
);
410 list_for_each_entry_safe(req
, temp_req
, &req_list
, list
) {
411 if (req
->status
== -ENODATA
) {
412 src_in
= (struct sockaddr
*) &req
->src_addr
;
413 dst_in
= (struct sockaddr
*) &req
->dst_addr
;
414 req
->status
= addr_resolve(src_in
, dst_in
, req
->addr
,
416 if (req
->status
&& time_after_eq(jiffies
, req
->timeout
))
417 req
->status
= -ETIMEDOUT
;
418 else if (req
->status
== -ENODATA
)
421 list_move_tail(&req
->list
, &done_list
);
424 if (!list_empty(&req_list
)) {
425 req
= list_entry(req_list
.next
, struct addr_req
, list
);
426 set_timeout(req
->timeout
);
430 list_for_each_entry_safe(req
, temp_req
, &done_list
, list
) {
431 list_del(&req
->list
);
432 req
->callback(req
->status
, (struct sockaddr
*) &req
->src_addr
,
433 req
->addr
, req
->context
);
434 put_client(req
->client
);
439 int rdma_resolve_ip(struct rdma_addr_client
*client
,
440 struct sockaddr
*src_addr
, struct sockaddr
*dst_addr
,
441 struct rdma_dev_addr
*addr
, int timeout_ms
,
442 void (*callback
)(int status
, struct sockaddr
*src_addr
,
443 struct rdma_dev_addr
*addr
, void *context
),
446 struct sockaddr
*src_in
, *dst_in
;
447 struct addr_req
*req
;
450 req
= kzalloc(sizeof *req
, GFP_KERNEL
);
454 src_in
= (struct sockaddr
*) &req
->src_addr
;
455 dst_in
= (struct sockaddr
*) &req
->dst_addr
;
458 if (src_addr
->sa_family
!= dst_addr
->sa_family
) {
463 memcpy(src_in
, src_addr
, rdma_addr_size(src_addr
));
465 src_in
->sa_family
= dst_addr
->sa_family
;
468 memcpy(dst_in
, dst_addr
, rdma_addr_size(dst_addr
));
470 req
->callback
= callback
;
471 req
->context
= context
;
472 req
->client
= client
;
473 atomic_inc(&client
->refcount
);
475 req
->status
= addr_resolve(src_in
, dst_in
, addr
, true);
476 switch (req
->status
) {
478 req
->timeout
= jiffies
;
482 req
->timeout
= msecs_to_jiffies(timeout_ms
) + jiffies
;
487 atomic_dec(&client
->refcount
);
495 EXPORT_SYMBOL(rdma_resolve_ip
);
497 int rdma_resolve_ip_route(struct sockaddr
*src_addr
,
498 const struct sockaddr
*dst_addr
,
499 struct rdma_dev_addr
*addr
)
501 struct sockaddr_storage ssrc_addr
= {};
502 struct sockaddr
*src_in
= (struct sockaddr
*)&ssrc_addr
;
505 if (src_addr
->sa_family
!= dst_addr
->sa_family
)
508 memcpy(src_in
, src_addr
, rdma_addr_size(src_addr
));
510 src_in
->sa_family
= dst_addr
->sa_family
;
513 return addr_resolve(src_in
, dst_addr
, addr
, false);
515 EXPORT_SYMBOL(rdma_resolve_ip_route
);
517 void rdma_addr_cancel(struct rdma_dev_addr
*addr
)
519 struct addr_req
*req
, *temp_req
;
522 list_for_each_entry_safe(req
, temp_req
, &req_list
, list
) {
523 if (req
->addr
== addr
) {
524 req
->status
= -ECANCELED
;
525 req
->timeout
= jiffies
;
526 list_move(&req
->list
, &req_list
);
527 set_timeout(req
->timeout
);
533 EXPORT_SYMBOL(rdma_addr_cancel
);
535 struct resolve_cb_context
{
536 struct rdma_dev_addr
*addr
;
537 struct completion comp
;
540 static void resolve_cb(int status
, struct sockaddr
*src_addr
,
541 struct rdma_dev_addr
*addr
, void *context
)
543 memcpy(((struct resolve_cb_context
*)context
)->addr
, addr
, sizeof(struct
545 complete(&((struct resolve_cb_context
*)context
)->comp
);
548 int rdma_addr_find_l2_eth_by_grh(const union ib_gid
*sgid
,
549 const union ib_gid
*dgid
,
550 u8
*dmac
, u16
*vlan_id
, int *if_index
,
554 struct rdma_dev_addr dev_addr
;
555 struct resolve_cb_context ctx
;
556 struct net_device
*dev
;
559 struct sockaddr _sockaddr
;
560 struct sockaddr_in _sockaddr_in
;
561 struct sockaddr_in6 _sockaddr_in6
;
562 } sgid_addr
, dgid_addr
;
565 rdma_gid2ip(&sgid_addr
._sockaddr
, sgid
);
566 rdma_gid2ip(&dgid_addr
._sockaddr
, dgid
);
568 memset(&dev_addr
, 0, sizeof(dev_addr
));
570 dev_addr
.bound_dev_if
= *if_index
;
571 dev_addr
.net
= &init_net
;
573 ctx
.addr
= &dev_addr
;
574 init_completion(&ctx
.comp
);
575 ret
= rdma_resolve_ip(&self
, &sgid_addr
._sockaddr
, &dgid_addr
._sockaddr
,
576 &dev_addr
, 1000, resolve_cb
, &ctx
);
580 wait_for_completion(&ctx
.comp
);
582 memcpy(dmac
, dev_addr
.dst_dev_addr
, ETH_ALEN
);
583 dev
= dev_get_by_index(&init_net
, dev_addr
.bound_dev_if
);
587 *if_index
= dev_addr
.bound_dev_if
;
589 *vlan_id
= rdma_vlan_dev_vlan_id(dev
);
591 *hoplimit
= dev_addr
.hoplimit
;
595 EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh
);
597 int rdma_addr_find_smac_by_sgid(union ib_gid
*sgid
, u8
*smac
, u16
*vlan_id
)
600 struct rdma_dev_addr dev_addr
;
602 struct sockaddr _sockaddr
;
603 struct sockaddr_in _sockaddr_in
;
604 struct sockaddr_in6 _sockaddr_in6
;
607 rdma_gid2ip(&gid_addr
._sockaddr
, sgid
);
609 memset(&dev_addr
, 0, sizeof(dev_addr
));
610 dev_addr
.net
= &init_net
;
611 ret
= rdma_translate_ip(&gid_addr
._sockaddr
, &dev_addr
, vlan_id
);
615 memcpy(smac
, dev_addr
.src_dev_addr
, ETH_ALEN
);
618 EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid
);
620 static int netevent_callback(struct notifier_block
*self
, unsigned long event
,
623 if (event
== NETEVENT_NEIGH_UPDATE
) {
624 struct neighbour
*neigh
= ctx
;
626 if (neigh
->nud_state
& NUD_VALID
) {
627 set_timeout(jiffies
);
633 static struct notifier_block nb
= {
634 .notifier_call
= netevent_callback
637 static int __init
addr_init(void)
639 addr_wq
= create_singlethread_workqueue("ib_addr");
643 register_netevent_notifier(&nb
);
644 rdma_addr_register_client(&self
);
648 static void __exit
addr_cleanup(void)
650 rdma_addr_unregister_client(&self
);
651 unregister_netevent_notifier(&nb
);
652 destroy_workqueue(addr_wq
);
/* Module entry and exit points. */
module_init(addr_init);
module_exit(addr_cleanup);