2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 #include <linux/module.h>
37 #include <linux/errno.h>
38 #include <linux/slab.h>
39 #include <linux/workqueue.h>
40 #include <linux/netdevice.h>
41 #include <net/addrconf.h>
43 #include <rdma/ib_cache.h>
45 #include "core_priv.h"
47 struct ib_pkey_cache
{
52 struct ib_update_work
{
53 struct work_struct work
;
54 struct ib_event event
;
55 bool enforce_security
;
/* Bit flags selecting which attributes find_gid() compares. */
enum gid_attr_find_mask {
	GID_ATTR_FIND_MASK_GID		= 1UL << 0,	/* match the GID value */
	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,	/* match the net device */
	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,	/* match default/non-default slot */
	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,	/* match the GID type */
};
/* Life-cycle state of a GID table entry. */
enum gid_table_entry_state {
	GID_TABLE_ENTRY_INVALID		= 1,
	GID_TABLE_ENTRY_VALID		= 2,
	/*
	 * Entry is pending removal; there may still be active users of
	 * this GID entry. Once the last user drops its reference, the
	 * entry is detached from the table.
	 */
	GID_TABLE_ENTRY_PENDING_DEL	= 3,
};
80 struct roce_gid_ndev_storage
{
81 struct rcu_head rcu_head
;
82 struct net_device
*ndev
;
85 struct ib_gid_table_entry
{
87 struct work_struct del_work
;
88 struct ib_gid_attr attr
;
90 /* Store the ndev pointer to release reference later on in
91 * call_rcu context because by that time gid_table_entry
92 * and attr might be already freed. So keep a copy of it.
93 * ndev_storage is freed by rcu callback.
95 struct roce_gid_ndev_storage
*ndev_storage
;
96 enum gid_table_entry_state state
;
101 /* In RoCE, adding a GID to the table requires:
102 * (a) Find if this GID is already exists.
103 * (b) Find a free space.
104 * (c) Write the new GID
106 * Delete requires different set of operations:
111 /* Any writer to data_vec must hold this lock and the write side of
112 * rwlock. Readers must hold only rwlock. All writers must be in a
116 /* rwlock protects data_vec[ix]->state and entry pointer.
119 struct ib_gid_table_entry
**data_vec
;
120 /* bit field, each bit indicates the index of default GID */
121 u32 default_gid_indices
;
124 static void dispatch_gid_change_event(struct ib_device
*ib_dev
, u8 port
)
126 struct ib_event event
;
128 event
.device
= ib_dev
;
129 event
.element
.port_num
= port
;
130 event
.event
= IB_EVENT_GID_CHANGE
;
132 ib_dispatch_event_clients(&event
);
135 static const char * const gid_type_str
[] = {
136 [IB_GID_TYPE_IB
] = "IB/RoCE v1",
137 [IB_GID_TYPE_ROCE_UDP_ENCAP
] = "RoCE v2",
140 const char *ib_cache_gid_type_str(enum ib_gid_type gid_type
)
142 if (gid_type
< ARRAY_SIZE(gid_type_str
) && gid_type_str
[gid_type
])
143 return gid_type_str
[gid_type
];
145 return "Invalid GID type";
147 EXPORT_SYMBOL(ib_cache_gid_type_str
);
149 /** rdma_is_zero_gid - Check if given GID is zero or not.
151 * Returns true if given GID is zero, returns false otherwise.
153 bool rdma_is_zero_gid(const union ib_gid
*gid
)
155 return !memcmp(gid
, &zgid
, sizeof(*gid
));
157 EXPORT_SYMBOL(rdma_is_zero_gid
);
159 /** is_gid_index_default - Check if a given index belongs to
160 * reserved default GIDs or not.
161 * @table: GID table pointer
162 * @index: Index to check in GID table
163 * Returns true if index is one of the reserved default GID index otherwise
166 static bool is_gid_index_default(const struct ib_gid_table
*table
,
169 return index
< 32 && (BIT(index
) & table
->default_gid_indices
);
172 int ib_cache_gid_parse_type_str(const char *buf
)
182 if (buf
[len
- 1] == '\n')
185 for (i
= 0; i
< ARRAY_SIZE(gid_type_str
); ++i
)
186 if (gid_type_str
[i
] && !strncmp(buf
, gid_type_str
[i
], len
) &&
187 len
== strlen(gid_type_str
[i
])) {
194 EXPORT_SYMBOL(ib_cache_gid_parse_type_str
);
196 static struct ib_gid_table
*rdma_gid_table(struct ib_device
*device
, u8 port
)
198 return device
->port_data
[port
].cache
.gid
;
201 static bool is_gid_entry_free(const struct ib_gid_table_entry
*entry
)
206 static bool is_gid_entry_valid(const struct ib_gid_table_entry
*entry
)
208 return entry
&& entry
->state
== GID_TABLE_ENTRY_VALID
;
211 static void schedule_free_gid(struct kref
*kref
)
213 struct ib_gid_table_entry
*entry
=
214 container_of(kref
, struct ib_gid_table_entry
, kref
);
216 queue_work(ib_wq
, &entry
->del_work
);
219 static void put_gid_ndev(struct rcu_head
*head
)
221 struct roce_gid_ndev_storage
*storage
=
222 container_of(head
, struct roce_gid_ndev_storage
, rcu_head
);
224 WARN_ON(!storage
->ndev
);
225 /* At this point its safe to release netdev reference,
226 * as all callers working on gid_attr->ndev are done
229 dev_put(storage
->ndev
);
233 static void free_gid_entry_locked(struct ib_gid_table_entry
*entry
)
235 struct ib_device
*device
= entry
->attr
.device
;
236 u8 port_num
= entry
->attr
.port_num
;
237 struct ib_gid_table
*table
= rdma_gid_table(device
, port_num
);
239 dev_dbg(&device
->dev
, "%s port=%d index=%d gid %pI6\n", __func__
,
240 port_num
, entry
->attr
.index
, entry
->attr
.gid
.raw
);
242 write_lock_irq(&table
->rwlock
);
245 * The only way to avoid overwriting NULL in table is
246 * by comparing if it is same entry in table or not!
247 * If new entry in table is added by the time we free here,
248 * don't overwrite the table entry.
250 if (entry
== table
->data_vec
[entry
->attr
.index
])
251 table
->data_vec
[entry
->attr
.index
] = NULL
;
252 /* Now this index is ready to be allocated */
253 write_unlock_irq(&table
->rwlock
);
255 if (entry
->ndev_storage
)
256 call_rcu(&entry
->ndev_storage
->rcu_head
, put_gid_ndev
);
260 static void free_gid_entry(struct kref
*kref
)
262 struct ib_gid_table_entry
*entry
=
263 container_of(kref
, struct ib_gid_table_entry
, kref
);
265 free_gid_entry_locked(entry
);
269 * free_gid_work - Release reference to the GID entry
270 * @work: Work structure to refer to GID entry which needs to be
273 * free_gid_work() frees the entry from the HCA's hardware table
274 * if provider supports it. It releases reference to netdevice.
276 static void free_gid_work(struct work_struct
*work
)
278 struct ib_gid_table_entry
*entry
=
279 container_of(work
, struct ib_gid_table_entry
, del_work
);
280 struct ib_device
*device
= entry
->attr
.device
;
281 u8 port_num
= entry
->attr
.port_num
;
282 struct ib_gid_table
*table
= rdma_gid_table(device
, port_num
);
284 mutex_lock(&table
->lock
);
285 free_gid_entry_locked(entry
);
286 mutex_unlock(&table
->lock
);
289 static struct ib_gid_table_entry
*
290 alloc_gid_entry(const struct ib_gid_attr
*attr
)
292 struct ib_gid_table_entry
*entry
;
293 struct net_device
*ndev
;
295 entry
= kzalloc(sizeof(*entry
), GFP_KERNEL
);
299 ndev
= rcu_dereference_protected(attr
->ndev
, 1);
301 entry
->ndev_storage
= kzalloc(sizeof(*entry
->ndev_storage
),
303 if (!entry
->ndev_storage
) {
308 entry
->ndev_storage
->ndev
= ndev
;
310 kref_init(&entry
->kref
);
311 memcpy(&entry
->attr
, attr
, sizeof(*attr
));
312 INIT_WORK(&entry
->del_work
, free_gid_work
);
313 entry
->state
= GID_TABLE_ENTRY_INVALID
;
317 static void store_gid_entry(struct ib_gid_table
*table
,
318 struct ib_gid_table_entry
*entry
)
320 entry
->state
= GID_TABLE_ENTRY_VALID
;
322 dev_dbg(&entry
->attr
.device
->dev
, "%s port=%d index=%d gid %pI6\n",
323 __func__
, entry
->attr
.port_num
, entry
->attr
.index
,
324 entry
->attr
.gid
.raw
);
326 lockdep_assert_held(&table
->lock
);
327 write_lock_irq(&table
->rwlock
);
328 table
->data_vec
[entry
->attr
.index
] = entry
;
329 write_unlock_irq(&table
->rwlock
);
332 static void get_gid_entry(struct ib_gid_table_entry
*entry
)
334 kref_get(&entry
->kref
);
337 static void put_gid_entry(struct ib_gid_table_entry
*entry
)
339 kref_put(&entry
->kref
, schedule_free_gid
);
342 static void put_gid_entry_locked(struct ib_gid_table_entry
*entry
)
344 kref_put(&entry
->kref
, free_gid_entry
);
347 static int add_roce_gid(struct ib_gid_table_entry
*entry
)
349 const struct ib_gid_attr
*attr
= &entry
->attr
;
353 dev_err(&attr
->device
->dev
, "%s NULL netdev port=%d index=%d\n",
354 __func__
, attr
->port_num
, attr
->index
);
357 if (rdma_cap_roce_gid_table(attr
->device
, attr
->port_num
)) {
358 ret
= attr
->device
->ops
.add_gid(attr
, &entry
->context
);
360 dev_err(&attr
->device
->dev
,
361 "%s GID add failed port=%d index=%d\n",
362 __func__
, attr
->port_num
, attr
->index
);
370 * del_gid - Delete GID table entry
372 * @ib_dev: IB device whose GID entry to be deleted
373 * @port: Port number of the IB device
374 * @table: GID table of the IB device for a port
375 * @ix: GID entry index to delete
378 static void del_gid(struct ib_device
*ib_dev
, u8 port
,
379 struct ib_gid_table
*table
, int ix
)
381 struct roce_gid_ndev_storage
*ndev_storage
;
382 struct ib_gid_table_entry
*entry
;
384 lockdep_assert_held(&table
->lock
);
386 dev_dbg(&ib_dev
->dev
, "%s port=%d index=%d gid %pI6\n", __func__
, port
,
387 ix
, table
->data_vec
[ix
]->attr
.gid
.raw
);
389 write_lock_irq(&table
->rwlock
);
390 entry
= table
->data_vec
[ix
];
391 entry
->state
= GID_TABLE_ENTRY_PENDING_DEL
;
393 * For non RoCE protocol, GID entry slot is ready to use.
395 if (!rdma_protocol_roce(ib_dev
, port
))
396 table
->data_vec
[ix
] = NULL
;
397 write_unlock_irq(&table
->rwlock
);
399 ndev_storage
= entry
->ndev_storage
;
401 entry
->ndev_storage
= NULL
;
402 rcu_assign_pointer(entry
->attr
.ndev
, NULL
);
403 call_rcu(&ndev_storage
->rcu_head
, put_gid_ndev
);
406 if (rdma_cap_roce_gid_table(ib_dev
, port
))
407 ib_dev
->ops
.del_gid(&entry
->attr
, &entry
->context
);
409 put_gid_entry_locked(entry
);
413 * add_modify_gid - Add or modify GID table entry
415 * @table: GID table in which GID to be added or modified
416 * @attr: Attributes of the GID
418 * Returns 0 on success or appropriate error code. It accepts zero
419 * GID addition for non RoCE ports for HCA's who report them as valid
420 * GID. However such zero GIDs are not added to the cache.
422 static int add_modify_gid(struct ib_gid_table
*table
,
423 const struct ib_gid_attr
*attr
)
425 struct ib_gid_table_entry
*entry
;
429 * Invalidate any old entry in the table to make it safe to write to
432 if (is_gid_entry_valid(table
->data_vec
[attr
->index
]))
433 del_gid(attr
->device
, attr
->port_num
, table
, attr
->index
);
436 * Some HCA's report multiple GID entries with only one valid GID, and
437 * leave other unused entries as the zero GID. Convert zero GIDs to
438 * empty table entries instead of storing them.
440 if (rdma_is_zero_gid(&attr
->gid
))
443 entry
= alloc_gid_entry(attr
);
447 if (rdma_protocol_roce(attr
->device
, attr
->port_num
)) {
448 ret
= add_roce_gid(entry
);
453 store_gid_entry(table
, entry
);
457 put_gid_entry(entry
);
461 /* rwlock should be read locked, or lock should be held */
462 static int find_gid(struct ib_gid_table
*table
, const union ib_gid
*gid
,
463 const struct ib_gid_attr
*val
, bool default_gid
,
464 unsigned long mask
, int *pempty
)
468 int empty
= pempty
? -1 : 0;
470 while (i
< table
->sz
&& (found
< 0 || empty
< 0)) {
471 struct ib_gid_table_entry
*data
= table
->data_vec
[i
];
472 struct ib_gid_attr
*attr
;
477 /* find_gid() is used during GID addition where it is expected
478 * to return a free entry slot which is not duplicate.
479 * Free entry slot is requested and returned if pempty is set,
480 * so lookup free slot only if requested.
482 if (pempty
&& empty
< 0) {
483 if (is_gid_entry_free(data
) &&
485 is_gid_index_default(table
, curr_index
)) {
487 * Found an invalid (free) entry; allocate it.
488 * If default GID is requested, then our
489 * found slot must be one of the DEFAULT
490 * reserved slots or we fail.
491 * This ensures that only DEFAULT reserved
492 * slots are used for default property GIDs.
499 * Additionally find_gid() is used to find valid entry during
500 * lookup operation; so ignore the entries which are marked as
501 * pending for removal and the entries which are marked as
504 if (!is_gid_entry_valid(data
))
511 if (mask
& GID_ATTR_FIND_MASK_GID_TYPE
&&
512 attr
->gid_type
!= val
->gid_type
)
515 if (mask
& GID_ATTR_FIND_MASK_GID
&&
516 memcmp(gid
, &data
->attr
.gid
, sizeof(*gid
)))
519 if (mask
& GID_ATTR_FIND_MASK_NETDEV
&&
520 attr
->ndev
!= val
->ndev
)
523 if (mask
& GID_ATTR_FIND_MASK_DEFAULT
&&
524 is_gid_index_default(table
, curr_index
) != default_gid
)
536 static void make_default_gid(struct net_device
*dev
, union ib_gid
*gid
)
538 gid
->global
.subnet_prefix
= cpu_to_be64(0xfe80000000000000LL
);
539 addrconf_ifid_eui48(&gid
->raw
[8], dev
);
542 static int __ib_cache_gid_add(struct ib_device
*ib_dev
, u8 port
,
543 union ib_gid
*gid
, struct ib_gid_attr
*attr
,
544 unsigned long mask
, bool default_gid
)
546 struct ib_gid_table
*table
;
551 /* Do not allow adding zero GID in support of
552 * IB spec version 1.3 section 4.1.1 point (6) and
553 * section 12.7.10 and section 12.7.20
555 if (rdma_is_zero_gid(gid
))
558 table
= rdma_gid_table(ib_dev
, port
);
560 mutex_lock(&table
->lock
);
562 ix
= find_gid(table
, gid
, attr
, default_gid
, mask
, &empty
);
570 attr
->device
= ib_dev
;
572 attr
->port_num
= port
;
574 ret
= add_modify_gid(table
, attr
);
576 dispatch_gid_change_event(ib_dev
, port
);
579 mutex_unlock(&table
->lock
);
581 pr_warn("%s: unable to add gid %pI6 error=%d\n",
582 __func__
, gid
->raw
, ret
);
586 int ib_cache_gid_add(struct ib_device
*ib_dev
, u8 port
,
587 union ib_gid
*gid
, struct ib_gid_attr
*attr
)
589 unsigned long mask
= GID_ATTR_FIND_MASK_GID
|
590 GID_ATTR_FIND_MASK_GID_TYPE
|
591 GID_ATTR_FIND_MASK_NETDEV
;
593 return __ib_cache_gid_add(ib_dev
, port
, gid
, attr
, mask
, false);
597 _ib_cache_gid_del(struct ib_device
*ib_dev
, u8 port
,
598 union ib_gid
*gid
, struct ib_gid_attr
*attr
,
599 unsigned long mask
, bool default_gid
)
601 struct ib_gid_table
*table
;
605 table
= rdma_gid_table(ib_dev
, port
);
607 mutex_lock(&table
->lock
);
609 ix
= find_gid(table
, gid
, attr
, default_gid
, mask
, NULL
);
615 del_gid(ib_dev
, port
, table
, ix
);
616 dispatch_gid_change_event(ib_dev
, port
);
619 mutex_unlock(&table
->lock
);
621 pr_debug("%s: can't delete gid %pI6 error=%d\n",
622 __func__
, gid
->raw
, ret
);
626 int ib_cache_gid_del(struct ib_device
*ib_dev
, u8 port
,
627 union ib_gid
*gid
, struct ib_gid_attr
*attr
)
629 unsigned long mask
= GID_ATTR_FIND_MASK_GID
|
630 GID_ATTR_FIND_MASK_GID_TYPE
|
631 GID_ATTR_FIND_MASK_DEFAULT
|
632 GID_ATTR_FIND_MASK_NETDEV
;
634 return _ib_cache_gid_del(ib_dev
, port
, gid
, attr
, mask
, false);
637 int ib_cache_gid_del_all_netdev_gids(struct ib_device
*ib_dev
, u8 port
,
638 struct net_device
*ndev
)
640 struct ib_gid_table
*table
;
642 bool deleted
= false;
644 table
= rdma_gid_table(ib_dev
, port
);
646 mutex_lock(&table
->lock
);
648 for (ix
= 0; ix
< table
->sz
; ix
++) {
649 if (is_gid_entry_valid(table
->data_vec
[ix
]) &&
650 table
->data_vec
[ix
]->attr
.ndev
== ndev
) {
651 del_gid(ib_dev
, port
, table
, ix
);
656 mutex_unlock(&table
->lock
);
659 dispatch_gid_change_event(ib_dev
, port
);
665 * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
666 * a valid GID entry for given search parameters. It searches for the specified
667 * GID value in the local software cache.
668 * @device: The device to query.
669 * @gid: The GID value to search for.
670 * @gid_type: The GID type to search for.
671 * @port_num: The port number of the device where the GID value should be
673 * @ndev: In RoCE, the net device of the device. NULL means ignore.
675 * Returns sgid attributes if the GID is found with valid reference or
676 * returns ERR_PTR for the error.
677 * The caller must invoke rdma_put_gid_attr() to release the reference.
679 const struct ib_gid_attr
*
680 rdma_find_gid_by_port(struct ib_device
*ib_dev
,
681 const union ib_gid
*gid
,
682 enum ib_gid_type gid_type
,
683 u8 port
, struct net_device
*ndev
)
686 struct ib_gid_table
*table
;
687 unsigned long mask
= GID_ATTR_FIND_MASK_GID
|
688 GID_ATTR_FIND_MASK_GID_TYPE
;
689 struct ib_gid_attr val
= {.ndev
= ndev
, .gid_type
= gid_type
};
690 const struct ib_gid_attr
*attr
;
693 if (!rdma_is_port_valid(ib_dev
, port
))
694 return ERR_PTR(-ENOENT
);
696 table
= rdma_gid_table(ib_dev
, port
);
699 mask
|= GID_ATTR_FIND_MASK_NETDEV
;
701 read_lock_irqsave(&table
->rwlock
, flags
);
702 local_index
= find_gid(table
, gid
, &val
, false, mask
, NULL
);
703 if (local_index
>= 0) {
704 get_gid_entry(table
->data_vec
[local_index
]);
705 attr
= &table
->data_vec
[local_index
]->attr
;
706 read_unlock_irqrestore(&table
->rwlock
, flags
);
710 read_unlock_irqrestore(&table
->rwlock
, flags
);
711 return ERR_PTR(-ENOENT
);
713 EXPORT_SYMBOL(rdma_find_gid_by_port
);
716 * rdma_find_gid_by_filter - Returns the GID table attribute where a
717 * specified GID value occurs
718 * @device: The device to query.
719 * @gid: The GID value to search for.
720 * @port: The port number of the device where the GID value could be
722 * @filter: The filter function is executed on any matching GID in the table.
723 * If the filter function returns true, the corresponding index is returned,
724 * otherwise, we continue searching the GID table. It's guaranteed that
725 * while filter is executed, ndev field is valid and the structure won't
726 * change. filter is executed in an atomic context. filter must not be NULL.
728 * rdma_find_gid_by_filter() searches for the specified GID value
729 * of which the filter function returns true in the port's GID table.
732 const struct ib_gid_attr
*rdma_find_gid_by_filter(
733 struct ib_device
*ib_dev
, const union ib_gid
*gid
, u8 port
,
734 bool (*filter
)(const union ib_gid
*gid
, const struct ib_gid_attr
*,
738 const struct ib_gid_attr
*res
= ERR_PTR(-ENOENT
);
739 struct ib_gid_table
*table
;
743 if (!rdma_is_port_valid(ib_dev
, port
))
744 return ERR_PTR(-EINVAL
);
746 table
= rdma_gid_table(ib_dev
, port
);
748 read_lock_irqsave(&table
->rwlock
, flags
);
749 for (i
= 0; i
< table
->sz
; i
++) {
750 struct ib_gid_table_entry
*entry
= table
->data_vec
[i
];
752 if (!is_gid_entry_valid(entry
))
755 if (memcmp(gid
, &entry
->attr
.gid
, sizeof(*gid
)))
758 if (filter(gid
, &entry
->attr
, context
)) {
759 get_gid_entry(entry
);
764 read_unlock_irqrestore(&table
->rwlock
, flags
);
768 static struct ib_gid_table
*alloc_gid_table(int sz
)
770 struct ib_gid_table
*table
= kzalloc(sizeof(*table
), GFP_KERNEL
);
775 table
->data_vec
= kcalloc(sz
, sizeof(*table
->data_vec
), GFP_KERNEL
);
776 if (!table
->data_vec
)
779 mutex_init(&table
->lock
);
782 rwlock_init(&table
->rwlock
);
790 static void release_gid_table(struct ib_device
*device
,
791 struct ib_gid_table
*table
)
799 for (i
= 0; i
< table
->sz
; i
++) {
800 if (is_gid_entry_free(table
->data_vec
[i
]))
802 if (kref_read(&table
->data_vec
[i
]->kref
) > 1) {
803 dev_err(&device
->dev
,
804 "GID entry ref leak for index %d ref=%d\n", i
,
805 kref_read(&table
->data_vec
[i
]->kref
));
812 mutex_destroy(&table
->lock
);
813 kfree(table
->data_vec
);
817 static void cleanup_gid_table_port(struct ib_device
*ib_dev
, u8 port
,
818 struct ib_gid_table
*table
)
825 mutex_lock(&table
->lock
);
826 for (i
= 0; i
< table
->sz
; ++i
) {
827 if (is_gid_entry_valid(table
->data_vec
[i
]))
828 del_gid(ib_dev
, port
, table
, i
);
830 mutex_unlock(&table
->lock
);
833 void ib_cache_gid_set_default_gid(struct ib_device
*ib_dev
, u8 port
,
834 struct net_device
*ndev
,
835 unsigned long gid_type_mask
,
836 enum ib_cache_gid_default_mode mode
)
838 union ib_gid gid
= { };
839 struct ib_gid_attr gid_attr
;
840 unsigned int gid_type
;
843 mask
= GID_ATTR_FIND_MASK_GID_TYPE
|
844 GID_ATTR_FIND_MASK_DEFAULT
|
845 GID_ATTR_FIND_MASK_NETDEV
;
846 memset(&gid_attr
, 0, sizeof(gid_attr
));
847 gid_attr
.ndev
= ndev
;
849 for (gid_type
= 0; gid_type
< IB_GID_TYPE_SIZE
; ++gid_type
) {
850 if (1UL << gid_type
& ~gid_type_mask
)
853 gid_attr
.gid_type
= gid_type
;
855 if (mode
== IB_CACHE_GID_DEFAULT_MODE_SET
) {
856 make_default_gid(ndev
, &gid
);
857 __ib_cache_gid_add(ib_dev
, port
, &gid
,
858 &gid_attr
, mask
, true);
859 } else if (mode
== IB_CACHE_GID_DEFAULT_MODE_DELETE
) {
860 _ib_cache_gid_del(ib_dev
, port
, &gid
,
861 &gid_attr
, mask
, true);
866 static void gid_table_reserve_default(struct ib_device
*ib_dev
, u8 port
,
867 struct ib_gid_table
*table
)
870 unsigned long roce_gid_type_mask
;
871 unsigned int num_default_gids
;
873 roce_gid_type_mask
= roce_gid_type_mask_support(ib_dev
, port
);
874 num_default_gids
= hweight_long(roce_gid_type_mask
);
875 /* Reserve starting indices for default GIDs */
876 for (i
= 0; i
< num_default_gids
&& i
< table
->sz
; i
++)
877 table
->default_gid_indices
|= BIT(i
);
881 static void gid_table_release_one(struct ib_device
*ib_dev
)
885 rdma_for_each_port (ib_dev
, p
) {
886 release_gid_table(ib_dev
, ib_dev
->port_data
[p
].cache
.gid
);
887 ib_dev
->port_data
[p
].cache
.gid
= NULL
;
891 static int _gid_table_setup_one(struct ib_device
*ib_dev
)
893 struct ib_gid_table
*table
;
894 unsigned int rdma_port
;
896 rdma_for_each_port (ib_dev
, rdma_port
) {
897 table
= alloc_gid_table(
898 ib_dev
->port_data
[rdma_port
].immutable
.gid_tbl_len
);
900 goto rollback_table_setup
;
902 gid_table_reserve_default(ib_dev
, rdma_port
, table
);
903 ib_dev
->port_data
[rdma_port
].cache
.gid
= table
;
907 rollback_table_setup
:
908 gid_table_release_one(ib_dev
);
912 static void gid_table_cleanup_one(struct ib_device
*ib_dev
)
916 rdma_for_each_port (ib_dev
, p
)
917 cleanup_gid_table_port(ib_dev
, p
,
918 ib_dev
->port_data
[p
].cache
.gid
);
/*
 * Set up the per-port GID tables of @ib_dev and, on success, trigger a RoCE
 * rescan of the device. Returns 0 or the error from _gid_table_setup_one().
 */
static int gid_table_setup_one(struct ib_device *ib_dev)
{
	int err = _gid_table_setup_one(ib_dev);

	if (!err)
		rdma_roce_rescan_device(ib_dev);

	return err;
}
936 * rdma_query_gid - Read the GID content from the GID software cache
937 * @device: Device to query the GID
938 * @port_num: Port number of the device
939 * @index: Index of the GID table entry to read
940 * @gid: Pointer to GID where to store the entry's GID
942 * rdma_query_gid() only reads the GID entry content for requested device,
943 * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't
944 * hold any reference to the GID table entry in the HCA or software cache.
946 * Returns 0 on success or appropriate error code.
949 int rdma_query_gid(struct ib_device
*device
, u8 port_num
,
950 int index
, union ib_gid
*gid
)
952 struct ib_gid_table
*table
;
956 if (!rdma_is_port_valid(device
, port_num
))
959 table
= rdma_gid_table(device
, port_num
);
960 read_lock_irqsave(&table
->rwlock
, flags
);
962 if (index
< 0 || index
>= table
->sz
||
963 !is_gid_entry_valid(table
->data_vec
[index
]))
966 memcpy(gid
, &table
->data_vec
[index
]->attr
.gid
, sizeof(*gid
));
970 read_unlock_irqrestore(&table
->rwlock
, flags
);
973 EXPORT_SYMBOL(rdma_query_gid
);
976 * rdma_find_gid - Returns SGID attributes if the matching GID is found.
977 * @device: The device to query.
978 * @gid: The GID value to search for.
979 * @gid_type: The GID type to search for.
980 * @ndev: In RoCE, the net device of the device. NULL means ignore.
982 * rdma_find_gid() searches for the specified GID value in the software cache.
984 * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
985 * error. The caller must invoke rdma_put_gid_attr() to release the reference.
988 const struct ib_gid_attr
*rdma_find_gid(struct ib_device
*device
,
989 const union ib_gid
*gid
,
990 enum ib_gid_type gid_type
,
991 struct net_device
*ndev
)
993 unsigned long mask
= GID_ATTR_FIND_MASK_GID
|
994 GID_ATTR_FIND_MASK_GID_TYPE
;
995 struct ib_gid_attr gid_attr_val
= {.ndev
= ndev
, .gid_type
= gid_type
};
999 mask
|= GID_ATTR_FIND_MASK_NETDEV
;
1001 rdma_for_each_port(device
, p
) {
1002 struct ib_gid_table
*table
;
1003 unsigned long flags
;
1006 table
= device
->port_data
[p
].cache
.gid
;
1007 read_lock_irqsave(&table
->rwlock
, flags
);
1008 index
= find_gid(table
, gid
, &gid_attr_val
, false, mask
, NULL
);
1010 const struct ib_gid_attr
*attr
;
1012 get_gid_entry(table
->data_vec
[index
]);
1013 attr
= &table
->data_vec
[index
]->attr
;
1014 read_unlock_irqrestore(&table
->rwlock
, flags
);
1017 read_unlock_irqrestore(&table
->rwlock
, flags
);
1020 return ERR_PTR(-ENOENT
);
1022 EXPORT_SYMBOL(rdma_find_gid
);
1024 int ib_get_cached_pkey(struct ib_device
*device
,
1029 struct ib_pkey_cache
*cache
;
1030 unsigned long flags
;
1033 if (!rdma_is_port_valid(device
, port_num
))
1036 read_lock_irqsave(&device
->cache_lock
, flags
);
1038 cache
= device
->port_data
[port_num
].cache
.pkey
;
1040 if (index
< 0 || index
>= cache
->table_len
)
1043 *pkey
= cache
->table
[index
];
1045 read_unlock_irqrestore(&device
->cache_lock
, flags
);
1049 EXPORT_SYMBOL(ib_get_cached_pkey
);
1051 int ib_get_cached_subnet_prefix(struct ib_device
*device
,
1055 unsigned long flags
;
1057 if (!rdma_is_port_valid(device
, port_num
))
1060 read_lock_irqsave(&device
->cache_lock
, flags
);
1061 *sn_pfx
= device
->port_data
[port_num
].cache
.subnet_prefix
;
1062 read_unlock_irqrestore(&device
->cache_lock
, flags
);
1066 EXPORT_SYMBOL(ib_get_cached_subnet_prefix
);
1068 int ib_find_cached_pkey(struct ib_device
*device
,
1073 struct ib_pkey_cache
*cache
;
1074 unsigned long flags
;
1077 int partial_ix
= -1;
1079 if (!rdma_is_port_valid(device
, port_num
))
1082 read_lock_irqsave(&device
->cache_lock
, flags
);
1084 cache
= device
->port_data
[port_num
].cache
.pkey
;
1088 for (i
= 0; i
< cache
->table_len
; ++i
)
1089 if ((cache
->table
[i
] & 0x7fff) == (pkey
& 0x7fff)) {
1090 if (cache
->table
[i
] & 0x8000) {
1098 if (ret
&& partial_ix
>= 0) {
1099 *index
= partial_ix
;
1103 read_unlock_irqrestore(&device
->cache_lock
, flags
);
1107 EXPORT_SYMBOL(ib_find_cached_pkey
);
1109 int ib_find_exact_cached_pkey(struct ib_device
*device
,
1114 struct ib_pkey_cache
*cache
;
1115 unsigned long flags
;
1119 if (!rdma_is_port_valid(device
, port_num
))
1122 read_lock_irqsave(&device
->cache_lock
, flags
);
1124 cache
= device
->port_data
[port_num
].cache
.pkey
;
1128 for (i
= 0; i
< cache
->table_len
; ++i
)
1129 if (cache
->table
[i
] == pkey
) {
1135 read_unlock_irqrestore(&device
->cache_lock
, flags
);
1139 EXPORT_SYMBOL(ib_find_exact_cached_pkey
);
1141 int ib_get_cached_lmc(struct ib_device
*device
,
1145 unsigned long flags
;
1148 if (!rdma_is_port_valid(device
, port_num
))
1151 read_lock_irqsave(&device
->cache_lock
, flags
);
1152 *lmc
= device
->port_data
[port_num
].cache
.lmc
;
1153 read_unlock_irqrestore(&device
->cache_lock
, flags
);
1157 EXPORT_SYMBOL(ib_get_cached_lmc
);
1159 int ib_get_cached_port_state(struct ib_device
*device
,
1161 enum ib_port_state
*port_state
)
1163 unsigned long flags
;
1166 if (!rdma_is_port_valid(device
, port_num
))
1169 read_lock_irqsave(&device
->cache_lock
, flags
);
1170 *port_state
= device
->port_data
[port_num
].cache
.port_state
;
1171 read_unlock_irqrestore(&device
->cache_lock
, flags
);
1175 EXPORT_SYMBOL(ib_get_cached_port_state
);
1178 * rdma_get_gid_attr - Returns GID attributes for a port of a device
1179 * at a requested gid_index, if a valid GID entry exists.
1180 * @device: The device to query.
1181 * @port_num: The port number on the device where the GID value
1183 * @index: Index of the GID table entry whose attributes are to
1186 * rdma_get_gid_attr() acquires reference count of gid attributes from the
1187 * cached GID table. Caller must invoke rdma_put_gid_attr() to release
1188 * reference to gid attribute regardless of link layer.
1190 * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
1193 const struct ib_gid_attr
*
1194 rdma_get_gid_attr(struct ib_device
*device
, u8 port_num
, int index
)
1196 const struct ib_gid_attr
*attr
= ERR_PTR(-EINVAL
);
1197 struct ib_gid_table
*table
;
1198 unsigned long flags
;
1200 if (!rdma_is_port_valid(device
, port_num
))
1201 return ERR_PTR(-EINVAL
);
1203 table
= rdma_gid_table(device
, port_num
);
1204 if (index
< 0 || index
>= table
->sz
)
1205 return ERR_PTR(-EINVAL
);
1207 read_lock_irqsave(&table
->rwlock
, flags
);
1208 if (!is_gid_entry_valid(table
->data_vec
[index
]))
1211 get_gid_entry(table
->data_vec
[index
]);
1212 attr
= &table
->data_vec
[index
]->attr
;
1214 read_unlock_irqrestore(&table
->rwlock
, flags
);
1217 EXPORT_SYMBOL(rdma_get_gid_attr
);
1220 * rdma_put_gid_attr - Release reference to the GID attribute
1221 * @attr: Pointer to the GID attribute whose reference
1222 * needs to be released.
1224 * rdma_put_gid_attr() must be used to release reference whose
1225 * reference is acquired using rdma_get_gid_attr() or any APIs
1226 * which returns a pointer to the ib_gid_attr regardless of link layer
1230 void rdma_put_gid_attr(const struct ib_gid_attr
*attr
)
1232 struct ib_gid_table_entry
*entry
=
1233 container_of(attr
, struct ib_gid_table_entry
, attr
);
1235 put_gid_entry(entry
);
1237 EXPORT_SYMBOL(rdma_put_gid_attr
);
1240 * rdma_hold_gid_attr - Get reference to existing GID attribute
1242 * @attr: Pointer to the GID attribute whose reference
1243 * needs to be taken.
1245 * Increase the reference count to a GID attribute to keep it from being
1246 * freed. Callers are required to already be holding a reference to attribute.
1249 void rdma_hold_gid_attr(const struct ib_gid_attr
*attr
)
1251 struct ib_gid_table_entry
*entry
=
1252 container_of(attr
, struct ib_gid_table_entry
, attr
);
1254 get_gid_entry(entry
);
1256 EXPORT_SYMBOL(rdma_hold_gid_attr
);
1259 * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
1260 * which must be in UP state.
1262 * @attr:Pointer to the GID attribute
1264 * Returns pointer to netdevice if the netdevice was attached to GID and
1265 * netdevice is in UP state. Caller must hold RCU lock as this API
1266 * reads the netdev flags which can change while netdevice migrates to
1267 * different net namespace. Returns ERR_PTR with error code otherwise.
1270 struct net_device
*rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr
*attr
)
1272 struct ib_gid_table_entry
*entry
=
1273 container_of(attr
, struct ib_gid_table_entry
, attr
);
1274 struct ib_device
*device
= entry
->attr
.device
;
1275 struct net_device
*ndev
= ERR_PTR(-ENODEV
);
1276 u8 port_num
= entry
->attr
.port_num
;
1277 struct ib_gid_table
*table
;
1278 unsigned long flags
;
1281 table
= rdma_gid_table(device
, port_num
);
1283 read_lock_irqsave(&table
->rwlock
, flags
);
1284 valid
= is_gid_entry_valid(table
->data_vec
[attr
->index
]);
1286 ndev
= rcu_dereference(attr
->ndev
);
1288 (ndev
&& ((READ_ONCE(ndev
->flags
) & IFF_UP
) == 0)))
1289 ndev
= ERR_PTR(-ENODEV
);
1291 read_unlock_irqrestore(&table
->rwlock
, flags
);
1294 EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu
);
/* Callback passed to netdev_walk_all_lower_dev_rcu() by
 * rdma_read_gid_l2_fields(). @data is used as a pointer to a u16 which
 * receives the VLAN id of @lower_dev when that device is a VLAN device.
 * NOTE(review): the return statement is not present in this listing; the
 * original comment below says the callback always returns 1. */
1296 static int get_lower_dev_vlan(struct net_device
*lower_dev
, void *data
)
/* The opaque walker argument is the caller's vlan id slot. */
1298 u16
*vlan_id
= data
;
1300 if (is_vlan_dev(lower_dev
))
1301 *vlan_id
= vlan_dev_vlan_id(lower_dev
);
1303 /* We are interested only in first level vlan device, so
1304 * always return 1 to stop iterating over next level devices.
1310 * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address
1313 * @attr: GID attribute pointer whose L2 fields to be read
1314 * @vlan_id: Pointer to vlan id to fill up if the GID entry has
1315 * vlan id. It is optional.
1316 * @smac: Pointer to smac to fill up for a GID entry. It is optional.
1318 * rdma_read_gid_l2_fields() returns 0 on success, after filling in the vlan id
1319 * (if the gid entry has a vlan) and the source MAC; otherwise it returns an error.
/* Read L2 information for a GID attribute from its netdevice.
 * The visible code reads attr->ndev with rcu_dereference(), copies the
 * device's dev_addr into @smac with ether_addr_copy(), and stores a VLAN id
 * in @vlan_id: directly from the netdevice when it is a VLAN device,
 * otherwise by walking its lower devices with get_lower_dev_vlan().
 * NOTE(review): several original lines are absent from this listing
 * (locking, NULL checks on ndev/smac/vlan_id, the default vlan id and the
 * returns); confirm them against the full source. */
1321 int rdma_read_gid_l2_fields(const struct ib_gid_attr
*attr
,
1322 u16
*vlan_id
, u8
*smac
)
1324 struct net_device
*ndev
;
1327 ndev
= rcu_dereference(attr
->ndev
);
/* Source MAC is the netdevice's own hardware address. */
1333 ether_addr_copy(smac
, ndev
->dev_addr
);
/* A VLAN netdevice supplies its id directly. */
1336 if (is_vlan_dev(ndev
)) {
1337 *vlan_id
= vlan_dev_vlan_id(ndev
);
1339 /* If the netdev is upper device and if it's lower
1340 * device is vlan device, consider vlan id of the
1341 * the lower vlan device for this gid entry.
1343 netdev_walk_all_lower_dev_rcu(attr
->ndev
,
1344 get_lower_dev_vlan
, vlan_id
);
1350 EXPORT_SYMBOL(rdma_read_gid_l2_fields
);
/* Fill the GID table of a port by querying the driver.
 * With table->lock held, the visible code loops over indices
 * 0..gid_tbl_len-1, calls device->ops.query_gid() for each index into a
 * local ib_gid_attr (pre-set with @device and @port) and passes that
 * attribute to add_modify_gid(). A failed query is reported with dev_warn().
 * NOTE(review): declarations of `i`/`ret`, the handling of a missing
 * query_gid op, the error exit and the return value are not present in this
 * listing. */
1352 static int config_non_roce_gid_cache(struct ib_device
*device
,
1353 u8 port
, int gid_tbl_len
)
/* Zero-initialised scratch attribute reused for every index. */
1355 struct ib_gid_attr gid_attr
= {};
1356 struct ib_gid_table
*table
;
1360 gid_attr
.device
= device
;
1361 gid_attr
.port_num
= port
;
1362 table
= rdma_gid_table(device
, port
);
/* table->lock is held across the whole refill loop. */
1364 mutex_lock(&table
->lock
);
1365 for (i
= 0; i
< gid_tbl_len
; ++i
) {
/* The query_gid driver op is tested for presence before it is called. */
1366 if (!device
->ops
.query_gid
)
1368 ret
= device
->ops
.query_gid(device
, port
, i
, &gid_attr
.gid
);
1370 dev_warn(&device
->dev
,
1371 "query_gid failed (%d) for index %d\n", ret
,
/* Hand the queried GID to the table. */
1376 add_modify_gid(table
, &gid_attr
);
1379 mutex_unlock(&table
->lock
);
/* Refresh the cached attributes of one port.
 * The visible code validates @port, queries the port attributes into a
 * kmalloc()ed ib_port_attr, refills the GID cache through
 * config_non_roce_gid_cache() when the port is not RoCE, reads every P_Key
 * into a newly allocated ib_pkey_cache, and then, under the write side of
 * device->cache_lock, installs the new P_Key table together with lmc,
 * port_state and subnet_prefix in device->port_data[port].cache. When
 * @enforce_security is set, ib_security_cache_change() is called after the
 * swap. The previous P_Key table is released with kfree().
 * NOTE(review): the return-type line, declarations of `i`/`ret`, allocation
 * and query failure handling, and the return statements are not present in
 * this listing. */
1384 ib_cache_update(struct ib_device
*device
, u8 port
, bool enforce_security
)
1386 struct ib_port_attr
*tprops
= NULL
;
1387 struct ib_pkey_cache
*pkey_cache
= NULL
, *old_pkey_cache
;
1391 if (!rdma_is_port_valid(device
, port
))
/* Scratch buffer for the port attributes returned by ib_query_port(). */
1394 tprops
= kmalloc(sizeof *tprops
, GFP_KERNEL
);
1398 ret
= ib_query_port(device
, port
, tprops
);
1400 dev_warn(&device
->dev
, "ib_query_port failed (%d)\n", ret
);
/* Only non-RoCE ports have their GID cache filled from here. */
1404 if (!rdma_protocol_roce(device
, port
)) {
1405 ret
= config_non_roce_gid_cache(device
, port
,
1406 tprops
->gid_tbl_len
);
/* struct_size() sizes the cache header plus pkey_tbl_len table entries. */
1411 pkey_cache
= kmalloc(struct_size(pkey_cache
, table
,
1412 tprops
->pkey_tbl_len
),
1419 pkey_cache
->table_len
= tprops
->pkey_tbl_len
;
/* Populate the new table one index at a time. */
1421 for (i
= 0; i
< pkey_cache
->table_len
; ++i
) {
1422 ret
= ib_query_pkey(device
, port
, i
, pkey_cache
->table
+ i
);
1424 dev_warn(&device
->dev
,
1425 "ib_query_pkey failed (%d) for index %d\n",
/* Publish the new values under the cache write lock. */
1431 write_lock_irq(&device
->cache_lock
);
/* Remember the old table so it can be freed after the lock is dropped. */
1433 old_pkey_cache
= device
->port_data
[port
].cache
.pkey
;
1435 device
->port_data
[port
].cache
.pkey
= pkey_cache
;
1436 device
->port_data
[port
].cache
.lmc
= tprops
->lmc
;
1437 device
->port_data
[port
].cache
.port_state
= tprops
->state
;
1439 device
->port_data
[port
].cache
.subnet_prefix
= tprops
->subnet_prefix
;
1440 write_unlock_irq(&device
->cache_lock
);
/* The security layer is told about the change only on request. */
1442 if (enforce_security
)
1443 ib_security_cache_change(device
,
1445 tprops
->subnet_prefix
);
1447 kfree(old_pkey_cache
);
/* Work handler installed by ib_dispatch_event() for cache-affecting events.
 * It recovers the ib_update_work that embeds @_work, calls
 * ib_cache_update() for the event's device and port with the recorded
 * enforce_security flag, and, when that returned 0 and the event is not
 * IB_EVENT_GID_CHANGE, forwards the event with ib_dispatch_event_clients().
 * NOTE(review): the declaration of `ret` and any lines following the
 * dispatch are not present in this listing. */
1457 static void ib_cache_event_task(struct work_struct
*_work
)
1459 struct ib_update_work
*work
=
1460 container_of(_work
, struct ib_update_work
, work
);
1463 /* Before distributing the cache update event, first sync
1466 ret
= ib_cache_update(work
->event
.device
, work
->event
.element
.port_num
,
1467 work
->enforce_security
);
1469 /* GID event is notified already for individual GID entries by
1470 * dispatch_gid_change_event(). Hence, notifiy for rest of the
1473 if (!ret
&& work
->event
.event
!= IB_EVENT_GID_CHANGE
)
1474 ib_dispatch_event_clients(&work
->event
);
/* Work handler installed by ib_dispatch_event() for events that are not
 * cache-update events. It recovers the ib_update_work embedding @_work and
 * forwards its event with ib_dispatch_event_clients(), without touching the
 * cache.
 * NOTE(review): the embedded numbering skips three lines after the dispatch
 * call; whatever follows it is not present in this listing. */
1479 static void ib_generic_event_task(struct work_struct
*_work
)
1481 struct ib_update_work
*work
=
1482 container_of(_work
, struct ib_update_work
, work
);
1484 ib_dispatch_event_clients(&work
->event
);
1488 static bool is_cache_update_event(const struct ib_event
*event
)
1490 return (event
->event
== IB_EVENT_PORT_ERR
||
1491 event
->event
== IB_EVENT_PORT_ACTIVE
||
1492 event
->event
== IB_EVENT_LID_CHANGE
||
1493 event
->event
== IB_EVENT_PKEY_CHANGE
||
1494 event
->event
== IB_EVENT_CLIENT_REREGISTER
||
1495 event
->event
== IB_EVENT_GID_CHANGE
);
1499 * ib_dispatch_event - Dispatch an asynchronous event
1500 * @event: Event to dispatch
1502 * Low-level drivers must call ib_dispatch_event() to dispatch the
1503 * event to all registered event handlers when an asynchronous event
/* Queue an asynchronous event for deferred delivery on ib_wq.
 * The visible code allocates a zeroed ib_update_work with GFP_ATOMIC,
 * selects ib_cache_event_task or ib_generic_event_task as the work handler
 * depending on is_cache_update_event(), stores a copy of *@event in the
 * work item, sets enforce_security for P_Key and GID change events, and
 * queues the work.
 * NOTE(review): the allocation-failure check and the `else` between the two
 * INIT_WORK() calls are not present in this listing. */
1506 void ib_dispatch_event(const struct ib_event
*event
)
1508 struct ib_update_work
*work
;
/* kzalloc() leaves enforce_security false unless it is set below. */
1510 work
= kzalloc(sizeof(*work
), GFP_ATOMIC
);
/* Cache-affecting events get the handler that calls ib_cache_update(). */
1514 if (is_cache_update_event(event
))
1515 INIT_WORK(&work
->work
, ib_cache_event_task
);
1517 INIT_WORK(&work
->work
, ib_generic_event_task
);
/* The event is copied by value into the work item. */
1519 work
->event
= *event
;
1520 if (event
->event
== IB_EVENT_PKEY_CHANGE
||
1521 event
->event
== IB_EVENT_GID_CHANGE
)
1522 work
->enforce_security
= true;
1524 queue_work(ib_wq
, &work
->work
);
1526 EXPORT_SYMBOL(ib_dispatch_event
);
/* Set up the cache of @device: initialise cache_lock, call
 * gid_table_setup_one(), then run ib_cache_update() with enforce_security
 * set to true for every port of the device.
 * NOTE(review): declarations of `p`/`err`, the handling of a
 * gid_table_setup_one() failure and the return statement are not present in
 * this listing; the ib_cache_update() result is not captured on the visible
 * line. */
1528 int ib_cache_setup_one(struct ib_device
*device
)
1533 rwlock_init(&device
->cache_lock
);
1535 err
= gid_table_setup_one(device
);
/* Prime the per-port cache once for each port. */
1539 rdma_for_each_port (device
, p
)
1540 ib_cache_update(device
, p
, true);
/* Free the cache memory of @device: kfree() the P_Key cache of every port,
 * then call gid_table_release_one().
 * NOTE(review): the declaration of `p` and the delimiters of the original
 * comment below are not present in this listing. */
1545 void ib_cache_release_one(struct ib_device
*device
)
1550 * The release function frees all the cache elements.
1551 * This function should be called as part of freeing
1552 * all the device's resources when the cache could no
1553 * longer be accessed.
1555 rdma_for_each_port (device
, p
)
1556 kfree(device
->port_data
[p
].cache
.pkey
);
1558 gid_table_release_one(device
);
/* Quiesce and clean up the GID cache of @device: flush ib_wq, call
 * gid_table_cleanup_one(), then flush ib_wq a second time.
 * NOTE(review): the comment closers and the end of the function are not
 * present in this listing. */
1561 void ib_cache_cleanup_one(struct ib_device
*device
)
1563 /* The cleanup function waits for all in-progress workqueue
1564 * elements and cleans up the GID cache. This function should be
1565 * called after the device was removed from the devices list and
1566 * all clients were removed, so the cache exists but is
1567 * non-functional and shouldn't be updated anymore.
1569 flush_workqueue(ib_wq
);
1570 gid_table_cleanup_one(device
);
1573 * Flush the wq second time for any pending GID delete work.
1575 flush_workqueue(ib_wq
);