/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Intel Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <net/addrconf.h>

#include <rdma/ib_cache.h>

#include "core_priv.h"
struct ib_pkey_cache {
	u16		table_len;
	u16		table[];
};

struct ib_update_work {
	struct work_struct work;
	struct ib_event    event;
	bool enforce_security;
};
enum gid_attr_find_mask {
	GID_ATTR_FIND_MASK_GID		= 1UL << 0,
	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
};
enum gid_table_entry_state {
	GID_TABLE_ENTRY_INVALID		= 1,
	GID_TABLE_ENTRY_VALID		= 2,
	/*
	 * Indicates that entry is pending to be removed, there may
	 * be active users of this GID entry.
	 * When last user of the GID entry releases reference to it,
	 * GID entry is detached from the table.
	 */
	GID_TABLE_ENTRY_PENDING_DEL	= 3,
};
struct roce_gid_ndev_storage {
	struct rcu_head rcu_head;
	struct net_device *ndev;
};
struct ib_gid_table_entry {
	struct kref			kref;
	struct work_struct		del_work;
	struct ib_gid_attr		attr;
	void				*context;
	/* Store the ndev pointer to release reference later on in
	 * call_rcu context because by that time gid_table_entry
	 * and attr might be already freed. So keep a copy of it.
	 * ndev_storage is freed by rcu callback.
	 */
	struct roce_gid_ndev_storage	*ndev_storage;
	enum gid_table_entry_state	state;
};
struct ib_gid_table {
	int				sz;
	/* In RoCE, adding a GID to the table requires:
	 * (a) Find if this GID already exists.
	 * (b) Find a free space.
	 * (c) Write the new GID
	 *
	 * Delete requires different set of operations:
	 * (a) Find the GID
	 * (b) Delete it.
	 */
	/* Any writer to data_vec must hold this lock and the write side of
	 * rwlock. Readers must hold only rwlock. All writers must be in a
	 * sleepable context.
	 */
	struct mutex			lock;
	/* rwlock protects data_vec[ix]->state and entry pointer.
	 */
	rwlock_t			rwlock;
	struct ib_gid_table_entry	**data_vec;
	/* bit field, each bit indicates the index of default GID */
	u32				default_gid_indices;
};
static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
{
	struct ib_event event;

	event.device		= ib_dev;
	event.element.port_num	= port;
	event.event		= IB_EVENT_GID_CHANGE;

	ib_dispatch_event_clients(&event);
}
static const char * const gid_type_str[] = {
	[IB_GID_TYPE_IB]		= "IB/RoCE v1",
	[IB_GID_TYPE_ROCE_UDP_ENCAP]	= "RoCE v2",
};

const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
{
	if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
		return gid_type_str[gid_type];

	return "Invalid GID type";
}
EXPORT_SYMBOL(ib_cache_gid_type_str);
/** rdma_is_zero_gid - Check if given GID is zero or not.
 * @gid:	GID to check
 * Returns true if given GID is zero, returns false otherwise.
 */
bool rdma_is_zero_gid(const union ib_gid *gid)
{
	return !memcmp(gid, &zgid, sizeof(*gid));
}
EXPORT_SYMBOL(rdma_is_zero_gid);
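/*
 * Example (illustrative sketch, not part of this file): a caller can pair
 * rdma_query_gid() with rdma_is_zero_gid() to skip unused table slots.
 * "device", "port" and "index" are hypothetical caller-provided values.
 *
 *	union ib_gid gid;
 *
 *	if (!rdma_query_gid(device, port, index, &gid) &&
 *	    !rdma_is_zero_gid(&gid))
 *		pr_info("valid GID %pI6\n", gid.raw);
 */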
/** is_gid_index_default - Check if a given index belongs to
 * reserved default GIDs or not.
 * @table:	GID table pointer
 * @index:	Index to check in GID table
 * Returns true if index is one of the reserved default GID index otherwise
 * returns false.
 */
static bool is_gid_index_default(const struct ib_gid_table *table,
				 unsigned int index)
{
	return index < 32 && (BIT(index) & table->default_gid_indices);
}
int ib_cache_gid_parse_type_str(const char *buf)
{
	unsigned int i;
	size_t len;
	int err = -EINVAL;

	len = strlen(buf);
	if (len == 0)
		return -EINVAL;

	if (buf[len - 1] == '\n')
		len--;

	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
		if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
		    len == strlen(gid_type_str[i])) {
			err = i;
			break;
		}

	return err;
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
{
	return device->port_data[port].cache.gid;
}
static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
{
	return !entry;
}

static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry)
{
	return entry && entry->state == GID_TABLE_ENTRY_VALID;
}
static void schedule_free_gid(struct kref *kref)
{
	struct ib_gid_table_entry *entry =
			container_of(kref, struct ib_gid_table_entry, kref);

	queue_work(ib_wq, &entry->del_work);
}
static void put_gid_ndev(struct rcu_head *head)
{
	struct roce_gid_ndev_storage *storage =
		container_of(head, struct roce_gid_ndev_storage, rcu_head);

	WARN_ON(!storage->ndev);
	/* At this point its safe to release netdev reference,
	 * as all callers working on gid_attr->ndev are done
	 * using this netdev.
	 */
	dev_put(storage->ndev);
	kfree(storage);
}
static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
{
	struct ib_device *device = entry->attr.device;
	u8 port_num = entry->attr.port_num;
	struct ib_gid_table *table = rdma_gid_table(device, port_num);

	dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
		port_num, entry->attr.index, entry->attr.gid.raw);

	write_lock_irq(&table->rwlock);

	/*
	 * The only way to avoid overwriting NULL in table is
	 * by comparing if it is same entry in table or not!
	 * If new entry in table is added by the time we free here,
	 * don't overwrite the table entry.
	 */
	if (entry == table->data_vec[entry->attr.index])
		table->data_vec[entry->attr.index] = NULL;
	/* Now this index is ready to be allocated */
	write_unlock_irq(&table->rwlock);

	if (entry->ndev_storage)
		call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev);
	kfree(entry);
}
static void free_gid_entry(struct kref *kref)
{
	struct ib_gid_table_entry *entry =
			container_of(kref, struct ib_gid_table_entry, kref);

	free_gid_entry_locked(entry);
}
/**
 * free_gid_work - Release reference to the GID entry
 * @work: Work structure to refer to GID entry which needs to be
 * deleted.
 *
 * free_gid_work() frees the entry from the HCA's hardware table
 * if provider supports it. It releases reference to netdevice.
 */
static void free_gid_work(struct work_struct *work)
{
	struct ib_gid_table_entry *entry =
		container_of(work, struct ib_gid_table_entry, del_work);
	struct ib_device *device = entry->attr.device;
	u8 port_num = entry->attr.port_num;
	struct ib_gid_table *table = rdma_gid_table(device, port_num);

	mutex_lock(&table->lock);
	free_gid_entry_locked(entry);
	mutex_unlock(&table->lock);
}
static struct ib_gid_table_entry *
alloc_gid_entry(const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry;
	struct net_device *ndev;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return NULL;

	ndev = rcu_dereference_protected(attr->ndev, 1);
	if (ndev) {
		entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage),
					      GFP_KERNEL);
		if (!entry->ndev_storage) {
			kfree(entry);
			return NULL;
		}
		dev_hold(ndev);
		entry->ndev_storage->ndev = ndev;
	}
	kref_init(&entry->kref);
	memcpy(&entry->attr, attr, sizeof(*attr));
	INIT_WORK(&entry->del_work, free_gid_work);
	entry->state = GID_TABLE_ENTRY_INVALID;
	return entry;
}
static void store_gid_entry(struct ib_gid_table *table,
			    struct ib_gid_table_entry *entry)
{
	entry->state = GID_TABLE_ENTRY_VALID;

	dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
		__func__, entry->attr.port_num, entry->attr.index,
		entry->attr.gid.raw);

	lockdep_assert_held(&table->lock);
	write_lock_irq(&table->rwlock);
	table->data_vec[entry->attr.index] = entry;
	write_unlock_irq(&table->rwlock);
}
static void get_gid_entry(struct ib_gid_table_entry *entry)
{
	kref_get(&entry->kref);
}

static void put_gid_entry(struct ib_gid_table_entry *entry)
{
	kref_put(&entry->kref, schedule_free_gid);
}

static void put_gid_entry_locked(struct ib_gid_table_entry *entry)
{
	kref_put(&entry->kref, free_gid_entry);
}
static int add_roce_gid(struct ib_gid_table_entry *entry)
{
	const struct ib_gid_attr *attr = &entry->attr;
	int ret;

	if (!attr->ndev) {
		dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
			__func__, attr->port_num, attr->index);
		return -EINVAL;
	}
	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
		ret = attr->device->ops.add_gid(attr, &entry->context);
		if (ret) {
			dev_err(&attr->device->dev,
				"%s GID add failed port=%d index=%d\n",
				__func__, attr->port_num, attr->index);
			return ret;
		}
	}
	return 0;
}
/**
 * del_gid - Delete GID table entry
 *
 * @ib_dev:	IB device whose GID entry to be deleted
 * @port:	Port number of the IB device
 * @table:	GID table of the IB device for a port
 * @ix:		GID entry index to delete
 *
 */
static void del_gid(struct ib_device *ib_dev, u8 port,
		    struct ib_gid_table *table, int ix)
{
	struct roce_gid_ndev_storage *ndev_storage;
	struct ib_gid_table_entry *entry;

	lockdep_assert_held(&table->lock);

	dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
		ix, table->data_vec[ix]->attr.gid.raw);

	write_lock_irq(&table->rwlock);
	entry = table->data_vec[ix];
	entry->state = GID_TABLE_ENTRY_PENDING_DEL;
	/*
	 * For non RoCE protocol, GID entry slot is ready to use.
	 */
	if (!rdma_protocol_roce(ib_dev, port))
		table->data_vec[ix] = NULL;
	write_unlock_irq(&table->rwlock);

	ndev_storage = entry->ndev_storage;
	if (ndev_storage) {
		entry->ndev_storage = NULL;
		rcu_assign_pointer(entry->attr.ndev, NULL);
		call_rcu(&ndev_storage->rcu_head, put_gid_ndev);
	}

	if (rdma_cap_roce_gid_table(ib_dev, port))
		ib_dev->ops.del_gid(&entry->attr, &entry->context);

	put_gid_entry_locked(entry);
}
/**
 * add_modify_gid - Add or modify GID table entry
 *
 * @table:	GID table in which GID to be added or modified
 * @attr:	Attributes of the GID
 *
 * Returns 0 on success or appropriate error code. It accepts zero
 * GID addition for non RoCE ports for HCA's who report them as valid
 * GID. However such zero GIDs are not added to the cache.
 */
static int add_modify_gid(struct ib_gid_table *table,
			  const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry;
	int ret = 0;

	/*
	 * Invalidate any old entry in the table to make it safe to write to
	 * this index.
	 */
	if (is_gid_entry_valid(table->data_vec[attr->index]))
		del_gid(attr->device, attr->port_num, table, attr->index);

	/*
	 * Some HCA's report multiple GID entries with only one valid GID, and
	 * leave other unused entries as the zero GID. Convert zero GIDs to
	 * empty table entries instead of storing them.
	 */
	if (rdma_is_zero_gid(&attr->gid))
		return 0;

	entry = alloc_gid_entry(attr);
	if (!entry)
		return -ENOMEM;

	if (rdma_protocol_roce(attr->device, attr->port_num)) {
		ret = add_roce_gid(entry);
		if (ret)
			goto done;
	}

	store_gid_entry(table, entry);
	return 0;

done:
	put_gid_entry(entry);
	return ret;
}
/* rwlock should be read locked, or lock should be held */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
		    const struct ib_gid_attr *val, bool default_gid,
		    unsigned long mask, int *pempty)
{
	int i = 0;
	int found = -1;
	int empty = pempty ? -1 : 0;

	while (i < table->sz && (found < 0 || empty < 0)) {
		struct ib_gid_table_entry *data = table->data_vec[i];
		struct ib_gid_attr *attr;
		int curr_index = i;

		i++;

		/* find_gid() is used during GID addition where it is expected
		 * to return a free entry slot which is not duplicate.
		 * Free entry slot is requested and returned if pempty is set,
		 * so lookup free slot only if requested.
		 */
		if (pempty && empty < 0) {
			if (is_gid_entry_free(data) &&
			    default_gid ==
				is_gid_index_default(table, curr_index)) {
				/*
				 * Found an invalid (free) entry; allocate it.
				 * If default GID is requested, then our
				 * found slot must be one of the DEFAULT
				 * reserved slots or we fail.
				 * This ensures that only DEFAULT reserved
				 * slots are used for default property GIDs.
				 */
				empty = curr_index;
			}
		}

		/*
		 * Additionally find_gid() is used to find valid entry during
		 * lookup operation; so ignore the entries which are marked as
		 * pending for removal and the entries which are marked as
		 * invalid.
		 */
		if (!is_gid_entry_valid(data))
			continue;

		if (found >= 0)
			continue;

		attr = &data->attr;
		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
		    attr->gid_type != val->gid_type)
			continue;

		if (mask & GID_ATTR_FIND_MASK_GID &&
		    memcmp(gid, &data->attr.gid, sizeof(*gid)))
			continue;

		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
		    attr->ndev != val->ndev)
			continue;

		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
		    is_gid_index_default(table, curr_index) != default_gid)
			continue;

		found = curr_index;
	}

	if (pempty)
		*pempty = empty;

	return found;
}
static void make_default_gid(struct net_device *dev, union ib_gid *gid)
{
	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
	addrconf_ifid_eui48(&gid->raw[8], dev);
}
static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
			      union ib_gid *gid, struct ib_gid_attr *attr,
			      unsigned long mask, bool default_gid)
{
	struct ib_gid_table *table;
	int ret = 0;
	int empty;
	int ix;

	/* Do not allow adding zero GID in support of
	 * IB spec version 1.3 section 4.1.1 point (6) and
	 * section 12.7.10 and section 12.7.20
	 */
	if (rdma_is_zero_gid(gid))
		return -EINVAL;

	table = rdma_gid_table(ib_dev, port);

	mutex_lock(&table->lock);

	ix = find_gid(table, gid, attr, default_gid, mask, &empty);
	if (ix >= 0)
		goto out_unlock;

	if (empty < 0) {
		ret = -ENOSPC;
		goto out_unlock;
	}
	attr->device = ib_dev;
	attr->index = empty;
	attr->port_num = port;
	attr->gid = *gid;
	ret = add_modify_gid(table, attr);
	if (!ret)
		dispatch_gid_change_event(ib_dev, port);

out_unlock:
	mutex_unlock(&table->lock);
	if (ret)
		pr_warn("%s: unable to add gid %pI6 error=%d\n",
			__func__, gid->raw, ret);
	return ret;
}
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE |
			     GID_ATTR_FIND_MASK_NETDEV;

	return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
}
static int
_ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
		  union ib_gid *gid, struct ib_gid_attr *attr,
		  unsigned long mask, bool default_gid)
{
	struct ib_gid_table *table;
	int ret = 0;
	int ix;

	table = rdma_gid_table(ib_dev, port);

	mutex_lock(&table->lock);

	ix = find_gid(table, gid, attr, default_gid, mask, NULL);
	if (ix < 0) {
		ret = -EINVAL;
		goto out_unlock;
	}

	del_gid(ib_dev, port, table, ix);
	dispatch_gid_change_event(ib_dev, port);

out_unlock:
	mutex_unlock(&table->lock);
	if (ret)
		pr_debug("%s: can't delete gid %pI6 error=%d\n",
			 __func__, gid->raw, ret);
	return ret;
}
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
		     union ib_gid *gid, struct ib_gid_attr *attr)
{
	unsigned long mask = GID_ATTR_FIND_MASK_GID	  |
			     GID_ATTR_FIND_MASK_GID_TYPE |
			     GID_ATTR_FIND_MASK_DEFAULT  |
			     GID_ATTR_FIND_MASK_NETDEV;

	return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
}
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
				     struct net_device *ndev)
{
	struct ib_gid_table *table;
	int ix;
	bool deleted = false;

	table = rdma_gid_table(ib_dev, port);

	mutex_lock(&table->lock);

	for (ix = 0; ix < table->sz; ix++) {
		if (is_gid_entry_valid(table->data_vec[ix]) &&
		    table->data_vec[ix]->attr.ndev == ndev) {
			del_gid(ib_dev, port, table, ix);
			deleted = true;
		}
	}

	mutex_unlock(&table->lock);

	if (deleted)
		dispatch_gid_change_event(ib_dev, port);

	return 0;
}
/**
 * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
 * a valid GID entry for given search parameters. It searches for the specified
 * GID value in the local software cache.
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @gid_type: The GID type to search for.
 * @port_num: The port number of the device where the GID value should be
 *   searched.
 * @ndev: In RoCE, the net device of the device. NULL means ignore.
 *
 * Returns sgid attributes if the GID is found with valid reference or
 * returns ERR_PTR for the error.
 * The caller must invoke rdma_put_gid_attr() to release the reference.
 */
const struct ib_gid_attr *
rdma_find_gid_by_port(struct ib_device *ib_dev,
		      const union ib_gid *gid,
		      enum ib_gid_type gid_type,
		      u8 port, struct net_device *ndev)
{
	int local_index;
	struct ib_gid_table *table;
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
	const struct ib_gid_attr *attr;
	unsigned long flags;

	if (!rdma_is_port_valid(ib_dev, port))
		return ERR_PTR(-ENOENT);

	table = rdma_gid_table(ib_dev, port);

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	read_lock_irqsave(&table->rwlock, flags);
	local_index = find_gid(table, gid, &val, false, mask, NULL);
	if (local_index >= 0) {
		get_gid_entry(table->data_vec[local_index]);
		attr = &table->data_vec[local_index]->attr;
		read_unlock_irqrestore(&table->rwlock, flags);
		return attr;
	}

	read_unlock_irqrestore(&table->rwlock, flags);
	return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL(rdma_find_gid_by_port);
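/*
 * Example (illustrative sketch, not part of this file): looking up a
 * RoCE v2 GID on port 1 and dropping the reference when done. "dev",
 * "gid" and "ndev" are hypothetical caller-provided values.
 *
 *	const struct ib_gid_attr *attr;
 *
 *	attr = rdma_find_gid_by_port(dev, &gid, IB_GID_TYPE_ROCE_UDP_ENCAP,
 *				     1, ndev);
 *	if (IS_ERR(attr))
 *		return PTR_ERR(attr);
 *	... use attr->index, attr->gid_type ...
 *	rdma_put_gid_attr(attr);
 */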
/**
 * rdma_find_gid_by_filter - Returns the GID table attribute where a
 * specified GID value occurs
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @port: The port number of the device where the GID value could be
 *   searched.
 * @filter: The filter function is executed on any matching GID in the table.
 * If the filter function returns true, the corresponding index is returned,
 * otherwise, we continue searching the GID table. It's guaranteed that
 * while filter is executed, ndev field is valid and the structure won't
 * change. filter is executed in an atomic context. filter must not be NULL.
 * @context: Private data to pass into the call-back.
 *
 * rdma_find_gid_by_filter() searches for the specified GID value
 * of which the filter function returns true in the port's GID table.
 *
 */
const struct ib_gid_attr *rdma_find_gid_by_filter(
	struct ib_device *ib_dev, const union ib_gid *gid, u8 port,
	bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
		       void *),
	void *context)
{
	const struct ib_gid_attr *res = ERR_PTR(-ENOENT);
	struct ib_gid_table *table;
	unsigned long flags;
	unsigned int i;

	if (!rdma_is_port_valid(ib_dev, port))
		return ERR_PTR(-EINVAL);

	table = rdma_gid_table(ib_dev, port);

	read_lock_irqsave(&table->rwlock, flags);
	for (i = 0; i < table->sz; i++) {
		struct ib_gid_table_entry *entry = table->data_vec[i];

		if (!is_gid_entry_valid(entry))
			continue;

		if (memcmp(gid, &entry->attr.gid, sizeof(*gid)))
			continue;

		if (filter(gid, &entry->attr, context)) {
			get_gid_entry(entry);
			res = &entry->attr;
			break;
		}
	}
	read_unlock_irqrestore(&table->rwlock, flags);
	return res;
}
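/*
 * Example (illustrative sketch, not part of this file): a filter callback
 * that only accepts RoCE v2 entries. "dev", "gid" and "my_ctx" are
 * hypothetical; the callback runs under the table's read lock, so it must
 * not sleep.
 *
 *	static bool match_roce_v2(const union ib_gid *gid,
 *				  const struct ib_gid_attr *attr, void *ctx)
 *	{
 *		return attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
 *	}
 *
 *	attr = rdma_find_gid_by_filter(dev, &gid, 1, match_roce_v2, my_ctx);
 */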
static struct ib_gid_table *alloc_gid_table(int sz)
{
	struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL);

	if (!table)
		return NULL;

	table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL);
	if (!table->data_vec)
		goto err_free_table;

	mutex_init(&table->lock);

	table->sz = sz;
	rwlock_init(&table->rwlock);
	return table;

err_free_table:
	kfree(table);
	return NULL;
}
static void release_gid_table(struct ib_device *device,
			      struct ib_gid_table *table)
{
	bool leak = false;
	int i;

	if (!table)
		return;

	for (i = 0; i < table->sz; i++) {
		if (is_gid_entry_free(table->data_vec[i]))
			continue;
		if (kref_read(&table->data_vec[i]->kref) > 1) {
			dev_err(&device->dev,
				"GID entry ref leak for index %d ref=%d\n", i,
				kref_read(&table->data_vec[i]->kref));
			leak = true;
		}
	}
	if (leak)
		return;

	mutex_destroy(&table->lock);
	kfree(table->data_vec);
	kfree(table);
}
static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
				   struct ib_gid_table *table)
{
	int i;

	if (!table)
		return;

	mutex_lock(&table->lock);
	for (i = 0; i < table->sz; ++i) {
		if (is_gid_entry_valid(table->data_vec[i]))
			del_gid(ib_dev, port, table, i);
	}
	mutex_unlock(&table->lock);
}
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
				  struct net_device *ndev,
				  unsigned long gid_type_mask,
				  enum ib_cache_gid_default_mode mode)
{
	union ib_gid gid = { };
	struct ib_gid_attr gid_attr;
	unsigned int gid_type;
	unsigned long mask;

	mask = GID_ATTR_FIND_MASK_GID_TYPE |
	       GID_ATTR_FIND_MASK_DEFAULT |
	       GID_ATTR_FIND_MASK_NETDEV;
	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
		if (1UL << gid_type & ~gid_type_mask)
			continue;

		gid_attr.gid_type = gid_type;

		if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
			make_default_gid(ndev, &gid);
			__ib_cache_gid_add(ib_dev, port, &gid,
					   &gid_attr, mask, true);
		} else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
			_ib_cache_gid_del(ib_dev, port, &gid,
					  &gid_attr, mask, true);
		}
	}
}
static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
				      struct ib_gid_table *table)
{
	unsigned int i;
	unsigned long roce_gid_type_mask;
	unsigned int num_default_gids;

	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	num_default_gids = hweight_long(roce_gid_type_mask);
	/* Reserve starting indices for default GIDs */
	for (i = 0; i < num_default_gids && i < table->sz; i++)
		table->default_gid_indices |= BIT(i);
}
static void gid_table_release_one(struct ib_device *ib_dev)
{
	unsigned int p;

	rdma_for_each_port (ib_dev, p) {
		release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
		ib_dev->port_data[p].cache.gid = NULL;
	}
}
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
	struct ib_gid_table *table;
	unsigned int rdma_port;

	rdma_for_each_port (ib_dev, rdma_port) {
		table = alloc_gid_table(
			ib_dev->port_data[rdma_port].immutable.gid_tbl_len);
		if (!table)
			goto rollback_table_setup;

		gid_table_reserve_default(ib_dev, rdma_port, table);
		ib_dev->port_data[rdma_port].cache.gid = table;
	}
	return 0;

rollback_table_setup:
	gid_table_release_one(ib_dev);
	return -ENOMEM;
}
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
	unsigned int p;

	rdma_for_each_port (ib_dev, p)
		cleanup_gid_table_port(ib_dev, p,
				       ib_dev->port_data[p].cache.gid);
}
static int gid_table_setup_one(struct ib_device *ib_dev)
{
	int err;

	err = _gid_table_setup_one(ib_dev);
	if (err)
		return err;

	rdma_roce_rescan_device(ib_dev);

	return err;
}
/**
 * rdma_query_gid - Read the GID content from the GID software cache
 * @device:		Device to query the GID
 * @port_num:		Port number of the device
 * @index:		Index of the GID table entry to read
 * @gid:		Pointer to GID where to store the entry's GID
 *
 * rdma_query_gid() only reads the GID entry content for requested device,
 * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't
 * hold any reference to the GID table entry in the HCA or software cache.
 *
 * Returns 0 on success or appropriate error code.
 *
 */
int rdma_query_gid(struct ib_device *device, u8 port_num,
		   int index, union ib_gid *gid)
{
	struct ib_gid_table *table;
	unsigned long flags;
	int res = -EINVAL;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	table = rdma_gid_table(device, port_num);
	read_lock_irqsave(&table->rwlock, flags);

	if (index < 0 || index >= table->sz ||
	    !is_gid_entry_valid(table->data_vec[index]))
		goto done;

	memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
	res = 0;

done:
	read_unlock_irqrestore(&table->rwlock, flags);
	return res;
}
EXPORT_SYMBOL(rdma_query_gid);
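/*
 * Example (illustrative sketch, not part of this file): reading entry 0 of
 * port 1 into a local copy; no reference is held afterwards, so the copy
 * may go stale if the table changes. "device" is hypothetical.
 *
 *	union ib_gid gid;
 *	int ret;
 *
 *	ret = rdma_query_gid(device, 1, 0, &gid);
 *	if (ret)
 *		return ret;
 */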
/**
 * rdma_read_gid_hw_context - Read the HW GID context from GID attribute
 * @attr:		Pointer to the GID attribute
 *
 * rdma_read_gid_hw_context() reads the drivers GID HW context corresponding
 * to the SGID attr. Callers are required to already be holding the reference
 * to an existing GID entry.
 *
 * Returns the HW GID context
 *
 */
void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr)
{
	return container_of(attr, struct ib_gid_table_entry, attr)->context;
}
EXPORT_SYMBOL(rdma_read_gid_hw_context);
/**
 * rdma_find_gid - Returns SGID attributes if the matching GID is found.
 * @device: The device to query.
 * @gid: The GID value to search for.
 * @gid_type: The GID type to search for.
 * @ndev: In RoCE, the net device of the device. NULL means ignore.
 *
 * rdma_find_gid() searches for the specified GID value in the software cache.
 *
 * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
 * error. The caller must invoke rdma_put_gid_attr() to release the reference.
 *
 */
const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
					const union ib_gid *gid,
					enum ib_gid_type gid_type,
					struct net_device *ndev)
{
	unsigned long mask = GID_ATTR_FIND_MASK_GID |
			     GID_ATTR_FIND_MASK_GID_TYPE;
	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
	unsigned int p;

	if (ndev)
		mask |= GID_ATTR_FIND_MASK_NETDEV;

	rdma_for_each_port(device, p) {
		struct ib_gid_table *table;
		unsigned long flags;
		int index;

		table = device->port_data[p].cache.gid;
		read_lock_irqsave(&table->rwlock, flags);
		index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
		if (index >= 0) {
			const struct ib_gid_attr *attr;

			get_gid_entry(table->data_vec[index]);
			attr = &table->data_vec[index]->attr;
			read_unlock_irqrestore(&table->rwlock, flags);
			return attr;
		}
		read_unlock_irqrestore(&table->rwlock, flags);
	}

	return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL(rdma_find_gid);
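/*
 * Example (illustrative sketch, not part of this file): unlike
 * rdma_find_gid_by_port(), this searches every port of "device" (a
 * hypothetical caller-provided value).
 *
 *	attr = rdma_find_gid(device, &gid, IB_GID_TYPE_IB, NULL);
 *	if (!IS_ERR(attr)) {
 *		... attr->port_num tells which port matched ...
 *		rdma_put_gid_attr(attr);
 *	}
 */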
int ib_get_cached_pkey(struct ib_device *device,
		       u8                port_num,
		       int               index,
		       u16              *pkey)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache_lock, flags);

	cache = device->port_data[port_num].cache.pkey;

	if (index < 0 || index >= cache->table_len)
		ret = -EINVAL;
	else
		*pkey = cache->table[index];

	read_unlock_irqrestore(&device->cache_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);
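/*
 * Example (illustrative sketch, not part of this file): reading P_Key
 * index 0, which commonly holds the default partition key (0xffff).
 * "device" and "port" are hypothetical caller-provided values.
 *
 *	u16 pkey;
 *
 *	if (!ib_get_cached_pkey(device, port, 0, &pkey))
 *		pr_info("pkey[0]=0x%04x\n", pkey);
 */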
int ib_get_cached_subnet_prefix(struct ib_device *device,
				u8                port_num,
				u64              *sn_pfx)
{
	unsigned long flags;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache_lock, flags);
	*sn_pfx = device->port_data[port_num].cache.subnet_prefix;
	read_unlock_irqrestore(&device->cache_lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
int ib_find_cached_pkey(struct ib_device *device,
			u8                port_num,
			u16               pkey,
			u16              *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;
	int partial_ix = -1;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache_lock, flags);

	cache = device->port_data[port_num].cache.pkey;

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
			if (cache->table[i] & 0x8000) {
				*index = i;
				ret = 0;
				break;
			} else {
				partial_ix = i;
			}
		}

	if (ret && partial_ix >= 0) {
		*index = partial_ix;
		ret = 0;
	}

	read_unlock_irqrestore(&device->cache_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
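/*
 * Example (illustrative sketch, not part of this file): bit 15 of a P_Key
 * is the membership bit, so the loop above prefers a full-member match
 * (0x8000 set) and only falls back to a limited-member entry recorded in
 * "partial_ix". "device" and "port" are hypothetical.
 *
 *	u16 index;
 *
 *	if (!ib_find_cached_pkey(device, port, 0xffff, &index))
 *		pr_info("default pkey at index %u\n", index);
 */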
int ib_find_exact_cached_pkey(struct ib_device *device,
			      u8                port_num,
			      u16               pkey,
			      u16              *index)
{
	struct ib_pkey_cache *cache;
	unsigned long flags;
	int i;
	int ret = -ENOENT;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache_lock, flags);

	cache = device->port_data[port_num].cache.pkey;

	*index = -1;

	for (i = 0; i < cache->table_len; ++i)
		if (cache->table[i] == pkey) {
			*index = i;
			ret = 0;
			break;
		}

	read_unlock_irqrestore(&device->cache_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_find_exact_cached_pkey);
int ib_get_cached_lmc(struct ib_device *device,
		      u8                port_num,
		      u8                *lmc)
{
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache_lock, flags);
	*lmc = device->port_data[port_num].cache.lmc;
	read_unlock_irqrestore(&device->cache_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
int ib_get_cached_port_state(struct ib_device   *device,
			     u8                  port_num,
			     enum ib_port_state *port_state)
{
	unsigned long flags;
	int ret = 0;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	read_lock_irqsave(&device->cache_lock, flags);
	*port_state = device->port_data[port_num].cache.port_state;
	read_unlock_irqrestore(&device->cache_lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_get_cached_port_state);
/**
 * rdma_get_gid_attr - Returns GID attributes for a port of a device
 * at a requested gid_index, if a valid GID entry exists.
 * @device:		The device to query.
 * @port_num:		The port number on the device where the GID value
 *			is to be queried.
 * @index:		Index of the GID table entry whose attributes are to
 *			be queried.
 *
 * rdma_get_gid_attr() acquires reference count of gid attributes from the
 * cached GID table. Caller must invoke rdma_put_gid_attr() to release
 * reference to gid attribute regardless of link layer.
 *
 * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
 * code.
 */
const struct ib_gid_attr *
rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
{
	const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
	struct ib_gid_table *table;
	unsigned long flags;

	if (!rdma_is_port_valid(device, port_num))
		return ERR_PTR(-EINVAL);

	table = rdma_gid_table(device, port_num);
	if (index < 0 || index >= table->sz)
		return ERR_PTR(-EINVAL);

	read_lock_irqsave(&table->rwlock, flags);
	if (!is_gid_entry_valid(table->data_vec[index]))
		goto done;

	get_gid_entry(table->data_vec[index]);
	attr = &table->data_vec[index]->attr;
done:
	read_unlock_irqrestore(&table->rwlock, flags);
	return attr;
}
EXPORT_SYMBOL(rdma_get_gid_attr);
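/*
 * Example (illustrative sketch, not part of this file): walking a port's
 * GID table by index; every successful call must be paired with
 * rdma_put_gid_attr(). "device", "port_num" and "i" are hypothetical.
 *
 *	const struct ib_gid_attr *attr;
 *
 *	attr = rdma_get_gid_attr(device, port_num, i);
 *	if (IS_ERR(attr))
 *		continue;
 *	... inspect attr->gid, attr->gid_type ...
 *	rdma_put_gid_attr(attr);
 */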
/**
 * rdma_put_gid_attr - Release reference to the GID attribute
 * @attr:		Pointer to the GID attribute whose reference
 *			needs to be released.
 *
 * rdma_put_gid_attr() must be used to release reference whose
 * reference is acquired using rdma_get_gid_attr() or any APIs
 * which returns a pointer to the ib_gid_attr regardless of link layer
 * of IB or RoCE.
 *
 */
void rdma_put_gid_attr(const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry =
		container_of(attr, struct ib_gid_table_entry, attr);

	put_gid_entry(entry);
}
EXPORT_SYMBOL(rdma_put_gid_attr);
/**
 * rdma_hold_gid_attr - Get reference to existing GID attribute
 *
 * @attr:		Pointer to the GID attribute whose reference
 *			needs to be taken.
 *
 * Increase the reference count to a GID attribute to keep it from being
 * freed. Callers are required to already be holding a reference to attribute.
 *
 */
void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry =
		container_of(attr, struct ib_gid_table_entry, attr);

	get_gid_entry(entry);
}
EXPORT_SYMBOL(rdma_hold_gid_attr);
/**
 * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
 * which must be in UP state.
 *
 * @attr:Pointer to the GID attribute
 *
 * Returns pointer to netdevice if the netdevice was attached to GID and
 * netdevice is in UP state. Caller must hold RCU lock as this API
 * reads the netdev flags which can change while netdevice migrates to
 * different net namespace. Returns ERR_PTR with error code otherwise.
 *
 */
struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
{
	struct ib_gid_table_entry *entry =
			container_of(attr, struct ib_gid_table_entry, attr);
	struct ib_device *device = entry->attr.device;
	struct net_device *ndev = ERR_PTR(-ENODEV);
	u8 port_num = entry->attr.port_num;
	struct ib_gid_table *table;
	unsigned long flags;
	bool valid;

	table = rdma_gid_table(device, port_num);

	read_lock_irqsave(&table->rwlock, flags);
	valid = is_gid_entry_valid(table->data_vec[attr->index]);
	if (valid) {
		ndev = rcu_dereference(attr->ndev);
		if (!ndev ||
		    (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
			ndev = ERR_PTR(-ENODEV);
	}
	read_unlock_irqrestore(&table->rwlock, flags);
	return ndev;
}
EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);
static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
{
	u16 *vlan_id = data;

	if (is_vlan_dev(lower_dev))
		*vlan_id = vlan_dev_vlan_id(lower_dev);

	/* We are interested only in first level vlan device, so
	 * always return 1 to stop iterating over next level devices.
	 */
	return 1;
}
/**
 * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address
 *			     of a GID entry.
 *
 * @attr:	GID attribute pointer whose L2 fields to be read
 * @vlan_id:	Pointer to vlan id to fill up if the GID entry has
 *		vlan id. It is optional.
 * @smac:	Pointer to smac to fill up for a GID entry. It is optional.
 *
 * rdma_read_gid_l2_fields() returns 0 on success and returns vlan id
 * (if gid entry has vlan) and source MAC, or returns error.
 */
int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
			    u16 *vlan_id, u8 *smac)
{
	struct net_device *ndev;

	rcu_read_lock();
	ndev = rcu_dereference(attr->ndev);
	if (!ndev) {
		rcu_read_unlock();
		return -ENODEV;
	}
	if (smac)
		ether_addr_copy(smac, ndev->dev_addr);
	if (vlan_id) {
		*vlan_id = 0xffff;
		if (is_vlan_dev(ndev)) {
			*vlan_id = vlan_dev_vlan_id(ndev);
		} else {
			/* If the netdev is upper device and if it's lower
			 * device is vlan device, consider vlan id of the
			 * lower vlan device for this gid entry.
			 */
			netdev_walk_all_lower_dev_rcu(attr->ndev,
					get_lower_dev_vlan, vlan_id);
		}
	}
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rdma_read_gid_l2_fields);
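/*
 * Example (illustrative sketch, not part of this file): both output
 * parameters are optional, so pass NULL for fields you don't need.
 * "attr" is a hypothetical GID attribute the caller already holds.
 *
 *	u16 vlan_id;
 *	u8 smac[ETH_ALEN];
 *
 *	if (!rdma_read_gid_l2_fields(attr, &vlan_id, smac) &&
 *	    vlan_id != 0xffff)
 *		pr_info("gid entry on vlan %u\n", vlan_id);
 */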
static int config_non_roce_gid_cache(struct ib_device *device,
				     u8 port, int gid_tbl_len)
{
	struct ib_gid_attr gid_attr = {};
	struct ib_gid_table *table;
	int ret = 0;
	int i;

	gid_attr.device = device;
	gid_attr.port_num = port;
	table = rdma_gid_table(device, port);

	mutex_lock(&table->lock);
	for (i = 0; i < gid_tbl_len; ++i) {
		if (!device->ops.query_gid)
			continue;
		ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
		if (ret) {
			dev_warn(&device->dev,
				 "query_gid failed (%d) for index %d\n", ret,
				 i);
			goto err;
		}
		gid_attr.index = i;
		add_modify_gid(table, &gid_attr);
	}
err:
	mutex_unlock(&table->lock);
	return ret;
}
static int
ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
{
	struct ib_port_attr       *tprops = NULL;
	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
	int                        i;
	int                        ret;

	if (!rdma_is_port_valid(device, port))
		return -EINVAL;

	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
	if (!tprops)
		return -ENOMEM;

	ret = ib_query_port(device, port, tprops);
	if (ret) {
		dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
		goto err;
	}

	if (!rdma_protocol_roce(device, port)) {
		ret = config_non_roce_gid_cache(device, port,
						tprops->gid_tbl_len);
		if (ret)
			goto err;
	}

	pkey_cache = kmalloc(struct_size(pkey_cache, table,
					 tprops->pkey_tbl_len),
			     GFP_KERNEL);
	if (!pkey_cache) {
		ret = -ENOMEM;
		goto err;
	}

	pkey_cache->table_len = tprops->pkey_tbl_len;

	for (i = 0; i < pkey_cache->table_len; ++i) {
		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
		if (ret) {
			dev_warn(&device->dev,
				 "ib_query_pkey failed (%d) for index %d\n",
				 ret, i);
			goto err;
		}
	}

	write_lock_irq(&device->cache_lock);

	old_pkey_cache = device->port_data[port].cache.pkey;

	device->port_data[port].cache.pkey = pkey_cache;
	device->port_data[port].cache.lmc = tprops->lmc;
	device->port_data[port].cache.port_state = tprops->state;

	device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
	write_unlock_irq(&device->cache_lock);

	if (enforce_security)
		ib_security_cache_change(device,
					 port,
					 tprops->subnet_prefix);

	kfree(old_pkey_cache);
	kfree(tprops);
	return 0;

err:
	kfree(pkey_cache);
	kfree(tprops);
	return ret;
}
static void ib_cache_event_task(struct work_struct *_work)
{
	struct ib_update_work *work =
		container_of(_work, struct ib_update_work, work);
	int ret;

	/* Before distributing the cache update event, first sync
	 * the cache.
	 */
	ret = ib_cache_update(work->event.device, work->event.element.port_num,
			      work->enforce_security);

	/* GID event is notified already for individual GID entries by
	 * dispatch_gid_change_event(). Hence, notify for rest of the
	 * events.
	 */
	if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
		ib_dispatch_event_clients(&work->event);

	kfree(work);
}
static void ib_generic_event_task(struct work_struct *_work)
{
	struct ib_update_work *work =
		container_of(_work, struct ib_update_work, work);

	ib_dispatch_event_clients(&work->event);
	kfree(work);
}
static bool is_cache_update_event(const struct ib_event *event)
{
	return (event->event == IB_EVENT_PORT_ERR    ||
		event->event == IB_EVENT_PORT_ACTIVE ||
		event->event == IB_EVENT_LID_CHANGE  ||
		event->event == IB_EVENT_PKEY_CHANGE ||
		event->event == IB_EVENT_CLIENT_REREGISTER ||
		event->event == IB_EVENT_GID_CHANGE);
}
/**
 * ib_dispatch_event - Dispatch an asynchronous event
 * @event:Event to dispatch
 *
 * Low-level drivers must call ib_dispatch_event() to dispatch the
 * event to all registered event handlers when an asynchronous event
 * occurs.
 */
void ib_dispatch_event(const struct ib_event *event)
{
	struct ib_update_work *work;

	work = kzalloc(sizeof(*work), GFP_ATOMIC);
	if (!work)
		return;

	if (is_cache_update_event(event))
		INIT_WORK(&work->work, ib_cache_event_task);
	else
		INIT_WORK(&work->work, ib_generic_event_task);

	work->event = *event;
	if (event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_GID_CHANGE)
		work->enforce_security = true;

	queue_work(ib_wq, &work->work);
}
EXPORT_SYMBOL(ib_dispatch_event);
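/*
 * Example (illustrative sketch, not part of this file): how a driver
 * might report a port coming up; the cache update then runs from the
 * work item queued above. "ibdev" and "port" are hypothetical.
 *
 *	struct ib_event event = {
 *		.device = ibdev,
 *		.element.port_num = port,
 *		.event = IB_EVENT_PORT_ACTIVE,
 *	};
 *
 *	ib_dispatch_event(&event);
 */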
int ib_cache_setup_one(struct ib_device *device)
{
	unsigned int p;
	int err;

	rwlock_init(&device->cache_lock);

	err = gid_table_setup_one(device);
	if (err)
		return err;

	rdma_for_each_port (device, p)
		ib_cache_update(device, p, true);

	return 0;
}
*device
)
1567 * The release function frees all the cache elements.
1568 * This function should be called as part of freeing
1569 * all the device's resources when the cache could no
1570 * longer be accessed.
1572 rdma_for_each_port (device
, p
)
1573 kfree(device
->port_data
[p
].cache
.pkey
);
1575 gid_table_release_one(device
);
void ib_cache_cleanup_one(struct ib_device *device)
{
	/* The cleanup function waits for all in-progress workqueue
	 * elements and cleans up the GID cache. This function should be
	 * called after the device was removed from the devices list and
	 * all clients were removed, so the cache exists but is
	 * non-functional and shouldn't be updated anymore.
	 */
	flush_workqueue(ib_wq);
	gid_table_cleanup_one(device);

	/*
	 * Flush the wq second time for any pending GID delete work.
	 */
	flush_workqueue(ib_wq);
}