// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
 */
#include <linux/scatterlist.h>
#include <linux/memregion.h>
#include <linux/highmem.h>
#include <linux/kstrtox.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/sort.h>
#include <linux/io.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"

/*
 * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
 * irrelevant.
 */
#include <linux/io-64-nonatomic-hi-lo.h>

static DEFINE_PER_CPU(int, flush_idx);
static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
		struct nd_region_data *ndrd)
{
	int i, j;

	dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
			nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
	for (i = 0; i < (1 << ndrd->hints_shift); i++) {
		struct resource *res = &nvdimm->flush_wpq[i];
		unsigned long pfn = PHYS_PFN(res->start);
		void __iomem *flush_page;

		/* check if flush hints share a page */
		for (j = 0; j < i; j++) {
			struct resource *res_j = &nvdimm->flush_wpq[j];
			unsigned long pfn_j = PHYS_PFN(res_j->start);

			if (pfn == pfn_j)
				break;
		}

		if (j < i)
			flush_page = (void __iomem *) ((unsigned long)
					ndrd_get_flush_wpq(ndrd, dimm, j)
					& PAGE_MASK);
		else
			flush_page = devm_nvdimm_ioremap(dev,
					PFN_PHYS(pfn), PAGE_SIZE);
		if (!flush_page)
			return -ENXIO;
		ndrd_set_flush_wpq(ndrd, dimm, i, flush_page
				+ (res->start & ~PAGE_MASK));
	}

	return 0;
}
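/*
 * Editor's note (illustration, not from the original source): if a DIMM
 * exposes two flush hint registers that live in the same 4K page, the loop
 * above ioremaps that page only once; the second WPQ slot reuses the first
 * mapping and differs only in the intra-page offset, so each hint page is
 * mapped at most once per DIMM.
 */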
static int nd_region_invalidate_memregion(struct nd_region *nd_region)
{
	int i, incoherent = 0;

	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		if (test_bit(NDD_INCOHERENT, &nvdimm->flags)) {
			incoherent++;
			break;
		}
	}

	if (!incoherent)
		return 0;

	if (!cpu_cache_has_invalidate_memregion()) {
		if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)) {
			dev_warn(&nd_region->dev,
				"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
			goto out;
		} else {
			dev_err(&nd_region->dev,
				"Failed to synchronize CPU cache state\n");
			return -ENXIO;
		}
	}

	cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
out:
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		clear_bit(NDD_INCOHERENT, &nvdimm->flags);
	}

	return 0;
}
int nd_region_activate(struct nd_region *nd_region)
{
	int i, j, rc, num_flush = 0;
	struct nd_region_data *ndrd;
	struct device *dev = &nd_region->dev;
	size_t flush_data_size = sizeof(void *);

	nvdimm_bus_lock(&nd_region->dev);
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
			nvdimm_bus_unlock(&nd_region->dev);
			return -EBUSY;
		}

		/* at least one null hint slot per-dimm for the "no-hint" case */
		flush_data_size += sizeof(void *);
		num_flush = min_not_zero(num_flush, nvdimm->num_flush);
		if (!nvdimm->num_flush)
			continue;
		flush_data_size += nvdimm->num_flush * sizeof(void *);
	}
	nvdimm_bus_unlock(&nd_region->dev);

	rc = nd_region_invalidate_memregion(nd_region);
	if (rc)
		return rc;

	ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
	if (!ndrd)
		return -ENOMEM;
	dev_set_drvdata(dev, ndrd);

	if (!num_flush)
		return 0;

	ndrd->hints_shift = ilog2(num_flush);
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;
		int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);

		if (rc)
			return rc;
	}

	/*
	 * Clear out entries that are duplicates. This should prevent the
	 * extra flushings.
	 */
	for (i = 0; i < nd_region->ndr_mappings - 1; i++) {
		/* ignore if NULL already */
		if (!ndrd_get_flush_wpq(ndrd, i, 0))
			continue;

		for (j = i + 1; j < nd_region->ndr_mappings; j++)
			if (ndrd_get_flush_wpq(ndrd, i, 0) ==
			    ndrd_get_flush_wpq(ndrd, j, 0))
				ndrd_set_flush_wpq(ndrd, j, 0, NULL);
	}

	return 0;
}
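/*
 * Editor's sketch (assumption, based on the ndrd_{get,set}_flush_wpq()
 * helpers declared in nd.h): the flush table allocated above is a flat
 * array with (1 << ndrd->hints_shift) slots per DIMM. For example, two
 * DIMMs that each expose two flush hints yield hints_shift = 1, with
 * slots [0..1] belonging to mapping 0 and slots [2..3] to mapping 1; a
 * DIMM without hints keeps only its NULL "no-hint" slot.
 */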
static void nd_region_release(struct device *dev)
{
	struct nd_region *nd_region = to_nd_region(dev);
	u16 i;

	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		put_device(&nvdimm->dev);
	}
	free_percpu(nd_region->lane);
	if (!test_bit(ND_REGION_CXL, &nd_region->flags))
		memregion_free(nd_region->id);
	kfree(nd_region);
}
struct nd_region *to_nd_region(struct device *dev)
{
	struct nd_region *nd_region = container_of(dev, struct nd_region, dev);

	WARN_ON(dev->type->release != nd_region_release);
	return nd_region;
}
EXPORT_SYMBOL_GPL(to_nd_region);
struct device *nd_region_dev(struct nd_region *nd_region)
{
	if (!nd_region)
		return NULL;
	return &nd_region->dev;
}
EXPORT_SYMBOL_GPL(nd_region_dev);
void *nd_region_provider_data(struct nd_region *nd_region)
{
	return nd_region->provider_data;
}
EXPORT_SYMBOL_GPL(nd_region_provider_data);
/**
 * nd_region_to_nstype() - region to an integer namespace type
 * @nd_region: region-device to interrogate
 *
 * This is the 'nstype' attribute of a region as well, an input to the
 * MODALIAS for namespace devices, and bit number for a nvdimm_bus to match
 * namespace devices with namespace drivers.
 */
int nd_region_to_nstype(struct nd_region *nd_region)
{
	if (is_memory(&nd_region->dev)) {
		u16 i, label;

		for (i = 0, label = 0; i < nd_region->ndr_mappings; i++) {
			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
			struct nvdimm *nvdimm = nd_mapping->nvdimm;

			if (test_bit(NDD_LABELING, &nvdimm->flags))
				label++;
		}
		if (label)
			return ND_DEVICE_NAMESPACE_PMEM;
		else
			return ND_DEVICE_NAMESPACE_IO;
	}

	return 0;
}
EXPORT_SYMBOL(nd_region_to_nstype);
static unsigned long long region_size(struct nd_region *nd_region)
{
	if (is_memory(&nd_region->dev)) {
		return nd_region->ndr_size;
	} else if (nd_region->ndr_mappings == 1) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[0];

		return nd_mapping->size;
	}

	return 0;
}
static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	return sprintf(buf, "%llu\n", region_size(nd_region));
}
static DEVICE_ATTR_RO(size);
static ssize_t deep_flush_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	/*
	 * NOTE: in the nvdimm_has_flush() error case this attribute is
	 * not visible.
	 */
	return sprintf(buf, "%d\n", nvdimm_has_flush(nd_region));
}

static ssize_t deep_flush_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	bool flush;
	int rc = kstrtobool(buf, &flush);
	struct nd_region *nd_region = to_nd_region(dev);

	if (rc)
		return rc;
	if (!flush)
		return -EINVAL;
	rc = nvdimm_flush(nd_region, NULL);
	if (rc)
		return rc;

	return len;
}
static DEVICE_ATTR_RW(deep_flush);
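/*
 * Usage example (editor's illustration, not part of the driver): reading
 * the region's "deep_flush" sysfs attribute reports the nvdimm_has_flush()
 * result, i.e. whether an explicit flush is required/available, and
 * writing "1" to it triggers nvdimm_flush() for the region; writing "0"
 * is rejected with -EINVAL.
 */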
static ssize_t mappings_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	return sprintf(buf, "%d\n", nd_region->ndr_mappings);
}
static DEVICE_ATTR_RO(mappings);
static ssize_t nstype_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region));
}
static DEVICE_ATTR_RO(nstype);
static ssize_t set_cookie_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	struct nd_interleave_set *nd_set = nd_region->nd_set;
	ssize_t rc = 0;

	if (is_memory(dev) && nd_set)
		/* pass, should be precluded by region_visible */;
	else
		return -ENXIO;

	/*
	 * The cookie to show depends on which specification of the
	 * labels we are using. If there are no labels then default to
	 * the v1.1 namespace label cookie definition. To read all this
	 * data we need to wait for probing to settle.
	 */
	nvdimm_bus_lock(dev);
	wait_nvdimm_bus_probe_idle(dev);
	if (nd_region->ndr_mappings) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[0];
		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);

		if (ndd) {
			struct nd_namespace_index *nsindex;

			nsindex = to_namespace_index(ndd, ndd->ns_current);
			rc = sprintf(buf, "%#llx\n",
					nd_region_interleave_set_cookie(nd_region,
							nsindex));
		}
	}
	nvdimm_bus_unlock(dev);

	if (rc)
		return rc;
	return sprintf(buf, "%#llx\n", nd_set->cookie1);
}
static DEVICE_ATTR_RO(set_cookie);
resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
{
	resource_size_t available;
	int i;

	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));

	available = 0;
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);

		/* if a dimm is disabled the available capacity is zero */
		if (!ndd)
			return 0;

		available += nd_pmem_available_dpa(nd_region, nd_mapping);
	}

	return available;
}
resource_size_t nd_region_allocatable_dpa(struct nd_region *nd_region)
{
	resource_size_t avail = 0;
	int i;

	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];

		avail = min_not_zero(avail, nd_pmem_max_contiguous_dpa(
				nd_region, nd_mapping));
	}
	return avail * nd_region->ndr_mappings;
}
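/*
 * Worked example (editor's illustration): for a 4-way interleaved region
 * whose mappings have maximum contiguous free DPA extents of 12G, 8G, 10G
 * and 8G, the function above reports 8G * 4 = 32G, i.e. the largest
 * extent that can be allocated evenly across all mappings.
 */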
static ssize_t available_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	unsigned long long available = 0;

	/*
	 * Flush in-flight updates and grab a snapshot of the available
	 * size. Of course, this value is potentially invalidated the
	 * moment the nvdimm_bus_lock() is dropped, but that's userspace's
	 * problem to not race itself.
	 */
	nvdimm_bus_lock(dev);
	wait_nvdimm_bus_probe_idle(dev);
	available = nd_region_available_dpa(nd_region);
	nvdimm_bus_unlock(dev);

	return sprintf(buf, "%llu\n", available);
}
static DEVICE_ATTR_RO(available_size);
static ssize_t max_available_extent_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	unsigned long long available = 0;

	nvdimm_bus_lock(dev);
	wait_nvdimm_bus_probe_idle(dev);
	available = nd_region_allocatable_dpa(nd_region);
	nvdimm_bus_unlock(dev);

	return sprintf(buf, "%llu\n", available);
}
static DEVICE_ATTR_RO(max_available_extent);
static ssize_t init_namespaces_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region_data *ndrd = dev_get_drvdata(dev);
	ssize_t rc;

	nvdimm_bus_lock(dev);
	if (ndrd)
		rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
	else
		rc = -ENXIO;
	nvdimm_bus_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(init_namespaces);
static ssize_t namespace_seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	ssize_t rc;

	nvdimm_bus_lock(dev);
	if (nd_region->ns_seed)
		rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed));
	else
		rc = sprintf(buf, "\n");
	nvdimm_bus_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(namespace_seed);
static ssize_t btt_seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	ssize_t rc;

	nvdimm_bus_lock(dev);
	if (nd_region->btt_seed)
		rc = sprintf(buf, "%s\n", dev_name(nd_region->btt_seed));
	else
		rc = sprintf(buf, "\n");
	nvdimm_bus_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(btt_seed);
static ssize_t pfn_seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	ssize_t rc;

	nvdimm_bus_lock(dev);
	if (nd_region->pfn_seed)
		rc = sprintf(buf, "%s\n", dev_name(nd_region->pfn_seed));
	else
		rc = sprintf(buf, "\n");
	nvdimm_bus_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(pfn_seed);
static ssize_t dax_seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	ssize_t rc;

	nvdimm_bus_lock(dev);
	if (nd_region->dax_seed)
		rc = sprintf(buf, "%s\n", dev_name(nd_region->dax_seed));
	else
		rc = sprintf(buf, "\n");
	nvdimm_bus_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(dax_seed);
static ssize_t read_only_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	return sprintf(buf, "%d\n", nd_region->ro);
}

static int revalidate_read_only(struct device *dev, void *data)
{
	nd_device_notify(dev, NVDIMM_REVALIDATE_REGION);
	return 0;
}
static ssize_t read_only_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	bool ro;
	int rc = kstrtobool(buf, &ro);
	struct nd_region *nd_region = to_nd_region(dev);

	if (rc)
		return rc;

	nd_region->ro = ro;
	device_for_each_child(dev, NULL, revalidate_read_only);
	return len;
}
static DEVICE_ATTR_RW(read_only);
static ssize_t align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	return sprintf(buf, "%#lx\n", nd_region->align);
}
static ssize_t align_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_region *nd_region = to_nd_region(dev);
	unsigned long val, dpa;
	u32 mappings, remainder;
	int rc;

	rc = kstrtoul(buf, 0, &val);
	if (rc)
		return rc;

	/*
	 * Ensure space-align is evenly divisible by the region
	 * interleave-width because the kernel typically has no facility
	 * to determine which DIMM(s), dimm-physical-addresses, would
	 * contribute to the tail capacity in system-physical-address
	 * space for the namespace.
	 */
	mappings = max_t(u32, 1, nd_region->ndr_mappings);
	dpa = div_u64_rem(val, mappings, &remainder);
	if (!is_power_of_2(dpa) || dpa < PAGE_SIZE
			|| val > region_size(nd_region) || remainder)
		return -EINVAL;

	/*
	 * Given that space allocation consults this value multiple
	 * times ensure it does not change for the duration of the
	 * allocation.
	 */
	nvdimm_bus_lock(dev);
	nd_region->align = val;
	nvdimm_bus_unlock(dev);

	return len;
}
static DEVICE_ATTR_RW(align);
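/*
 * Worked example (editor's illustration): writing 0x1000000 (16M) to a
 * 4-way interleaved region gives dpa = 4M and remainder = 0; 4M is a
 * power of two and >= PAGE_SIZE, so the value is accepted provided it
 * does not exceed region_size(). Writing 16M to a 3-way region fails
 * because 16M is not evenly divisible by 3.
 */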
static ssize_t region_badblocks_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	ssize_t rc;

	device_lock(dev);
	if (dev->driver)
		rc = badblocks_show(&nd_region->bb, buf, 0);
	else
		rc = -ENXIO;
	device_unlock(dev);

	return rc;
}
static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL);
static ssize_t resource_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	return sprintf(buf, "%#llx\n", nd_region->ndr_start);
}
static DEVICE_ATTR_ADMIN_RO(resource);
static ssize_t persistence_domain_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	if (test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags))
		return sprintf(buf, "cpu_cache\n");
	else if (test_bit(ND_REGION_PERSIST_MEMCTRL, &nd_region->flags))
		return sprintf(buf, "memory_controller\n");
	else
		return sprintf(buf, "\n");
}
static DEVICE_ATTR_RO(persistence_domain);
static struct attribute *nd_region_attributes[] = {
	&dev_attr_size.attr,
	&dev_attr_align.attr,
	&dev_attr_nstype.attr,
	&dev_attr_mappings.attr,
	&dev_attr_btt_seed.attr,
	&dev_attr_pfn_seed.attr,
	&dev_attr_dax_seed.attr,
	&dev_attr_deep_flush.attr,
	&dev_attr_read_only.attr,
	&dev_attr_set_cookie.attr,
	&dev_attr_available_size.attr,
	&dev_attr_max_available_extent.attr,
	&dev_attr_namespace_seed.attr,
	&dev_attr_init_namespaces.attr,
	&dev_attr_badblocks.attr,
	&dev_attr_resource.attr,
	&dev_attr_persistence_domain.attr,
	NULL,
};
static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, typeof(*dev), kobj);
	struct nd_region *nd_region = to_nd_region(dev);
	struct nd_interleave_set *nd_set = nd_region->nd_set;
	int type = nd_region_to_nstype(nd_region);

	if (!is_memory(dev) && a == &dev_attr_pfn_seed.attr)
		return 0;

	if (!is_memory(dev) && a == &dev_attr_dax_seed.attr)
		return 0;

	if (!is_memory(dev) && a == &dev_attr_badblocks.attr)
		return 0;

	if (a == &dev_attr_resource.attr && !is_memory(dev))
		return 0;

	if (a == &dev_attr_deep_flush.attr) {
		int has_flush = nvdimm_has_flush(nd_region);

		if (has_flush == 1)
			return a->mode;
		else if (has_flush == 0)
			return 0444;
		else
			return 0;
	}

	if (a == &dev_attr_persistence_domain.attr) {
		if ((nd_region->flags & (BIT(ND_REGION_PERSIST_CACHE)
					| BIT(ND_REGION_PERSIST_MEMCTRL))) == 0)
			return 0;
		return a->mode;
	}

	if (a == &dev_attr_align.attr)
		return a->mode;

	if (a != &dev_attr_set_cookie.attr
			&& a != &dev_attr_available_size.attr)
		return a->mode;

	if (type == ND_DEVICE_NAMESPACE_PMEM &&
	    a == &dev_attr_available_size.attr)
		return a->mode;
	else if (is_memory(dev) && nd_set)
		return a->mode;

	return 0;
}
static ssize_t mappingN(struct device *dev, char *buf, int n)
{
	struct nd_region *nd_region = to_nd_region(dev);
	struct nd_mapping *nd_mapping;
	struct nvdimm *nvdimm;

	if (n >= nd_region->ndr_mappings)
		return -ENXIO;
	nd_mapping = &nd_region->mapping[n];
	nvdimm = nd_mapping->nvdimm;

	return sprintf(buf, "%s,%llu,%llu,%d\n", dev_name(&nvdimm->dev),
			nd_mapping->start, nd_mapping->size,
			nd_mapping->position);
}
#define REGION_MAPPING(idx) \
static ssize_t mapping##idx##_show(struct device *dev,		\
		struct device_attribute *attr, char *buf)	\
{								\
	return mappingN(dev, buf, idx);				\
}								\
static DEVICE_ATTR_RO(mapping##idx)

/*
 * 32 should be enough for a while, even in the presence of socket
 * interleave a 32-way interleave set is a degenerate case.
 */
REGION_MAPPING(0);
REGION_MAPPING(1);
REGION_MAPPING(2);
REGION_MAPPING(3);
REGION_MAPPING(4);
REGION_MAPPING(5);
REGION_MAPPING(6);
REGION_MAPPING(7);
REGION_MAPPING(8);
REGION_MAPPING(9);
REGION_MAPPING(10);
REGION_MAPPING(11);
REGION_MAPPING(12);
REGION_MAPPING(13);
REGION_MAPPING(14);
REGION_MAPPING(15);
REGION_MAPPING(16);
REGION_MAPPING(17);
REGION_MAPPING(18);
REGION_MAPPING(19);
REGION_MAPPING(20);
REGION_MAPPING(21);
REGION_MAPPING(22);
REGION_MAPPING(23);
REGION_MAPPING(24);
REGION_MAPPING(25);
REGION_MAPPING(26);
REGION_MAPPING(27);
REGION_MAPPING(28);
REGION_MAPPING(29);
REGION_MAPPING(30);
REGION_MAPPING(31);

static umode_t mapping_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct nd_region *nd_region = to_nd_region(dev);

	if (n < nd_region->ndr_mappings)
		return a->mode;

	return 0;
}
static struct attribute *mapping_attributes[] = {
	&dev_attr_mapping0.attr,
	&dev_attr_mapping1.attr,
	&dev_attr_mapping2.attr,
	&dev_attr_mapping3.attr,
	&dev_attr_mapping4.attr,
	&dev_attr_mapping5.attr,
	&dev_attr_mapping6.attr,
	&dev_attr_mapping7.attr,
	&dev_attr_mapping8.attr,
	&dev_attr_mapping9.attr,
	&dev_attr_mapping10.attr,
	&dev_attr_mapping11.attr,
	&dev_attr_mapping12.attr,
	&dev_attr_mapping13.attr,
	&dev_attr_mapping14.attr,
	&dev_attr_mapping15.attr,
	&dev_attr_mapping16.attr,
	&dev_attr_mapping17.attr,
	&dev_attr_mapping18.attr,
	&dev_attr_mapping19.attr,
	&dev_attr_mapping20.attr,
	&dev_attr_mapping21.attr,
	&dev_attr_mapping22.attr,
	&dev_attr_mapping23.attr,
	&dev_attr_mapping24.attr,
	&dev_attr_mapping25.attr,
	&dev_attr_mapping26.attr,
	&dev_attr_mapping27.attr,
	&dev_attr_mapping28.attr,
	&dev_attr_mapping29.attr,
	&dev_attr_mapping30.attr,
	&dev_attr_mapping31.attr,
	NULL,
};
static const struct attribute_group nd_mapping_attribute_group = {
	.is_visible = mapping_visible,
	.attrs = mapping_attributes,
};

static const struct attribute_group nd_region_attribute_group = {
	.attrs = nd_region_attributes,
	.is_visible = region_visible,
};
static const struct attribute_group *nd_region_attribute_groups[] = {
	&nd_device_attribute_group,
	&nd_region_attribute_group,
	&nd_numa_attribute_group,
	&nd_mapping_attribute_group,
	NULL,
};
static const struct device_type nd_pmem_device_type = {
	.name = "nd_pmem",
	.release = nd_region_release,
	.groups = nd_region_attribute_groups,
};

static const struct device_type nd_volatile_device_type = {
	.name = "nd_volatile",
	.release = nd_region_release,
	.groups = nd_region_attribute_groups,
};
bool is_nd_pmem(const struct device *dev)
{
	return dev ? dev->type == &nd_pmem_device_type : false;
}

bool is_nd_volatile(const struct device *dev)
{
	return dev ? dev->type == &nd_volatile_device_type : false;
}
u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
		struct nd_namespace_index *nsindex)
{
	struct nd_interleave_set *nd_set = nd_region->nd_set;

	if (!nd_set)
		return 0;

	if (nsindex && __le16_to_cpu(nsindex->major) == 1
			&& __le16_to_cpu(nsindex->minor) == 1)
		return nd_set->cookie1;
	return nd_set->cookie2;
}
u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
{
	struct nd_interleave_set *nd_set = nd_region->nd_set;

	if (nd_set)
		return nd_set->altcookie;
	return 0;
}
void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
{
	struct nd_label_ent *label_ent, *e;

	lockdep_assert_held(&nd_mapping->lock);
	list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
		list_del(&label_ent->list);
		kfree(label_ent);
	}
}
/*
 * When a namespace is activated create new seeds for the next
 * namespace, or namespace-personality to be configured.
 */
void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
{
	nvdimm_bus_lock(dev);
	if (nd_region->ns_seed == dev) {
		nd_region_create_ns_seed(nd_region);
	} else if (is_nd_btt(dev)) {
		struct nd_btt *nd_btt = to_nd_btt(dev);

		if (nd_region->btt_seed == dev)
			nd_region_create_btt_seed(nd_region);
		if (nd_region->ns_seed == &nd_btt->ndns->dev)
			nd_region_create_ns_seed(nd_region);
	} else if (is_nd_pfn(dev)) {
		struct nd_pfn *nd_pfn = to_nd_pfn(dev);

		if (nd_region->pfn_seed == dev)
			nd_region_create_pfn_seed(nd_region);
		if (nd_region->ns_seed == &nd_pfn->ndns->dev)
			nd_region_create_ns_seed(nd_region);
	} else if (is_nd_dax(dev)) {
		struct nd_dax *nd_dax = to_nd_dax(dev);

		if (nd_region->dax_seed == dev)
			nd_region_create_dax_seed(nd_region);
		if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
			nd_region_create_ns_seed(nd_region);
	}
	nvdimm_bus_unlock(dev);
}
/*
 * nd_region_acquire_lane - allocate and lock a lane
 * @nd_region: region id and number of lanes possible
 *
 * A lane correlates to a BLK-data-window and/or a log slot in the BTT.
 * We optimize for the common case where there are 256 lanes, one
 * per-cpu. For larger systems we need to lock to share lanes. For now
 * this implementation assumes the cost of maintaining an allocator for
 * free lanes is on the order of the lock hold time, so it implements a
 * static lane = cpu % num_lanes mapping.
 *
 * In the case of a BTT instance on top of a BLK namespace a lane may be
 * acquired recursively. We lock on the first instance.
 *
 * In the case of a BTT instance on top of PMEM, we only acquire a lane
 * for the BTT metadata updates.
 */
unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
{
	unsigned int cpu, lane;

	migrate_disable();
	cpu = smp_processor_id();
	if (nd_region->num_lanes < nr_cpu_ids) {
		struct nd_percpu_lane *ndl_lock, *ndl_count;

		lane = cpu % nd_region->num_lanes;
		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
		if (ndl_count->count++ == 0)
			spin_lock(&ndl_lock->lock);
	} else
		lane = cpu;

	return lane;
}
EXPORT_SYMBOL(nd_region_acquire_lane);
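/*
 * Typical usage (illustrative sketch mirroring callers such as the BTT
 * driver; not a caller in this file):
 *
 *	unsigned int lane = nd_region_acquire_lane(nd_region);
 *
 *	... perform the per-lane I/O or BTT metadata update ...
 *
 *	nd_region_release_lane(nd_region, lane);
 */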
void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
{
	if (nd_region->num_lanes < nr_cpu_ids) {
		unsigned int cpu = smp_processor_id();
		struct nd_percpu_lane *ndl_lock, *ndl_count;

		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
		if (--ndl_count->count == 0)
			spin_unlock(&ndl_lock->lock);
	}
	migrate_enable();
}
EXPORT_SYMBOL(nd_region_release_lane);
/*
 * PowerPC requires this alignment for memremap_pages(). All other archs
 * should be ok with SUBSECTION_SIZE (see memremap_compat_align()).
 */
#define MEMREMAP_COMPAT_ALIGN_MAX SZ_16M

static unsigned long default_align(struct nd_region *nd_region)
{
	unsigned long align;
	u32 remainder;
	int mappings;

	align = MEMREMAP_COMPAT_ALIGN_MAX;
	if (nd_region->ndr_size < MEMREMAP_COMPAT_ALIGN_MAX)
		align = PAGE_SIZE;

	mappings = max_t(u16, 1, nd_region->ndr_mappings);
	div_u64_rem(align, mappings, &remainder);
	if (remainder)
		align *= mappings;

	return align;
}

static struct lock_class_key nvdimm_region_key;
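/*
 * Worked example (editor's illustration of the reconstructed logic above):
 * a 2-way interleaved region larger than 16M gets align = 16M since 16M is
 * evenly divisible by 2, while a 3-way interleave is bumped to 16M * 3 = 48M
 * because 16M leaves a non-zero remainder when divided by 3.
 */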
static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
		struct nd_region_desc *ndr_desc,
		const struct device_type *dev_type, const char *caller)
{
	struct nd_region *nd_region;
	struct device *dev;
	unsigned int i;
	int ro = 0;

	for (i = 0; i < ndr_desc->num_mappings; i++) {
		struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
		struct nvdimm *nvdimm = mapping->nvdimm;

		if ((mapping->start | mapping->size) % PAGE_SIZE) {
			dev_err(&nvdimm_bus->dev,
				"%s: %s mapping%d is not %ld aligned\n",
				caller, dev_name(&nvdimm->dev), i, PAGE_SIZE);
			return NULL;
		}

		if (test_bit(NDD_UNARMED, &nvdimm->flags))
			ro = 1;
	}

	nd_region =
		kzalloc(struct_size(nd_region, mapping, ndr_desc->num_mappings),
			GFP_KERNEL);

	if (!nd_region)
		return NULL;
	nd_region->ndr_mappings = ndr_desc->num_mappings;
	/* CXL pre-assigns memregion ids before creating nvdimm regions */
	if (test_bit(ND_REGION_CXL, &ndr_desc->flags)) {
		nd_region->id = ndr_desc->memregion;
	} else {
		nd_region->id = memregion_alloc(GFP_KERNEL);
		if (nd_region->id < 0)
			goto err_id;
	}

	nd_region->lane = alloc_percpu(struct nd_percpu_lane);
	if (!nd_region->lane)
		goto err_percpu;

	for (i = 0; i < nr_cpu_ids; i++) {
		struct nd_percpu_lane *ndl;

		ndl = per_cpu_ptr(nd_region->lane, i);
		spin_lock_init(&ndl->lock);
		ndl->count = 0;
	}

	for (i = 0; i < ndr_desc->num_mappings; i++) {
		struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
		struct nvdimm *nvdimm = mapping->nvdimm;

		nd_region->mapping[i].nvdimm = nvdimm;
		nd_region->mapping[i].start = mapping->start;
		nd_region->mapping[i].size = mapping->size;
		nd_region->mapping[i].position = mapping->position;
		INIT_LIST_HEAD(&nd_region->mapping[i].labels);
		mutex_init(&nd_region->mapping[i].lock);

		get_device(&nvdimm->dev);
	}
	nd_region->provider_data = ndr_desc->provider_data;
	nd_region->nd_set = ndr_desc->nd_set;
	nd_region->num_lanes = ndr_desc->num_lanes;
	nd_region->flags = ndr_desc->flags;
	nd_region->ro = ro;
	nd_region->numa_node = ndr_desc->numa_node;
	nd_region->target_node = ndr_desc->target_node;
	ida_init(&nd_region->ns_ida);
	ida_init(&nd_region->btt_ida);
	ida_init(&nd_region->pfn_ida);
	ida_init(&nd_region->dax_ida);
	dev = &nd_region->dev;
	dev_set_name(dev, "region%d", nd_region->id);
	dev->parent = &nvdimm_bus->dev;
	dev->type = dev_type;
	dev->groups = ndr_desc->attr_groups;
	dev->of_node = ndr_desc->of_node;
	nd_region->ndr_size = resource_size(ndr_desc->res);
	nd_region->ndr_start = ndr_desc->res->start;
	nd_region->align = default_align(nd_region);
	if (ndr_desc->flush)
		nd_region->flush = ndr_desc->flush;
	else
		nd_region->flush = NULL;

	device_initialize(dev);
	lockdep_set_class(&dev->mutex, &nvdimm_region_key);
	nd_device_register(dev);

	return nd_region;

err_percpu:
	if (!test_bit(ND_REGION_CXL, &ndr_desc->flags))
		memregion_free(nd_region->id);
err_id:
	kfree(nd_region);
	return NULL;
}
struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
		struct nd_region_desc *ndr_desc)
{
	ndr_desc->num_lanes = ND_MAX_LANES;
	return nd_region_create(nvdimm_bus, ndr_desc, &nd_pmem_device_type,
			__func__);
}
EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create);
struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
		struct nd_region_desc *ndr_desc)
{
	ndr_desc->num_lanes = ND_MAX_LANES;
	return nd_region_create(nvdimm_bus, ndr_desc, &nd_volatile_device_type,
			__func__);
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
void nvdimm_region_delete(struct nd_region *nd_region)
{
	if (nd_region)
		nd_device_unregister(&nd_region->dev, ND_SYNC);
}
EXPORT_SYMBOL_GPL(nvdimm_region_delete);
int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
{
	int rc = 0;

	if (!nd_region->flush)
		rc = generic_nvdimm_flush(nd_region);
	else {
		if (nd_region->flush(nd_region, bio))
			rc = -EIO;
	}

	return rc;
}
/**
 * generic_nvdimm_flush() - flush any posted write queues between the cpu and pmem media
 * @nd_region: interleaved pmem region
 */
int generic_nvdimm_flush(struct nd_region *nd_region)
{
	struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
	int i, idx;

	/*
	 * Try to encourage some diversity in flush hint addresses
	 * across cpus assuming a limited number of flush hints.
	 */
	idx = this_cpu_read(flush_idx);
	idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));

	/*
	 * The pmem_wmb() is needed to 'sfence' all
	 * previous writes such that they are architecturally visible for
	 * the platform buffer flush. Note that we've already arranged for pmem
	 * writes to avoid the cache via memcpy_flushcache(). The final
	 * wmb() ensures ordering for the NVDIMM flush write.
	 */
	pmem_wmb();
	for (i = 0; i < nd_region->ndr_mappings; i++)
		if (ndrd_get_flush_wpq(ndrd, i, 0))
			writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
	wmb();

	return 0;
}
EXPORT_SYMBOL_GPL(nvdimm_flush);
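/*
 * Editor's note (assumption about the ndrd_get_flush_wpq() accessor in
 * nd.h): the hint index is masked by (1 << hints_shift) - 1, so the hashed
 * per-cpu idx above only selects which of the DIMM's flush hint registers
 * receives the write and always lands on a valid slot. For example, with
 * 4 hints per DIMM an idx of 13 selects hint 13 & 3 = 1.
 */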
/**
 * nvdimm_has_flush - determine write flushing requirements
 * @nd_region: interleaved pmem region
 *
 * Returns 1 if writes require flushing
 * Returns 0 if writes do not require flushing
 * Returns -ENXIO if flushing capability can not be determined
 */
int nvdimm_has_flush(struct nd_region *nd_region)
{
	int i;

	/* no nvdimm or pmem api == flushing capability unknown */
	if (nd_region->ndr_mappings == 0
			|| !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
		return -ENXIO;

	/* Test if an explicit flush function is defined */
	if (test_bit(ND_REGION_ASYNC, &nd_region->flags) && nd_region->flush)
		return 1;

	/* Test if any flush hints for the region are available */
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		/* flush hints present / available */
		if (nvdimm->num_flush)
			return 1;
	}

	/*
	 * The platform defines dimm devices without hints nor explicit flush,
	 * assume platform persistence mechanism like ADR
	 */
	return 0;
}
EXPORT_SYMBOL_GPL(nvdimm_has_flush);
int nvdimm_has_cache(struct nd_region *nd_region)
{
	return is_nd_pmem(&nd_region->dev) &&
		!test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
}
EXPORT_SYMBOL_GPL(nvdimm_has_cache);
bool is_nvdimm_sync(struct nd_region *nd_region)
{
	if (is_nd_volatile(&nd_region->dev))
		return true;

	return is_nd_pmem(&nd_region->dev) &&
		!test_bit(ND_REGION_ASYNC, &nd_region->flags);
}
EXPORT_SYMBOL_GPL(is_nvdimm_sync);
struct conflict_context {
	struct nd_region *nd_region;
	resource_size_t start, size;
};
static int region_conflict(struct device *dev, void *data)
{
	struct nd_region *nd_region;
	struct conflict_context *ctx = data;
	resource_size_t res_end, region_end, region_start;

	if (!is_memory(dev))
		return 0;

	nd_region = to_nd_region(dev);
	if (nd_region == ctx->nd_region)
		return 0;

	res_end = ctx->start + ctx->size;
	region_start = nd_region->ndr_start;
	region_end = region_start + nd_region->ndr_size;
	if (ctx->start >= region_start && ctx->start < region_end)
		return -EBUSY;
	if (res_end > region_start && res_end <= region_end)
		return -EBUSY;
	return 0;
}
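/*
 * Worked example (editor's illustration): a proposed range starting at
 * 0x1f0000000 with size 0x20000000 conflicts with an existing pmem region
 * spanning [0x100000000, 0x200000000) because the proposed start falls
 * inside that region, so region_conflict() returns -EBUSY.
 */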
int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
		resource_size_t size)
{
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
	struct conflict_context ctx = {
		.nd_region = nd_region,
		.start = start,
		.size = size,
	};

	return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict);
}
MODULE_IMPORT_NS("DEVMEM");