2 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 #include <linux/memremap.h>
14 #include <linux/blkdev.h>
15 #include <linux/device.h>
16 #include <linux/genhd.h>
17 #include <linux/sizes.h>
18 #include <linux/slab.h>
/*
 * nd_pfn_release() - release callback installed in nd_pfn_device_type.
 * @dev: the pfn device being released.
 *
 * Visible steps: look up the parent region via to_nd_region(dev->parent)
 * and the containing nd_pfn via to_nd_pfn(), emit a debug trace, call
 * nd_detach_ndns() on nd_pfn->ndns, and hand nd_pfn->id back to the
 * region's pfn_ida with ida_simple_remove().
 *
 * NOTE(review): this listing has gaps (the embedded numbering jumps from
 * 32 to 37), so any further teardown is not visible here.
 */
25 static void nd_pfn_release(struct device
*dev
)
27 struct nd_region
*nd_region
= to_nd_region(dev
->parent
);
28 struct nd_pfn
*nd_pfn
= to_nd_pfn(dev
);
30 dev_dbg(dev
, "%s\n", __func__
);
31 nd_detach_ndns(&nd_pfn
->dev
, &nd_pfn
->ndns
);
32 ida_simple_remove(&nd_region
->pfn_ida
, nd_pfn
->id
);
/*
 * Device type for pfn devices.  Its address doubles as the type tag that
 * is_nd_pfn() compares dev->type against; nd_pfn_release() is wired in as
 * the release hook.  Other initialisers, if any, are not visible in this
 * listing.
 */
37 static struct device_type nd_pfn_device_type
= {
39 .release
= nd_pfn_release
,
/*
 * is_nd_pfn() - test whether @dev is a pfn device.
 * @dev: device to test; may be NULL.
 *
 * Returns false for a NULL @dev, otherwise whether dev->type points at
 * nd_pfn_device_type.  Exported with EXPORT_SYMBOL().
 */
42 bool is_nd_pfn(struct device
*dev
)
44 return dev
? dev
->type
== &nd_pfn_device_type
: false;
46 EXPORT_SYMBOL(is_nd_pfn
);
/*
 * to_nd_pfn() - convert an embedded struct device to its struct nd_pfn.
 * @dev: device embedded as the 'dev' member of a struct nd_pfn.
 *
 * Uses container_of() and raises WARN_ON() when @dev is not a pfn device
 * according to is_nd_pfn().  The conversion itself is performed before the
 * check.  Exported with EXPORT_SYMBOL().
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the container_of() result is returned - confirm.
 */
48 struct nd_pfn
*to_nd_pfn(struct device
*dev
)
50 struct nd_pfn
*nd_pfn
= container_of(dev
, struct nd_pfn
, dev
);
52 WARN_ON(!is_nd_pfn(dev
));
55 EXPORT_SYMBOL(to_nd_pfn
);
/*
 * mode_show() - sysfs read handler for the "mode" attribute.
 * @dev: device the attribute belongs to (resolved with to_nd_pfn_safe()).
 * @attr: unused in the visible lines.
 * @buf: output buffer.
 *
 * Switches on nd_pfn->mode and prints one of "ram", "pmem" or "none",
 * each followed by a newline; returns sprintf()'s result.
 *
 * NOTE(review): the case labels are missing from this listing, so which
 * mode value maps to which string cannot be confirmed here.
 */
57 static ssize_t
mode_show(struct device
*dev
,
58 struct device_attribute
*attr
, char *buf
)
60 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
62 switch (nd_pfn
->mode
) {
64 return sprintf(buf
, "ram\n");
66 return sprintf(buf
, "pmem\n");
68 return sprintf(buf
, "none\n");
/*
 * mode_store() - sysfs write handler for the "mode" attribute.
 * @dev: device the attribute belongs to (resolved with to_nd_pfn_safe()).
 * @attr: unused in the visible lines.
 * @buf: user supplied text.
 * @len: number of bytes in @buf.
 *
 * Compares @buf against "pmem", "ram" and "none" (each with and without a
 * trailing newline) using strncmp() limited to n characters, and stores
 * PFN_MODE_PMEM, PFN_MODE_RAM or PFN_MODE_NONE in nd_pfn->mode on a match.
 * Afterwards it logs the outcome, calls nvdimm_bus_unlock() and returns
 * rc if non-zero, otherwise @len.
 *
 * NOTE(review): the declarations of rc and n, the matching lock call and
 * the no-match path are on lines missing from this listing.
 * NOTE(review): the debug message reads buf[len - 1], which presumes
 * len > 0 - confirm against the sysfs write path.
 */
72 static ssize_t
mode_store(struct device
*dev
,
73 struct device_attribute
*attr
, const char *buf
, size_t len
)
75 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
85 if (strncmp(buf
, "pmem\n", n
) == 0
86 || strncmp(buf
, "pmem", n
) == 0) {
87 nd_pfn
->mode
= PFN_MODE_PMEM
;
88 } else if (strncmp(buf
, "ram\n", n
) == 0
89 || strncmp(buf
, "ram", n
) == 0)
90 nd_pfn
->mode
= PFN_MODE_RAM
;
91 else if (strncmp(buf
, "none\n", n
) == 0
92 || strncmp(buf
, "none", n
) == 0)
93 nd_pfn
->mode
= PFN_MODE_NONE
;
/* Append a newline to the trace only when the input did not end in one. */
97 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
98 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
99 nvdimm_bus_unlock(dev
);
102 return rc
? rc
: len
;
104 static DEVICE_ATTR_RW(mode
);
/*
 * align_show() - sysfs read handler for the "align" attribute.
 * @dev: device the attribute belongs to (resolved with to_nd_pfn_safe()).
 * @attr: unused.
 * @buf: output buffer.
 *
 * Prints nd_pfn->align in decimal ("%ld") followed by a newline and
 * returns sprintf()'s result.
 */
106 static ssize_t
align_show(struct device
*dev
,
107 struct device_attribute
*attr
, char *buf
)
109 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
111 return sprintf(buf
, "%ld\n", nd_pfn
->align
);
/*
 * __align_store() - parse and vet a new alignment value for @nd_pfn.
 * @nd_pfn: pfn instance being configured.
 * @buf: user supplied text, parsed by kstrtoul() with base 0 (auto-detect).
 *
 * The parsed value is tested for being a power of two within
 * [PAGE_SIZE, SZ_1G], and nd_pfn->dev.driver is tested as well.
 *
 * NOTE(review): the statements executed when those tests fire, the final
 * assignment and the return value are on lines missing from this listing.
 */
114 static ssize_t
__align_store(struct nd_pfn
*nd_pfn
, const char *buf
)
119 rc
= kstrtoul(buf
, 0, &val
);
123 if (!is_power_of_2(val
) || val
< PAGE_SIZE
|| val
> SZ_1G
)
126 if (nd_pfn
->dev
.driver
)
/*
 * align_store() - sysfs write handler for the "align" attribute.
 * @dev: device the attribute belongs to (resolved with to_nd_pfn_safe()).
 * @attr: unused.
 * @buf: user supplied text.
 * @len: number of bytes in @buf.
 *
 * Runs __align_store() between nvdimm_bus_lock() and nvdimm_bus_unlock(),
 * logs the result, and returns rc if non-zero, otherwise @len.
 *
 * NOTE(review): the debug message reads buf[len - 1], which presumes
 * len > 0 - confirm.
 */
134 static ssize_t
align_store(struct device
*dev
,
135 struct device_attribute
*attr
, const char *buf
, size_t len
)
137 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
141 nvdimm_bus_lock(dev
);
142 rc
= __align_store(nd_pfn
, buf
);
143 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
144 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
145 nvdimm_bus_unlock(dev
);
148 return rc
? rc
: len
;
150 static DEVICE_ATTR_RW(align
);
/*
 * uuid_show() - sysfs read handler for the "uuid" attribute.
 * @dev: device the attribute belongs to (resolved with to_nd_pfn_safe()).
 * @attr: unused.
 * @buf: output buffer.
 *
 * Either prints nd_pfn->uuid with the "%pUb" format or prints an empty
 * line.
 *
 * NOTE(review): the condition selecting between the two returns is on a
 * line missing from this listing; presumably it tests whether a uuid has
 * been set - confirm.
 */
152 static ssize_t
uuid_show(struct device
*dev
,
153 struct device_attribute
*attr
, char *buf
)
155 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
158 return sprintf(buf
, "%pUb\n", nd_pfn
->uuid
);
159 return sprintf(buf
, "\n");
/*
 * uuid_store() - sysfs write handler for the "uuid" attribute.
 * @dev: device the attribute belongs to (resolved with to_nd_pfn_safe()).
 * @attr: unused.
 * @buf: user supplied text.
 * @len: number of bytes in @buf.
 *
 * Delegates to nd_uuid_store() with the address of nd_pfn->uuid, logs the
 * result, and returns rc if non-zero, otherwise @len.
 *
 * NOTE(review): any locking around the call is on lines missing from this
 * listing.  The debug message reads buf[len - 1], which presumes len > 0.
 */
162 static ssize_t
uuid_store(struct device
*dev
,
163 struct device_attribute
*attr
, const char *buf
, size_t len
)
165 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
169 rc
= nd_uuid_store(dev
, &nd_pfn
->uuid
, buf
, len
);
170 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
171 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
174 return rc
? rc
: len
;
176 static DEVICE_ATTR_RW(uuid
);
/*
 * namespace_show() - sysfs read handler for the "namespace" attribute.
 * @dev: device the attribute belongs to (resolved with to_nd_pfn_safe()).
 * @attr: unused.
 * @buf: output buffer.
 *
 * Between nvdimm_bus_lock() and nvdimm_bus_unlock(), prints the device
 * name of nd_pfn->ndns followed by a newline, or just a newline when
 * nd_pfn->ndns is NULL.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
178 static ssize_t
namespace_show(struct device
*dev
,
179 struct device_attribute
*attr
, char *buf
)
181 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
184 nvdimm_bus_lock(dev
);
185 rc
= sprintf(buf
, "%s\n", nd_pfn
->ndns
186 ? dev_name(&nd_pfn
->ndns
->dev
) : "");
187 nvdimm_bus_unlock(dev
);
/*
 * namespace_store() - sysfs write handler for the "namespace" attribute.
 * @dev: device the attribute belongs to (resolved with to_nd_pfn_safe()).
 * @attr: unused.
 * @buf: user supplied text.
 * @len: number of bytes in @buf.
 *
 * Between nvdimm_bus_lock() and nvdimm_bus_unlock(), delegates to
 * nd_namespace_store() with the address of nd_pfn->ndns and logs the
 * result.
 *
 * NOTE(review): the return statement is not visible in this listing.  The
 * debug message reads buf[len - 1], which presumes len > 0.
 */
191 static ssize_t
namespace_store(struct device
*dev
,
192 struct device_attribute
*attr
, const char *buf
, size_t len
)
194 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
198 nvdimm_bus_lock(dev
);
199 rc
= nd_namespace_store(dev
, &nd_pfn
->ndns
, buf
, len
);
200 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
201 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
202 nvdimm_bus_unlock(dev
);
207 static DEVICE_ATTR_RW(namespace);
/*
 * resource_show() - sysfs read handler for the read-only "resource"
 * attribute.
 * @dev: device the attribute belongs to (resolved with to_nd_pfn_safe()).
 * @attr: unused.
 * @buf: output buffer.
 *
 * Prints, in hex, the namespace resource start plus the info block's
 * start_pad and dataoff fields (both converted from little endian).
 *
 * NOTE(review): the guard that decides when this path runs, the fallback
 * for a disabled instance (see the in-line comment) and the return
 * statement are on lines missing from this listing.
 */
209 static ssize_t
resource_show(struct device
*dev
,
210 struct device_attribute
*attr
, char *buf
)
212 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
217 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
218 u64 offset
= __le64_to_cpu(pfn_sb
->dataoff
);
219 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
220 u32 start_pad
= __le32_to_cpu(pfn_sb
->start_pad
);
221 struct nd_namespace_io
*nsio
= to_nd_namespace_io(&ndns
->dev
);
223 rc
= sprintf(buf
, "%#llx\n", (unsigned long long) nsio
->res
.start
224 + start_pad
+ offset
);
226 /* no address to convey if the pfn instance is disabled */
233 static DEVICE_ATTR_RO(resource
);
/*
 * size_show() - sysfs read handler for the read-only "size" attribute.
 * @dev: device the attribute belongs to (resolved with to_nd_pfn_safe()).
 * @attr: unused.
 * @buf: output buffer.
 *
 * Prints, in decimal, the namespace resource size minus the info block's
 * start_pad, end_trunc and dataoff fields (converted from little endian).
 *
 * NOTE(review): the guard that decides when this path runs, the fallback
 * for a disabled instance (see the in-line comment) and the return
 * statement are on lines missing from this listing.
 */
235 static ssize_t
size_show(struct device
*dev
,
236 struct device_attribute
*attr
, char *buf
)
238 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
243 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
244 u64 offset
= __le64_to_cpu(pfn_sb
->dataoff
);
245 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
246 u32 start_pad
= __le32_to_cpu(pfn_sb
->start_pad
);
247 u32 end_trunc
= __le32_to_cpu(pfn_sb
->end_trunc
);
248 struct nd_namespace_io
*nsio
= to_nd_namespace_io(&ndns
->dev
);
250 rc
= sprintf(buf
, "%llu\n", (unsigned long long)
251 resource_size(&nsio
->res
) - start_pad
252 - end_trunc
- offset
);
254 /* no size to convey if the pfn instance is disabled */
261 static DEVICE_ATTR_RO(size
);
/*
 * Attribute table for pfn devices, referenced by nd_pfn_attribute_group.
 * NOTE(review): only the namespace, align and resource entries are
 * visible; the embedded numbering skips lines, so further entries and the
 * terminator are presumably present in the full file - confirm.
 */
263 static struct attribute
*nd_pfn_attributes
[] = {
265 &dev_attr_namespace
.attr
,
267 &dev_attr_align
.attr
,
268 &dev_attr_resource
.attr
,
/*
 * Attribute group wrapping nd_pfn_attributes.  It has external linkage
 * (no 'static'), so it is visible outside this file.
 */
273 struct attribute_group nd_pfn_attribute_group
= {
274 .attrs
= nd_pfn_attributes
,
/*
 * Group list assigned to dev->groups in nd_pfn_alloc(): the pfn specific
 * group plus the nd_device and nd_numa attribute groups.  The terminating
 * entry is not visible in this listing.
 */
277 static const struct attribute_group
*nd_pfn_attribute_groups
[] = {
278 &nd_pfn_attribute_group
,
279 &nd_device_attribute_group
,
280 &nd_numa_attribute_group
,
/*
 * nd_pfn_devinit() - set defaults on @nd_pfn and initialise its device.
 * @nd_pfn: instance to initialise.
 * @ndns: namespace to claim, or NULL (nd_pfn_create() passes NULL).
 *
 * Sets mode to PFN_MODE_NONE and align to HPAGE_SIZE, calls
 * device_initialize(), and - when @ndns is given - tries to attach it via
 * __nd_attach_ndns(); a failed attach is logged with the name of the
 * current claim holder.
 *
 * NOTE(review): the statements after the failure message and the return
 * value are on lines missing from this listing.
 */
284 struct device
*nd_pfn_devinit(struct nd_pfn
*nd_pfn
,
285 struct nd_namespace_common
*ndns
)
287 struct device
*dev
= &nd_pfn
->dev
;
292 nd_pfn
->mode
= PFN_MODE_NONE
;
293 nd_pfn
->align
= HPAGE_SIZE
;
295 device_initialize(&nd_pfn
->dev
);
296 if (ndns
&& !__nd_attach_ndns(&nd_pfn
->dev
, ndns
, &nd_pfn
->ndns
)) {
297 dev_dbg(&ndns
->dev
, "%s failed, already claimed by %s\n",
298 __func__
, dev_name(ndns
->claim
));
/*
 * nd_pfn_alloc() - allocate a zeroed nd_pfn under @nd_region.
 * @nd_region: parent region; supplies the id allocator and the parent
 *             device.
 *
 * Allocates the structure with kzalloc(GFP_KERNEL), obtains an id from
 * nd_region->pfn_ida, names the device "pfn<region id>.<pfn id>" and sets
 * its groups, type and parent.
 *
 * NOTE(review): the allocation-failure handling, the body of the
 * "id < 0" branch and the return statement are on lines missing from
 * this listing.
 */
305 static struct nd_pfn
*nd_pfn_alloc(struct nd_region
*nd_region
)
307 struct nd_pfn
*nd_pfn
;
310 nd_pfn
= kzalloc(sizeof(*nd_pfn
), GFP_KERNEL
);
314 nd_pfn
->id
= ida_simple_get(&nd_region
->pfn_ida
, 0, 0, GFP_KERNEL
);
315 if (nd_pfn
->id
< 0) {
321 dev_set_name(dev
, "pfn%d.%d", nd_region
->id
, nd_pfn
->id
);
322 dev
->groups
= nd_pfn_attribute_groups
;
323 dev
->type
= &nd_pfn_device_type
;
324 dev
->parent
= &nd_region
->dev
;
/*
 * nd_pfn_create() - create and register an unclaimed pfn device.
 * @nd_region: parent region; tested with is_nd_pmem().
 *
 * Allocates an nd_pfn, initialises it with no namespace attached
 * (ndns == NULL) and registers the device with __nd_device_register().
 *
 * NOTE(review): the action taken for a non-pmem region, any NULL checks
 * and the return statement are on lines missing from this listing.
 */
329 struct device
*nd_pfn_create(struct nd_region
*nd_region
)
331 struct nd_pfn
*nd_pfn
;
334 if (!is_nd_pmem(&nd_region
->dev
))
337 nd_pfn
= nd_pfn_alloc(nd_region
);
338 dev
= nd_pfn_devinit(nd_pfn
, NULL
);
340 __nd_device_register(dev
);
/*
 * nd_pfn_validate() - read the info block of the attached namespace and
 * check it against @sig and the live settings of @nd_pfn.
 * @nd_pfn: instance whose pfn_sb buffer and ndns are used.
 * @sig: expected signature, compared over PFN_SIG_LEN bytes.
 *
 * Visible sequence:
 *  - test pfn_sb / ndns for NULL and the parent for being a pmem region;
 *  - read sizeof(*pfn_sb) bytes at offset SZ_4K of the namespace;
 *  - compare the signature with @sig;
 *  - verify the checksum: the stored value is saved, the field is zeroed
 *    while nd_sb_checksum() runs, and the saved value is written back;
 *  - compare the stored parent_uuid with the namespace uuid;
 *  - zero start_pad / end_trunc when version_minor < 1;
 *  - take align and dataoff from the block (align may be replaced by
 *    1UL << ilog2(offset); the condition is not shown) and read mode;
 *  - either copy the on-media uuid (kmemdup) and align into @nd_pfn, or
 *    compare uuid / align / mode with the live values and complain on a
 *    mismatch;
 *  - check align against the namespace capacity, dataoff against the
 *    resource size, and dataoff for alignment to align and PAGE_SIZE.
 *
 * NOTE(review): parent_uuid is initialised from &ndns->dev before the
 * "!pfn_sb || !ndns" test - confirm nd_dev_to_uuid() copes with that.
 * NOTE(review): every return statement and several branch bodies are on
 * lines missing from this listing, so error codes cannot be documented.
 * Exported with EXPORT_SYMBOL().
 */
344 int nd_pfn_validate(struct nd_pfn
*nd_pfn
, const char *sig
)
346 u64 checksum
, offset
;
348 enum nd_pfn_mode mode
;
349 struct nd_namespace_io
*nsio
;
350 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
351 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
352 const u8
*parent_uuid
= nd_dev_to_uuid(&ndns
->dev
);
354 if (!pfn_sb
|| !ndns
)
357 if (!is_nd_pmem(nd_pfn
->dev
.parent
))
360 if (nvdimm_read_bytes(ndns
, SZ_4K
, pfn_sb
, sizeof(*pfn_sb
)))
363 if (memcmp(pfn_sb
->signature
, sig
, PFN_SIG_LEN
) != 0)
/* The checksum field must be zero while the checksum is recomputed. */
366 checksum
= le64_to_cpu(pfn_sb
->checksum
);
367 pfn_sb
->checksum
= 0;
368 if (checksum
!= nd_sb_checksum((struct nd_gen_sb
*) pfn_sb
))
370 pfn_sb
->checksum
= cpu_to_le64(checksum
);
372 if (memcmp(pfn_sb
->parent_uuid
, parent_uuid
, 16) != 0)
375 if (__le16_to_cpu(pfn_sb
->version_minor
) < 1) {
376 pfn_sb
->start_pad
= 0;
377 pfn_sb
->end_trunc
= 0;
380 if (__le16_to_cpu(pfn_sb
->version_minor
) < 2)
383 switch (le32_to_cpu(pfn_sb
->mode
)) {
391 align
= le32_to_cpu(pfn_sb
->align
);
392 offset
= le64_to_cpu(pfn_sb
->dataoff
);
394 align
= 1UL << ilog2(offset
);
395 mode
= le32_to_cpu(pfn_sb
->mode
);
399 * When probing a namepace via nd_pfn_probe() the uuid
400 * is NULL (see: nd_pfn_devinit()) we init settings from
403 nd_pfn
->uuid
= kmemdup(pfn_sb
->uuid
, 16, GFP_KERNEL
);
406 nd_pfn
->align
= align
;
410 * When probing a pfn / dax instance we validate the
411 * live settings against the pfn_sb
413 if (memcmp(nd_pfn
->uuid
, pfn_sb
->uuid
, 16) != 0)
417 * If the uuid validates, but other settings mismatch
418 * return EINVAL because userspace has managed to change
419 * the configuration without specifying new
422 if (nd_pfn
->align
!= align
|| nd_pfn
->mode
!= mode
) {
423 dev_err(&nd_pfn
->dev
,
424 "init failed, settings mismatch\n");
425 dev_dbg(&nd_pfn
->dev
, "align: %lx:%lx mode: %d:%d\n",
426 nd_pfn
->align
, align
, nd_pfn
->mode
,
432 if (align
> nvdimm_namespace_capacity(ndns
)) {
433 dev_err(&nd_pfn
->dev
, "alignment: %lx exceeds capacity %llx\n",
434 align
, nvdimm_namespace_capacity(ndns
));
439 * These warnings are verbose because they can only trigger in
440 * the case where the physical address alignment of the
441 * namespace has changed since the pfn superblock was
444 nsio
= to_nd_namespace_io(&ndns
->dev
);
445 if (offset
>= resource_size(&nsio
->res
)) {
446 dev_err(&nd_pfn
->dev
, "pfn array size exceeds capacity of %s\n",
447 dev_name(&ndns
->dev
));
451 if ((align
&& !IS_ALIGNED(offset
, align
))
452 || !IS_ALIGNED(offset
, PAGE_SIZE
)) {
453 dev_err(&nd_pfn
->dev
,
454 "bad offset: %#llx dax disabled align: %#lx\n",
461 EXPORT_SYMBOL(nd_pfn_validate
);
/*
 * nd_pfn_probe() - look for a pfn info block on @ndns and, if one
 * validates, register a pfn device for it.
 * @dev: device used as the owner of the devm-allocated info block buffer
 *       and for debug output.
 * @ndns: namespace being probed; its parent is taken as the nd_region.
 *
 * Visible sequence: with the bus locked, allocate an nd_pfn and initialise
 * it claiming @ndns; allocate a zeroed pfn_sb with devm_kzalloc(); run
 * nd_pfn_validate() with PFN_SIG; log the pfn device name when rc == 0,
 * "<none>" otherwise.  Calls to __nd_detach_ndns() and
 * __nd_device_register() follow.
 *
 * NOTE(review): the conditions selecting the detach versus register
 * paths, NULL checks and all return statements are on lines missing from
 * this listing.  Exported with EXPORT_SYMBOL().
 */
463 int nd_pfn_probe(struct device
*dev
, struct nd_namespace_common
*ndns
)
466 struct nd_pfn
*nd_pfn
;
467 struct device
*pfn_dev
;
468 struct nd_pfn_sb
*pfn_sb
;
469 struct nd_region
*nd_region
= to_nd_region(ndns
->dev
.parent
);
474 nvdimm_bus_lock(&ndns
->dev
);
475 nd_pfn
= nd_pfn_alloc(nd_region
);
476 pfn_dev
= nd_pfn_devinit(nd_pfn
, ndns
);
477 nvdimm_bus_unlock(&ndns
->dev
);
480 pfn_sb
= devm_kzalloc(dev
, sizeof(*pfn_sb
), GFP_KERNEL
);
481 nd_pfn
= to_nd_pfn(pfn_dev
);
482 nd_pfn
->pfn_sb
= pfn_sb
;
483 rc
= nd_pfn_validate(nd_pfn
, PFN_SIG
);
484 dev_dbg(dev
, "%s: pfn: %s\n", __func__
,
485 rc
== 0 ? dev_name(pfn_dev
) : "<none>");
487 __nd_detach_ndns(pfn_dev
, &nd_pfn
->ndns
);
490 __nd_device_register(pfn_dev
);
494 EXPORT_SYMBOL(nd_pfn_probe
);
497 * We hotplug memory at section granularity, pad the reserved area from
498 * the previous section base to the namespace base address.
/*
 * init_altmap_base() - pfn of the memory-section boundary at or below
 * @base.
 * @base: physical start address.
 *
 * Converts @base to a pfn with PHYS_PFN() and rounds it down with
 * PFN_SECTION_ALIGN_DOWN().
 */
500 static unsigned long init_altmap_base(resource_size_t base
)
502 unsigned long base_pfn
= PHYS_PFN(base
);
504 return PFN_SECTION_ALIGN_DOWN(base_pfn
);
/*
 * init_altmap_reserve() - number of pfns to reserve ahead of the data.
 * @base: physical start address.
 *
 * Starts from PHYS_PFN(SZ_8K) and adds the distance, in pfns, between
 * @base and the section boundary at or below it.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably 'reserve' is returned - confirm.
 */
507 static unsigned long init_altmap_reserve(resource_size_t base
)
509 unsigned long reserve
= PHYS_PFN(SZ_8K
);
510 unsigned long base_pfn
= PHYS_PFN(base
);
512 reserve
+= base_pfn
- PFN_SECTION_ALIGN_DOWN(base_pfn
);
/*
 * __nvdimm_setup_pfn() - derive the usable resource range and the
 * vmem_altmap from a validated info block.
 * @nd_pfn: instance with pfn_sb and ndns populated.
 * @res: output; receives a copy of the namespace resource with start
 *       advanced by start_pad and end pulled back by end_trunc.
 * @altmap: output; filled from a local template in the PFN_MODE_PMEM path.
 *
 * Visible behaviour:
 *  - PFN_MODE_RAM: npfns is taken from the info block; an
 *    ERR_PTR(-EINVAL) return sits in this branch (its condition is not
 *    shown).
 *  - PFN_MODE_PMEM: npfns = (resource_size(res) - dataoff) / PAGE_SIZE;
 *    an informational message is logged when the on-media npfns is
 *    larger; the template (base_pfn, reserve) is copied to @altmap and
 *    altmap->free is set to PHYS_PFN(dataoff - SZ_8K).
 *  - A further path returns ERR_PTR(-ENXIO).
 *
 * NOTE(review): the successful return values and the remaining template
 * fields are on lines missing from this listing.
 */
516 static struct vmem_altmap
*__nvdimm_setup_pfn(struct nd_pfn
*nd_pfn
,
517 struct resource
*res
, struct vmem_altmap
*altmap
)
519 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
520 u64 offset
= le64_to_cpu(pfn_sb
->dataoff
);
521 u32 start_pad
= __le32_to_cpu(pfn_sb
->start_pad
);
522 u32 end_trunc
= __le32_to_cpu(pfn_sb
->end_trunc
);
523 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
524 struct nd_namespace_io
*nsio
= to_nd_namespace_io(&ndns
->dev
);
525 resource_size_t base
= nsio
->res
.start
+ start_pad
;
526 struct vmem_altmap __altmap
= {
527 .base_pfn
= init_altmap_base(base
),
528 .reserve
= init_altmap_reserve(base
),
/* Trim the caller's copy of the namespace range by the recorded padding. */
531 memcpy(res
, &nsio
->res
, sizeof(*res
));
532 res
->start
+= start_pad
;
533 res
->end
-= end_trunc
;
535 if (nd_pfn
->mode
== PFN_MODE_RAM
) {
537 return ERR_PTR(-EINVAL
);
538 nd_pfn
->npfns
= le64_to_cpu(pfn_sb
->npfns
);
540 } else if (nd_pfn
->mode
== PFN_MODE_PMEM
) {
541 nd_pfn
->npfns
= (resource_size(res
) - offset
) / PAGE_SIZE
;
542 if (le64_to_cpu(nd_pfn
->pfn_sb
->npfns
) > nd_pfn
->npfns
)
543 dev_info(&nd_pfn
->dev
,
544 "number of pfns truncated from %lld to %ld\n",
545 le64_to_cpu(nd_pfn
->pfn_sb
->npfns
),
547 memcpy(altmap
, &__altmap
, sizeof(*altmap
));
548 altmap
->free
= PHYS_PFN(offset
- SZ_8K
);
551 return ERR_PTR(-ENXIO
);
/*
 * nd_pfn_init() - validate an existing info block or build and write a
 * new one for @nd_pfn.
 * @nd_pfn: instance with ndns attached.
 *
 * Visible sequence:
 *  - allocate a zeroed pfn_sb with devm_kzalloc() and run
 *    nd_pfn_validate() with a signature chosen after an is_nd_dax() test;
 *  - when no info block exists: report a read-only region via dev_info(),
 *    clear pfn_sb, and work out start_pad / end_trunc by testing the
 *    section-aligned start and end of the namespace against System RAM
 *    with region_intersects() (REGION_MIXED triggers trimming);
 *  - compute npfns from the trimmed size less SZ_8K, in SZ_4K units;
 *  - PFN_MODE_PMEM: size the memmap as ALIGN(64 * npfns, HPAGE_SIZE) and
 *    place the data offset after SZ_8K + memmap + dax_label_reserve,
 *    aligned to nd_pfn->align; PFN_MODE_RAM: same without the memmap;
 *  - fail with a message when offset + start_pad + end_trunc >= size;
 *  - recompute npfns, fill in the info block (mode, dataoff, npfns,
 *    signature, uuids, version 1.2, padding, align, checksum) and write
 *    it at offset SZ_4K with nvdimm_write_bytes(), whose result is
 *    returned.
 *
 * dax_label_reserve is SZ_128K for dax devices and 0 otherwise.
 *
 * NOTE(review): several declarations, branch conditions and early returns
 * are on lines missing from this listing.
 * NOTE(review): "truncate %d bytes" formats a u32 sum with %d - confirm
 * the intended specifier.
 */
556 static int nd_pfn_init(struct nd_pfn
*nd_pfn
)
558 u32 dax_label_reserve
= is_nd_dax(&nd_pfn
->dev
) ? SZ_128K
: 0;
559 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
560 u32 start_pad
= 0, end_trunc
= 0;
561 resource_size_t start
, size
;
562 struct nd_namespace_io
*nsio
;
563 struct nd_region
*nd_region
;
564 struct nd_pfn_sb
*pfn_sb
;
571 pfn_sb
= devm_kzalloc(&nd_pfn
->dev
, sizeof(*pfn_sb
), GFP_KERNEL
);
575 nd_pfn
->pfn_sb
= pfn_sb
;
576 if (is_nd_dax(&nd_pfn
->dev
))
580 rc
= nd_pfn_validate(nd_pfn
, sig
);
584 /* no info block, do init */;
585 nd_region
= to_nd_region(nd_pfn
->dev
.parent
);
587 dev_info(&nd_pfn
->dev
,
588 "%s is read-only, unable to init metadata\n",
589 dev_name(&nd_region
->dev
));
593 memset(pfn_sb
, 0, sizeof(*pfn_sb
));
596 * Check if pmem collides with 'System RAM' when section aligned and
597 * trim it accordingly
599 nsio
= to_nd_namespace_io(&ndns
->dev
);
600 start
= PHYS_SECTION_ALIGN_DOWN(nsio
->res
.start
);
601 size
= resource_size(&nsio
->res
);
602 if (region_intersects(start
, size
, IORESOURCE_SYSTEM_RAM
,
603 IORES_DESC_NONE
) == REGION_MIXED
) {
604 start
= nsio
->res
.start
;
605 start_pad
= PHYS_SECTION_ALIGN_UP(start
) - start
;
608 start
= nsio
->res
.start
;
609 size
= PHYS_SECTION_ALIGN_UP(start
+ size
) - start
;
610 if (region_intersects(start
, size
, IORESOURCE_SYSTEM_RAM
,
611 IORES_DESC_NONE
) == REGION_MIXED
) {
612 size
= resource_size(&nsio
->res
);
613 end_trunc
= start
+ size
- PHYS_SECTION_ALIGN_DOWN(start
+ size
);
616 if (start_pad
+ end_trunc
)
617 dev_info(&nd_pfn
->dev
, "%s section collision, truncate %d bytes\n",
618 dev_name(&ndns
->dev
), start_pad
+ end_trunc
);
621 * Note, we use 64 here for the standard size of struct page,
622 * debugging options may cause it to be larger in which case the
623 * implementation will limit the pfns advertised through
624 * ->direct_access() to those that are included in the memmap.
627 size
= resource_size(&nsio
->res
);
628 npfns
= (size
- start_pad
- end_trunc
- SZ_8K
) / SZ_4K
;
629 if (nd_pfn
->mode
== PFN_MODE_PMEM
) {
630 unsigned long memmap_size
;
633 * vmemmap_populate_hugepages() allocates the memmap array in
636 memmap_size
= ALIGN(64 * npfns
, HPAGE_SIZE
);
637 offset
= ALIGN(start
+ SZ_8K
+ memmap_size
+ dax_label_reserve
,
638 nd_pfn
->align
) - start
;
639 } else if (nd_pfn
->mode
== PFN_MODE_RAM
)
640 offset
= ALIGN(start
+ SZ_8K
+ dax_label_reserve
,
641 nd_pfn
->align
) - start
;
645 if (offset
+ start_pad
+ end_trunc
>= size
) {
646 dev_err(&nd_pfn
->dev
, "%s unable to satisfy requested alignment\n",
647 dev_name(&ndns
->dev
));
/* Populate the on-media info block; multi-byte fields are little endian. */
651 npfns
= (size
- offset
- start_pad
- end_trunc
) / SZ_4K
;
652 pfn_sb
->mode
= cpu_to_le32(nd_pfn
->mode
);
653 pfn_sb
->dataoff
= cpu_to_le64(offset
);
654 pfn_sb
->npfns
= cpu_to_le64(npfns
);
655 memcpy(pfn_sb
->signature
, sig
, PFN_SIG_LEN
);
656 memcpy(pfn_sb
->uuid
, nd_pfn
->uuid
, 16);
657 memcpy(pfn_sb
->parent_uuid
, nd_dev_to_uuid(&ndns
->dev
), 16);
658 pfn_sb
->version_major
= cpu_to_le16(1);
659 pfn_sb
->version_minor
= cpu_to_le16(2);
660 pfn_sb
->start_pad
= cpu_to_le32(start_pad
);
661 pfn_sb
->end_trunc
= cpu_to_le32(end_trunc
);
662 pfn_sb
->align
= cpu_to_le32(nd_pfn
->align
);
663 checksum
= nd_sb_checksum((struct nd_gen_sb
*) pfn_sb
);
664 pfn_sb
->checksum
= cpu_to_le64(checksum
);
666 return nvdimm_write_bytes(ndns
, SZ_4K
, pfn_sb
, sizeof(*pfn_sb
));
670 * Determine the effective resource range and vmem_altmap from an nd_pfn
/*
 * nvdimm_setup_pfn() - public entry point: make sure @nd_pfn has a valid
 * info block, then compute its resource range and vmem_altmap.
 * @nd_pfn: instance to set up; must have both uuid and ndns set,
 *          otherwise ERR_PTR(-ENODEV) is returned.
 * @res: output resource, passed through to __nvdimm_setup_pfn().
 * @altmap: output altmap, passed through to __nvdimm_setup_pfn().
 *
 * Calls nd_pfn_init() and then returns the result of
 * __nvdimm_setup_pfn().
 *
 * NOTE(review): the handling of a failing nd_pfn_init() is on lines
 * missing from this listing.  Exported with EXPORT_SYMBOL_GPL().
 */
673 struct vmem_altmap
*nvdimm_setup_pfn(struct nd_pfn
*nd_pfn
,
674 struct resource
*res
, struct vmem_altmap
*altmap
)
678 if (!nd_pfn
->uuid
|| !nd_pfn
->ndns
)
679 return ERR_PTR(-ENODEV
);
681 rc
= nd_pfn_init(nd_pfn
);
685 /* we need a valid pfn_sb before we can init a vmem_altmap */
686 return __nvdimm_setup_pfn(nd_pfn
, res
, altmap
);
688 EXPORT_SYMBOL_GPL(nvdimm_setup_pfn
);