/*
 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/libnvdimm.h>
#include <linux/badblocks.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/device.h>
#include <linux/ctype.h>
#include <linux/ndctl.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/io.h>
/*
 * List head for nvdimm buses and its companion mutex.
 * NOTE(review): the names suggest the mutex guards the list; confirm at
 * the users, which are outside this file section.
 */
LIST_HEAD(nvdimm_bus_list);
DEFINE_MUTEX(nvdimm_bus_list_mutex);
30 void nvdimm_bus_lock(struct device
*dev
)
32 struct nvdimm_bus
*nvdimm_bus
= walk_to_nvdimm_bus(dev
);
36 mutex_lock(&nvdimm_bus
->reconfig_mutex
);
38 EXPORT_SYMBOL(nvdimm_bus_lock
);
40 void nvdimm_bus_unlock(struct device
*dev
)
42 struct nvdimm_bus
*nvdimm_bus
= walk_to_nvdimm_bus(dev
);
46 mutex_unlock(&nvdimm_bus
->reconfig_mutex
);
48 EXPORT_SYMBOL(nvdimm_bus_unlock
);
50 bool is_nvdimm_bus_locked(struct device
*dev
)
52 struct nvdimm_bus
*nvdimm_bus
= walk_to_nvdimm_bus(dev
);
56 return mutex_is_locked(&nvdimm_bus
->reconfig_mutex
);
58 EXPORT_SYMBOL(is_nvdimm_bus_locked
);
61 struct nvdimm_bus
*nvdimm_bus
;
62 struct list_head list
;
63 resource_size_t offset
;
73 static struct nvdimm_map
*find_nvdimm_map(struct device
*dev
,
74 resource_size_t offset
)
76 struct nvdimm_bus
*nvdimm_bus
= walk_to_nvdimm_bus(dev
);
77 struct nvdimm_map
*nvdimm_map
;
79 list_for_each_entry(nvdimm_map
, &nvdimm_bus
->mapping_list
, list
)
80 if (nvdimm_map
->offset
== offset
)
85 static struct nvdimm_map
*alloc_nvdimm_map(struct device
*dev
,
86 resource_size_t offset
, size_t size
, unsigned long flags
)
88 struct nvdimm_bus
*nvdimm_bus
= walk_to_nvdimm_bus(dev
);
89 struct nvdimm_map
*nvdimm_map
;
91 nvdimm_map
= kzalloc(sizeof(*nvdimm_map
), GFP_KERNEL
);
95 INIT_LIST_HEAD(&nvdimm_map
->list
);
96 nvdimm_map
->nvdimm_bus
= nvdimm_bus
;
97 nvdimm_map
->offset
= offset
;
98 nvdimm_map
->flags
= flags
;
99 nvdimm_map
->size
= size
;
100 kref_init(&nvdimm_map
->kref
);
102 if (!request_mem_region(offset
, size
, dev_name(&nvdimm_bus
->dev
))) {
103 dev_err(&nvdimm_bus
->dev
, "failed to request %pa + %zd for %s\n",
104 &offset
, size
, dev_name(dev
));
105 goto err_request_region
;
109 nvdimm_map
->mem
= memremap(offset
, size
, flags
);
111 nvdimm_map
->iomem
= ioremap(offset
, size
);
113 if (!nvdimm_map
->mem
)
116 dev_WARN_ONCE(dev
, !is_nvdimm_bus_locked(dev
), "%s: bus unlocked!",
118 list_add(&nvdimm_map
->list
, &nvdimm_bus
->mapping_list
);
123 release_mem_region(offset
, size
);
129 static void nvdimm_map_release(struct kref
*kref
)
131 struct nvdimm_bus
*nvdimm_bus
;
132 struct nvdimm_map
*nvdimm_map
;
134 nvdimm_map
= container_of(kref
, struct nvdimm_map
, kref
);
135 nvdimm_bus
= nvdimm_map
->nvdimm_bus
;
137 dev_dbg(&nvdimm_bus
->dev
, "%s: %pa\n", __func__
, &nvdimm_map
->offset
);
138 list_del(&nvdimm_map
->list
);
139 if (nvdimm_map
->flags
)
140 memunmap(nvdimm_map
->mem
);
142 iounmap(nvdimm_map
->iomem
);
143 release_mem_region(nvdimm_map
->offset
, nvdimm_map
->size
);
147 static void nvdimm_map_put(void *data
)
149 struct nvdimm_map
*nvdimm_map
= data
;
150 struct nvdimm_bus
*nvdimm_bus
= nvdimm_map
->nvdimm_bus
;
152 nvdimm_bus_lock(&nvdimm_bus
->dev
);
153 kref_put(&nvdimm_map
->kref
, nvdimm_map_release
);
154 nvdimm_bus_unlock(&nvdimm_bus
->dev
);
158 * devm_nvdimm_memremap - map a resource that is shared across regions
159 * @dev: device that will own a reference to the shared mapping
160 * @offset: physical base address of the mapping
161 * @size: mapping size
162 * @flags: memremap flags, or, if zero, perform an ioremap instead
164 void *devm_nvdimm_memremap(struct device
*dev
, resource_size_t offset
,
165 size_t size
, unsigned long flags
)
167 struct nvdimm_map
*nvdimm_map
;
169 nvdimm_bus_lock(dev
);
170 nvdimm_map
= find_nvdimm_map(dev
, offset
);
172 nvdimm_map
= alloc_nvdimm_map(dev
, offset
, size
, flags
);
174 kref_get(&nvdimm_map
->kref
);
175 nvdimm_bus_unlock(dev
);
180 if (devm_add_action_or_reset(dev
, nvdimm_map_put
, nvdimm_map
))
183 return nvdimm_map
->mem
;
185 EXPORT_SYMBOL_GPL(devm_nvdimm_memremap
);
187 u64
nd_fletcher64(void *addr
, size_t len
, bool le
)
194 for (i
= 0; i
< len
/ sizeof(u32
); i
++) {
195 lo32
+= le
? le32_to_cpu((__le32
) buf
[i
]) : buf
[i
];
199 return hi32
<< 32 | lo32
;
201 EXPORT_SYMBOL_GPL(nd_fletcher64
);
203 struct nvdimm_bus_descriptor
*to_nd_desc(struct nvdimm_bus
*nvdimm_bus
)
205 /* struct nvdimm_bus definition is private to libnvdimm */
206 return nvdimm_bus
->nd_desc
;
208 EXPORT_SYMBOL_GPL(to_nd_desc
);
210 struct device
*to_nvdimm_bus_dev(struct nvdimm_bus
*nvdimm_bus
)
212 /* struct nvdimm_bus definition is private to libnvdimm */
213 return &nvdimm_bus
->dev
;
215 EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev
);
/*
 * True when @sep may follow a pair of hex digits in a textual uuid:
 * newline, '-', ':' or the terminating NUL.
 */
static bool is_uuid_sep(char sep)
{
	return sep == '\n' || sep == '-' || sep == ':' || sep == '\0';
}
224 static int nd_uuid_parse(struct device
*dev
, u8
*uuid_out
, const char *buf
,
227 const char *str
= buf
;
231 for (i
= 0; i
< 16; i
++) {
232 if (!isxdigit(str
[0]) || !isxdigit(str
[1])) {
233 dev_dbg(dev
, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
234 __func__
, i
, str
- buf
, str
[0],
235 str
+ 1 - buf
, str
[1]);
239 uuid
[i
] = (hex_to_bin(str
[0]) << 4) | hex_to_bin(str
[1]);
241 if (is_uuid_sep(*str
))
245 memcpy(uuid_out
, uuid
, sizeof(uuid
));
250 * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes
251 * @dev: container device for the uuid property
252 * @uuid_out: uuid buffer to replace
253 * @buf: raw sysfs buffer to parse
255 * Enforce that uuids can only be changed while the device is disabled
257 * LOCKING: expects device_lock() is held on entry
259 int nd_uuid_store(struct device
*dev
, u8
**uuid_out
, const char *buf
,
268 rc
= nd_uuid_parse(dev
, uuid
, buf
, len
);
273 *uuid_out
= kmemdup(uuid
, sizeof(uuid
), GFP_KERNEL
);
/*
 * Format the zero-terminated @supported list into @buf as space separated
 * decimal values followed by a newline, with the entry equal to
 * @current_lbasize wrapped in square brackets.
 *
 * Returns the number of characters written, excluding the terminating NUL.
 * The caller must supply a buffer large enough for the whole list.
 */
ssize_t nd_sector_size_show(unsigned long current_lbasize,
		const unsigned long *supported, char *buf)
{
	ssize_t len = 0;
	int i;

	/* values are unsigned long, so print them with %lu */
	for (i = 0; supported[i]; i++) {
		if (current_lbasize == supported[i])
			len += sprintf(buf + len, "[%lu] ", supported[i]);
		else
			len += sprintf(buf + len, "%lu ", supported[i]);
	}
	len += sprintf(buf + len, "\n");

	return len;
}
295 ssize_t
nd_sector_size_store(struct device
*dev
, const char *buf
,
296 unsigned long *current_lbasize
, const unsigned long *supported
)
298 unsigned long lbasize
;
304 rc
= kstrtoul(buf
, 0, &lbasize
);
308 for (i
= 0; supported
[i
]; i
++)
309 if (lbasize
== supported
[i
])
313 *current_lbasize
= lbasize
;
320 void __nd_iostat_start(struct bio
*bio
, unsigned long *start
)
322 struct gendisk
*disk
= bio
->bi_bdev
->bd_disk
;
323 const int rw
= bio_data_dir(bio
);
324 int cpu
= part_stat_lock();
327 part_round_stats(cpu
, &disk
->part0
);
328 part_stat_inc(cpu
, &disk
->part0
, ios
[rw
]);
329 part_stat_add(cpu
, &disk
->part0
, sectors
[rw
], bio_sectors(bio
));
330 part_inc_in_flight(&disk
->part0
, rw
);
333 EXPORT_SYMBOL(__nd_iostat_start
);
335 void nd_iostat_end(struct bio
*bio
, unsigned long start
)
337 struct gendisk
*disk
= bio
->bi_bdev
->bd_disk
;
338 unsigned long duration
= jiffies
- start
;
339 const int rw
= bio_data_dir(bio
);
340 int cpu
= part_stat_lock();
342 part_stat_add(cpu
, &disk
->part0
, ticks
[rw
], duration
);
343 part_round_stats(cpu
, &disk
->part0
);
344 part_dec_in_flight(&disk
->part0
, rw
);
347 EXPORT_SYMBOL(nd_iostat_end
);
349 static ssize_t
commands_show(struct device
*dev
,
350 struct device_attribute
*attr
, char *buf
)
353 struct nvdimm_bus
*nvdimm_bus
= to_nvdimm_bus(dev
);
354 struct nvdimm_bus_descriptor
*nd_desc
= nvdimm_bus
->nd_desc
;
356 for_each_set_bit(cmd
, &nd_desc
->cmd_mask
, BITS_PER_LONG
)
357 len
+= sprintf(buf
+ len
, "%s ", nvdimm_bus_cmd_name(cmd
));
358 len
+= sprintf(buf
+ len
, "\n");
361 static DEVICE_ATTR_RO(commands
);
363 static const char *nvdimm_bus_provider(struct nvdimm_bus
*nvdimm_bus
)
365 struct nvdimm_bus_descriptor
*nd_desc
= nvdimm_bus
->nd_desc
;
366 struct device
*parent
= nvdimm_bus
->dev
.parent
;
368 if (nd_desc
->provider_name
)
369 return nd_desc
->provider_name
;
371 return dev_name(parent
);
376 static ssize_t
provider_show(struct device
*dev
,
377 struct device_attribute
*attr
, char *buf
)
379 struct nvdimm_bus
*nvdimm_bus
= to_nvdimm_bus(dev
);
381 return sprintf(buf
, "%s\n", nvdimm_bus_provider(nvdimm_bus
));
383 static DEVICE_ATTR_RO(provider
);
/*
 * flush_namespaces - per-device callback that flush_regions_dimms() hands
 * to device_for_each_child().
 * NOTE(review): only the signature is present in this extract; the body
 * lines are elided, so what it does to @dev must be confirmed against the
 * complete file.
 */
385 static int flush_namespaces(struct device
*dev
, void *data
)
/*
 * flush_regions_dimms - per-device callback that wait_probe_show() hands
 * to device_for_each_child(). The visible statement runs
 * flush_namespaces() over every child of @dev with NULL callback data.
 * NOTE(review): the embedded numbering jumps from 392 to 396 and stops
 * before the closing brace, so further statements are elided here.
 */
392 static int flush_regions_dimms(struct device
*dev
, void *data
)
396 device_for_each_child(dev
, NULL
, flush_namespaces
);
/*
 * wait_probe_show - sysfs 'wait_probe' attribute (read-only).
 * Visible behaviour: if the bus descriptor provides a flush_probe
 * callback it is invoked and its result stored in rc; then
 * flush_regions_dimms() is run over every child of the bus device, and
 * the string "1\n" is written to @buf.
 * NOTE(review): the embedded numbering skips 405-406 and 409-412, so the
 * declaration of rc, the handling of a non-zero rc and possibly one more
 * statement are elided from this extract.
 */
400 static ssize_t
wait_probe_show(struct device
*dev
,
401 struct device_attribute
*attr
, char *buf
)
403 struct nvdimm_bus
*nvdimm_bus
= to_nvdimm_bus(dev
);
404 struct nvdimm_bus_descriptor
*nd_desc
= nvdimm_bus
->nd_desc
;
407 if (nd_desc
->flush_probe
) {
408 rc
= nd_desc
->flush_probe(nd_desc
);
413 device_for_each_child(dev
, NULL
, flush_regions_dimms
);
414 return sprintf(buf
, "1\n");
416 static DEVICE_ATTR_RO(wait_probe
);
418 static struct attribute
*nvdimm_bus_attributes
[] = {
419 &dev_attr_commands
.attr
,
420 &dev_attr_wait_probe
.attr
,
421 &dev_attr_provider
.attr
,
425 struct attribute_group nvdimm_bus_attribute_group
= {
426 .attrs
= nvdimm_bus_attributes
,
428 EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group
);
430 static void set_badblock(struct badblocks
*bb
, sector_t s
, int num
)
432 dev_dbg(bb
->dev
, "Found a poison range (0x%llx, 0x%llx)\n",
433 (u64
) s
* 512, (u64
) num
* 512);
434 /* this isn't an error as the hardware will still throw an exception */
435 if (badblocks_set(bb
, s
, num
, 1))
436 dev_info_once(bb
->dev
, "%s: failed for sector %llx\n",
441 * __add_badblock_range() - Convert a physical address range to bad sectors
442 * @bb: badblocks instance to populate
443 * @ns_offset: namespace offset where the error range begins (in bytes)
444 * @len: number of bytes of poison to be added
446 * This assumes that the range provided with (ns_offset, len) is within
447 * the bounds of physical addresses for this namespace, i.e. lies in the
448 * interval [ns_start, ns_start + ns_size)
450 static void __add_badblock_range(struct badblocks
*bb
, u64 ns_offset
, u64 len
)
452 const unsigned int sector_size
= 512;
453 sector_t start_sector
, end_sector
;
457 start_sector
= div_u64(ns_offset
, sector_size
);
458 end_sector
= div_u64_rem(ns_offset
+ len
, sector_size
, &rem
);
461 num_sectors
= end_sector
- start_sector
;
463 if (unlikely(num_sectors
> (u64
)INT_MAX
)) {
464 u64 remaining
= num_sectors
;
465 sector_t s
= start_sector
;
468 int done
= min_t(u64
, remaining
, INT_MAX
);
470 set_badblock(bb
, s
, done
);
475 set_badblock(bb
, start_sector
, num_sectors
);
478 static void badblocks_populate(struct list_head
*poison_list
,
479 struct badblocks
*bb
, const struct resource
*res
)
481 struct nd_poison
*pl
;
483 if (list_empty(poison_list
))
486 list_for_each_entry(pl
, poison_list
, list
) {
487 u64 pl_end
= pl
->start
+ pl
->length
- 1;
489 /* Discard intervals with no intersection */
490 if (pl_end
< res
->start
)
492 if (pl
->start
> res
->end
)
494 /* Deal with any overlap after start of the namespace */
495 if (pl
->start
>= res
->start
) {
496 u64 start
= pl
->start
;
499 if (pl_end
<= res
->end
)
502 len
= res
->start
+ resource_size(res
)
504 __add_badblock_range(bb
, start
- res
->start
, len
);
507 /* Deal with overlap for poison starting before the namespace */
508 if (pl
->start
< res
->start
) {
511 if (pl_end
< res
->end
)
512 len
= pl
->start
+ pl
->length
- res
->start
;
514 len
= resource_size(res
);
515 __add_badblock_range(bb
, 0, len
);
521 * nvdimm_badblocks_populate() - Convert a list of poison ranges to badblocks
522 * @region: parent region of the range to interrogate
523 * @bb: badblocks instance to populate
524 * @res: resource range to consider
526 * The poison list generated during bus initialization may contain
527 * multiple, possibly overlapping physical address ranges. Compare each
528 * of these ranges to the resource range currently being initialized,
529 * and add badblocks entries for all matching sub-ranges
531 void nvdimm_badblocks_populate(struct nd_region
*nd_region
,
532 struct badblocks
*bb
, const struct resource
*res
)
534 struct nvdimm_bus
*nvdimm_bus
;
535 struct list_head
*poison_list
;
537 if (!is_nd_pmem(&nd_region
->dev
)) {
538 dev_WARN_ONCE(&nd_region
->dev
, 1,
539 "%s only valid for pmem regions\n", __func__
);
542 nvdimm_bus
= walk_to_nvdimm_bus(&nd_region
->dev
);
543 poison_list
= &nvdimm_bus
->poison_list
;
545 nvdimm_bus_lock(&nvdimm_bus
->dev
);
546 badblocks_populate(poison_list
, bb
, res
);
547 nvdimm_bus_unlock(&nvdimm_bus
->dev
);
549 EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate
);
551 static int add_poison(struct nvdimm_bus
*nvdimm_bus
, u64 addr
, u64 length
,
554 struct nd_poison
*pl
;
556 pl
= kzalloc(sizeof(*pl
), flags
);
562 list_add_tail(&pl
->list
, &nvdimm_bus
->poison_list
);
567 static int bus_add_poison(struct nvdimm_bus
*nvdimm_bus
, u64 addr
, u64 length
)
569 struct nd_poison
*pl
;
571 if (list_empty(&nvdimm_bus
->poison_list
))
572 return add_poison(nvdimm_bus
, addr
, length
, GFP_KERNEL
);
575 * There is a chance this is a duplicate, check for those first.
576 * This will be the common case as ARS_STATUS returns all known
577 * errors in the SPA space, and we can't query it per region
579 list_for_each_entry(pl
, &nvdimm_bus
->poison_list
, list
)
580 if (pl
->start
== addr
) {
581 /* If length has changed, update this list entry */
582 if (pl
->length
!= length
)
588 * If not a duplicate or a simple length update, add the entry as is,
589 * as any overlapping ranges will get resolved when the list is consumed
590 * and converted to badblocks
592 return add_poison(nvdimm_bus
, addr
, length
, GFP_KERNEL
);
595 int nvdimm_bus_add_poison(struct nvdimm_bus
*nvdimm_bus
, u64 addr
, u64 length
)
599 nvdimm_bus_lock(&nvdimm_bus
->dev
);
600 rc
= bus_add_poison(nvdimm_bus
, addr
, length
);
601 nvdimm_bus_unlock(&nvdimm_bus
->dev
);
605 EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison
);
607 void nvdimm_clear_from_poison_list(struct nvdimm_bus
*nvdimm_bus
,
608 phys_addr_t start
, unsigned int len
)
610 struct list_head
*poison_list
= &nvdimm_bus
->poison_list
;
611 u64 clr_end
= start
+ len
- 1;
612 struct nd_poison
*pl
, *next
;
614 nvdimm_bus_lock(&nvdimm_bus
->dev
);
615 WARN_ON_ONCE(list_empty(poison_list
));
618 * [start, clr_end] is the poison interval being cleared.
619 * [pl->start, pl_end] is the poison_list entry we're comparing
620 * the above interval against. The poison list entry may need
621 * to be modified (update either start or length), deleted, or
622 * split into two based on the overlap characteristics
625 list_for_each_entry_safe(pl
, next
, poison_list
, list
) {
626 u64 pl_end
= pl
->start
+ pl
->length
- 1;
628 /* Skip intervals with no intersection */
631 if (pl
->start
> clr_end
)
633 /* Delete completely overlapped poison entries */
634 if ((pl
->start
>= start
) && (pl_end
<= clr_end
)) {
639 /* Adjust start point of partially cleared entries */
640 if ((start
<= pl
->start
) && (clr_end
> pl
->start
)) {
641 pl
->length
-= clr_end
- pl
->start
+ 1;
642 pl
->start
= clr_end
+ 1;
645 /* Adjust pl->length for partial clearing at the tail end */
646 if ((pl
->start
< start
) && (pl_end
<= clr_end
)) {
647 /* pl->start remains the same */
648 pl
->length
= start
- pl
->start
;
652 * If clearing in the middle of an entry, we split it into
653 * two by modifying the current entry to represent one half of
654 * the split, and adding a new entry for the second half.
656 if ((pl
->start
< start
) && (pl_end
> clr_end
)) {
657 u64 new_start
= clr_end
+ 1;
658 u64 new_len
= pl_end
- new_start
+ 1;
660 /* Add new entry covering the right half */
661 add_poison(nvdimm_bus
, new_start
, new_len
, GFP_NOIO
);
662 /* Adjust this entry to cover the left half */
663 pl
->length
= start
- pl
->start
;
667 nvdimm_bus_unlock(&nvdimm_bus
->dev
);
669 EXPORT_SYMBOL_GPL(nvdimm_clear_from_poison_list
);
#ifdef CONFIG_BLK_DEV_INTEGRITY
/*
 * Register a block integrity profile on @disk whose tuple and tag size
 * are both @meta_size, and limit the queue to one integrity segment.
 * Returns 0.
 * NOTE(review): a @meta_size of zero is treated as "no metadata" and
 * skips registration; confirm callers expect that.
 */
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
{
	struct blk_integrity bi;

	if (meta_size == 0)
		return 0;

	memset(&bi, 0, sizeof(bi));

	bi.tuple_size = meta_size;
	bi.tag_size = meta_size;

	blk_integrity_register(disk, &bi);
	blk_queue_max_integrity_segments(disk->queue, 1);

	return 0;
}
EXPORT_SYMBOL(nd_integrity_init);

#else /* CONFIG_BLK_DEV_INTEGRITY */
/* Stub used when block integrity support is compiled out; returns 0. */
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
{
	return 0;
}
EXPORT_SYMBOL(nd_integrity_init);

#endif
/*
 * libnvdimm_init - module init entry point. The visible statements store
 * the results of nvdimm_bus_init() and nd_region_init() in rc.
 * NOTE(review): the embedded numbering skips 705-709 and 711-720, so the
 * declaration of rc, the error checks, any call between the two shown
 * here and the unwind path are elided from this extract - confirm against
 * the complete file before changing the ordering.
 */
700 static __init
int libnvdimm_init(void)
704 rc
= nvdimm_bus_init();
710 rc
= nd_region_init();
/*
 * libnvdimm_exit - module exit entry point. Visible behaviour: warn if
 * nvdimm_bus_list is not empty, and call nd_region_devs_exit().
 * NOTE(review): the embedded numbering skips 724-726 and 728, so further
 * teardown calls are presumably elided from this extract.
 */
721 static __exit
void libnvdimm_exit(void)
723 WARN_ON(!list_empty(&nvdimm_bus_list
));
727 nd_region_devs_exit();
731 MODULE_LICENSE("GPL v2");
732 MODULE_AUTHOR("Intel Corporation");
733 subsys_initcall(libnvdimm_init
);
734 module_exit(libnvdimm_exit
);