2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 #include <linux/vmalloc.h>
15 #include <linux/device.h>
16 #include <linux/ndctl.h>
17 #include <linux/slab.h>
26 static DEFINE_IDA(dimm_ida
);
29 * Retrieve bus and dimm handle and return if this bus supports
30 * get_config_data commands
32 int nvdimm_check_config_data(struct device
*dev
)
34 struct nvdimm
*nvdimm
= to_nvdimm(dev
);
36 if (!nvdimm
->cmd_mask
||
37 !test_bit(ND_CMD_GET_CONFIG_DATA
, &nvdimm
->cmd_mask
)) {
38 if (test_bit(NDD_ALIASING
, &nvdimm
->flags
))
47 static int validate_dimm(struct nvdimm_drvdata
*ndd
)
54 rc
= nvdimm_check_config_data(ndd
->dev
);
56 dev_dbg(ndd
->dev
, "%pf: %s error: %d\n",
57 __builtin_return_address(0), __func__
, rc
);
62 * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area
63 * @ndd: driver data of the dimm to initialize
65 int nvdimm_init_nsarea(struct nvdimm_drvdata
*ndd
)
67 struct nd_cmd_get_config_size
*cmd
= &ndd
->nsarea
;
68 struct nvdimm_bus
*nvdimm_bus
= walk_to_nvdimm_bus(ndd
->dev
);
69 struct nvdimm_bus_descriptor
*nd_desc
;
70 int rc
= validate_dimm(ndd
);
77 return 0; /* already valid */
79 memset(cmd
, 0, sizeof(*cmd
));
80 nd_desc
= nvdimm_bus
->nd_desc
;
81 rc
= nd_desc
->ndctl(nd_desc
, to_nvdimm(ndd
->dev
),
82 ND_CMD_GET_CONFIG_SIZE
, cmd
, sizeof(*cmd
), &cmd_rc
);
88 int nvdimm_init_config_data(struct nvdimm_drvdata
*ndd
)
90 struct nvdimm_bus
*nvdimm_bus
= walk_to_nvdimm_bus(ndd
->dev
);
91 int rc
= validate_dimm(ndd
), cmd_rc
= 0;
92 struct nd_cmd_get_config_data_hdr
*cmd
;
93 struct nvdimm_bus_descriptor
*nd_desc
;
94 u32 max_cmd_size
, config_size
;
103 if (ndd
->nsarea
.status
|| ndd
->nsarea
.max_xfer
== 0
104 || ndd
->nsarea
.config_size
< ND_LABEL_MIN_SIZE
) {
105 dev_dbg(ndd
->dev
, "failed to init config data area: (%d:%d)\n",
106 ndd
->nsarea
.max_xfer
, ndd
->nsarea
.config_size
);
110 ndd
->data
= kvmalloc(ndd
->nsarea
.config_size
, GFP_KERNEL
);
114 max_cmd_size
= min_t(u32
, PAGE_SIZE
, ndd
->nsarea
.max_xfer
);
115 cmd
= kzalloc(max_cmd_size
+ sizeof(*cmd
), GFP_KERNEL
);
119 nd_desc
= nvdimm_bus
->nd_desc
;
120 for (config_size
= ndd
->nsarea
.config_size
, offset
= 0;
121 config_size
; config_size
-= cmd
->in_length
,
122 offset
+= cmd
->in_length
) {
123 cmd
->in_length
= min(config_size
, max_cmd_size
);
124 cmd
->in_offset
= offset
;
125 rc
= nd_desc
->ndctl(nd_desc
, to_nvdimm(ndd
->dev
),
126 ND_CMD_GET_CONFIG_DATA
, cmd
,
127 cmd
->in_length
+ sizeof(*cmd
), &cmd_rc
);
134 memcpy(ndd
->data
+ offset
, cmd
->out_buf
, cmd
->in_length
);
136 dev_dbg(ndd
->dev
, "len: %zu rc: %d\n", offset
, rc
);
142 int nvdimm_set_config_data(struct nvdimm_drvdata
*ndd
, size_t offset
,
143 void *buf
, size_t len
)
145 size_t max_cmd_size
, buf_offset
;
146 struct nd_cmd_set_config_hdr
*cmd
;
147 int rc
= validate_dimm(ndd
), cmd_rc
= 0;
148 struct nvdimm_bus
*nvdimm_bus
= walk_to_nvdimm_bus(ndd
->dev
);
149 struct nvdimm_bus_descriptor
*nd_desc
= nvdimm_bus
->nd_desc
;
157 if (offset
+ len
> ndd
->nsarea
.config_size
)
160 max_cmd_size
= min_t(u32
, PAGE_SIZE
, len
);
161 max_cmd_size
= min_t(u32
, max_cmd_size
, ndd
->nsarea
.max_xfer
);
162 cmd
= kzalloc(max_cmd_size
+ sizeof(*cmd
) + sizeof(u32
), GFP_KERNEL
);
166 for (buf_offset
= 0; len
; len
-= cmd
->in_length
,
167 buf_offset
+= cmd
->in_length
) {
170 cmd
->in_offset
= offset
+ buf_offset
;
171 cmd
->in_length
= min(max_cmd_size
, len
);
172 memcpy(cmd
->in_buf
, buf
+ buf_offset
, cmd
->in_length
);
174 /* status is output in the last 4-bytes of the command buffer */
175 cmd_size
= sizeof(*cmd
) + cmd
->in_length
+ sizeof(u32
);
177 rc
= nd_desc
->ndctl(nd_desc
, to_nvdimm(ndd
->dev
),
178 ND_CMD_SET_CONFIG_DATA
, cmd
, cmd_size
, &cmd_rc
);
191 void nvdimm_set_aliasing(struct device
*dev
)
193 struct nvdimm
*nvdimm
= to_nvdimm(dev
);
195 set_bit(NDD_ALIASING
, &nvdimm
->flags
);
198 void nvdimm_set_locked(struct device
*dev
)
200 struct nvdimm
*nvdimm
= to_nvdimm(dev
);
202 set_bit(NDD_LOCKED
, &nvdimm
->flags
);
205 void nvdimm_clear_locked(struct device
*dev
)
207 struct nvdimm
*nvdimm
= to_nvdimm(dev
);
209 clear_bit(NDD_LOCKED
, &nvdimm
->flags
);
212 static void nvdimm_release(struct device
*dev
)
214 struct nvdimm
*nvdimm
= to_nvdimm(dev
);
216 ida_simple_remove(&dimm_ida
, nvdimm
->id
);
220 static struct device_type nvdimm_device_type
= {
222 .release
= nvdimm_release
,
225 bool is_nvdimm(struct device
*dev
)
227 return dev
->type
== &nvdimm_device_type
;
230 struct nvdimm
*to_nvdimm(struct device
*dev
)
232 struct nvdimm
*nvdimm
= container_of(dev
, struct nvdimm
, dev
);
234 WARN_ON(!is_nvdimm(dev
));
237 EXPORT_SYMBOL_GPL(to_nvdimm
);
239 struct nvdimm
*nd_blk_region_to_dimm(struct nd_blk_region
*ndbr
)
241 struct nd_region
*nd_region
= &ndbr
->nd_region
;
242 struct nd_mapping
*nd_mapping
= &nd_region
->mapping
[0];
244 return nd_mapping
->nvdimm
;
246 EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm
);
248 unsigned long nd_blk_memremap_flags(struct nd_blk_region
*ndbr
)
250 /* pmem mapping properties are private to libnvdimm */
251 return ARCH_MEMREMAP_PMEM
;
253 EXPORT_SYMBOL_GPL(nd_blk_memremap_flags
);
255 struct nvdimm_drvdata
*to_ndd(struct nd_mapping
*nd_mapping
)
257 struct nvdimm
*nvdimm
= nd_mapping
->nvdimm
;
259 WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm
->dev
));
261 return dev_get_drvdata(&nvdimm
->dev
);
263 EXPORT_SYMBOL(to_ndd
);
265 void nvdimm_drvdata_release(struct kref
*kref
)
267 struct nvdimm_drvdata
*ndd
= container_of(kref
, typeof(*ndd
), kref
);
268 struct device
*dev
= ndd
->dev
;
269 struct resource
*res
, *_r
;
271 dev_dbg(dev
, "trace\n");
272 nvdimm_bus_lock(dev
);
273 for_each_dpa_resource_safe(ndd
, res
, _r
)
274 nvdimm_free_dpa(ndd
, res
);
275 nvdimm_bus_unlock(dev
);
282 void get_ndd(struct nvdimm_drvdata
*ndd
)
284 kref_get(&ndd
->kref
);
287 void put_ndd(struct nvdimm_drvdata
*ndd
)
290 kref_put(&ndd
->kref
, nvdimm_drvdata_release
);
293 const char *nvdimm_name(struct nvdimm
*nvdimm
)
295 return dev_name(&nvdimm
->dev
);
297 EXPORT_SYMBOL_GPL(nvdimm_name
);
299 struct kobject
*nvdimm_kobj(struct nvdimm
*nvdimm
)
301 return &nvdimm
->dev
.kobj
;
303 EXPORT_SYMBOL_GPL(nvdimm_kobj
);
305 unsigned long nvdimm_cmd_mask(struct nvdimm
*nvdimm
)
307 return nvdimm
->cmd_mask
;
309 EXPORT_SYMBOL_GPL(nvdimm_cmd_mask
);
311 void *nvdimm_provider_data(struct nvdimm
*nvdimm
)
314 return nvdimm
->provider_data
;
317 EXPORT_SYMBOL_GPL(nvdimm_provider_data
);
319 static ssize_t
commands_show(struct device
*dev
,
320 struct device_attribute
*attr
, char *buf
)
322 struct nvdimm
*nvdimm
= to_nvdimm(dev
);
325 if (!nvdimm
->cmd_mask
)
326 return sprintf(buf
, "\n");
328 for_each_set_bit(cmd
, &nvdimm
->cmd_mask
, BITS_PER_LONG
)
329 len
+= sprintf(buf
+ len
, "%s ", nvdimm_cmd_name(cmd
));
330 len
+= sprintf(buf
+ len
, "\n");
333 static DEVICE_ATTR_RO(commands
);
335 static ssize_t
flags_show(struct device
*dev
,
336 struct device_attribute
*attr
, char *buf
)
338 struct nvdimm
*nvdimm
= to_nvdimm(dev
);
340 return sprintf(buf
, "%s%s\n",
341 test_bit(NDD_ALIASING
, &nvdimm
->flags
) ? "alias " : "",
342 test_bit(NDD_LOCKED
, &nvdimm
->flags
) ? "lock " : "");
344 static DEVICE_ATTR_RO(flags
);
346 static ssize_t
state_show(struct device
*dev
, struct device_attribute
*attr
,
349 struct nvdimm
*nvdimm
= to_nvdimm(dev
);
352 * The state may be in the process of changing, userspace should
353 * quiesce probing if it wants a static answer
355 nvdimm_bus_lock(dev
);
356 nvdimm_bus_unlock(dev
);
357 return sprintf(buf
, "%s\n", atomic_read(&nvdimm
->busy
)
358 ? "active" : "idle");
360 static DEVICE_ATTR_RO(state
);
362 static ssize_t
available_slots_show(struct device
*dev
,
363 struct device_attribute
*attr
, char *buf
)
365 struct nvdimm_drvdata
*ndd
= dev_get_drvdata(dev
);
372 nvdimm_bus_lock(dev
);
373 nfree
= nd_label_nfree(ndd
);
374 if (nfree
- 1 > nfree
) {
375 dev_WARN_ONCE(dev
, 1, "we ate our last label?\n");
379 rc
= sprintf(buf
, "%d\n", nfree
);
380 nvdimm_bus_unlock(dev
);
383 static DEVICE_ATTR_RO(available_slots
);
385 static struct attribute
*nvdimm_attributes
[] = {
386 &dev_attr_state
.attr
,
387 &dev_attr_flags
.attr
,
388 &dev_attr_commands
.attr
,
389 &dev_attr_available_slots
.attr
,
393 struct attribute_group nvdimm_attribute_group
= {
394 .attrs
= nvdimm_attributes
,
396 EXPORT_SYMBOL_GPL(nvdimm_attribute_group
);
398 struct nvdimm
*nvdimm_create(struct nvdimm_bus
*nvdimm_bus
, void *provider_data
,
399 const struct attribute_group
**groups
, unsigned long flags
,
400 unsigned long cmd_mask
, int num_flush
,
401 struct resource
*flush_wpq
)
403 struct nvdimm
*nvdimm
= kzalloc(sizeof(*nvdimm
), GFP_KERNEL
);
409 nvdimm
->id
= ida_simple_get(&dimm_ida
, 0, 0, GFP_KERNEL
);
410 if (nvdimm
->id
< 0) {
414 nvdimm
->provider_data
= provider_data
;
415 nvdimm
->flags
= flags
;
416 nvdimm
->cmd_mask
= cmd_mask
;
417 nvdimm
->num_flush
= num_flush
;
418 nvdimm
->flush_wpq
= flush_wpq
;
419 atomic_set(&nvdimm
->busy
, 0);
421 dev_set_name(dev
, "nmem%d", nvdimm
->id
);
422 dev
->parent
= &nvdimm_bus
->dev
;
423 dev
->type
= &nvdimm_device_type
;
424 dev
->devt
= MKDEV(nvdimm_major
, nvdimm
->id
);
425 dev
->groups
= groups
;
426 nd_device_register(dev
);
430 EXPORT_SYMBOL_GPL(nvdimm_create
);
432 int alias_dpa_busy(struct device
*dev
, void *data
)
434 resource_size_t map_end
, blk_start
, new;
435 struct blk_alloc_info
*info
= data
;
436 struct nd_mapping
*nd_mapping
;
437 struct nd_region
*nd_region
;
438 struct nvdimm_drvdata
*ndd
;
439 struct resource
*res
;
445 nd_region
= to_nd_region(dev
);
446 for (i
= 0; i
< nd_region
->ndr_mappings
; i
++) {
447 nd_mapping
= &nd_region
->mapping
[i
];
448 if (nd_mapping
->nvdimm
== info
->nd_mapping
->nvdimm
)
452 if (i
>= nd_region
->ndr_mappings
)
455 ndd
= to_ndd(nd_mapping
);
456 map_end
= nd_mapping
->start
+ nd_mapping
->size
- 1;
457 blk_start
= nd_mapping
->start
;
460 * In the allocation case ->res is set to free space that we are
461 * looking to validate against PMEM aliasing collision rules
462 * (i.e. BLK is allocated after all aliased PMEM).
465 if (info
->res
->start
>= nd_mapping
->start
466 && info
->res
->start
< map_end
)
474 * Find the free dpa from the end of the last pmem allocation to
475 * the end of the interleave-set mapping.
477 for_each_dpa_resource(ndd
, res
) {
478 if (strncmp(res
->name
, "pmem", 4) != 0)
480 if ((res
->start
>= blk_start
&& res
->start
< map_end
)
481 || (res
->end
>= blk_start
482 && res
->end
<= map_end
)) {
483 new = max(blk_start
, min(map_end
+ 1, res
->end
+ 1));
484 if (new != blk_start
) {
491 /* update the free space range with the probed blk_start */
492 if (info
->res
&& blk_start
> info
->res
->start
) {
493 info
->res
->start
= max(info
->res
->start
, blk_start
);
494 if (info
->res
->start
> info
->res
->end
)
495 info
->res
->end
= info
->res
->start
- 1;
499 info
->available
-= blk_start
- nd_mapping
->start
;
505 * nd_blk_available_dpa - account the unused dpa of BLK region
506 * @nd_region: BLK region whose first mapping is the container of dpa-resource-root + labels
508 * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but
509 * we arrange for them to never start at a lower dpa than the last
510 * PMEM allocation in an aliased region.
512 resource_size_t
nd_blk_available_dpa(struct nd_region
*nd_region
)
514 struct nvdimm_bus
*nvdimm_bus
= walk_to_nvdimm_bus(&nd_region
->dev
);
515 struct nd_mapping
*nd_mapping
= &nd_region
->mapping
[0];
516 struct nvdimm_drvdata
*ndd
= to_ndd(nd_mapping
);
517 struct blk_alloc_info info
= {
518 .nd_mapping
= nd_mapping
,
519 .available
= nd_mapping
->size
,
522 struct resource
*res
;
527 device_for_each_child(&nvdimm_bus
->dev
, &info
, alias_dpa_busy
);
529 /* now account for busy blk allocations in unaliased dpa */
530 for_each_dpa_resource(ndd
, res
) {
531 if (strncmp(res
->name
, "blk", 3) != 0)
533 info
.available
-= resource_size(res
);
536 return info
.available
;
540 * nd_pmem_max_contiguous_dpa - For the given dimm+region, return the max
541 * contiguous unallocated dpa range.
542 * @nd_region: constrain available space check to this reference region
543 * @nd_mapping: container of dpa-resource-root + labels
545 resource_size_t
nd_pmem_max_contiguous_dpa(struct nd_region
*nd_region
,
546 struct nd_mapping
*nd_mapping
)
548 struct nvdimm_drvdata
*ndd
= to_ndd(nd_mapping
);
549 struct nvdimm_bus
*nvdimm_bus
;
550 resource_size_t max
= 0;
551 struct resource
*res
;
553 /* if a dimm is disabled the available capacity is zero */
557 nvdimm_bus
= walk_to_nvdimm_bus(ndd
->dev
);
558 if (__reserve_free_pmem(&nd_region
->dev
, nd_mapping
->nvdimm
))
560 for_each_dpa_resource(ndd
, res
) {
561 if (strcmp(res
->name
, "pmem-reserve") != 0)
563 if (resource_size(res
) > max
)
564 max
= resource_size(res
);
566 release_free_pmem(nvdimm_bus
, nd_mapping
);
571 * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa
572 * @nd_mapping: container of dpa-resource-root + labels
573 * @nd_region: constrain available space check to this reference region
574 * @overlap: calculate available space assuming this level of overlap
576 * Validate that a PMEM label, if present, aligns with the start of an
577 * interleave set and truncate the available size at the lowest BLK
580 * The expectation is that this routine is called multiple times as it
581 * probes for the largest BLK encroachment for any single member DIMM of
582 * the interleave set. Once that value is determined the PMEM-limit for
583 * the set can be established.
585 resource_size_t
nd_pmem_available_dpa(struct nd_region
*nd_region
,
586 struct nd_mapping
*nd_mapping
, resource_size_t
*overlap
)
588 resource_size_t map_start
, map_end
, busy
= 0, available
, blk_start
;
589 struct nvdimm_drvdata
*ndd
= to_ndd(nd_mapping
);
590 struct resource
*res
;
596 map_start
= nd_mapping
->start
;
597 map_end
= map_start
+ nd_mapping
->size
- 1;
598 blk_start
= max(map_start
, map_end
+ 1 - *overlap
);
599 for_each_dpa_resource(ndd
, res
) {
600 if (res
->start
>= map_start
&& res
->start
< map_end
) {
601 if (strncmp(res
->name
, "blk", 3) == 0)
602 blk_start
= min(blk_start
,
603 max(map_start
, res
->start
));
604 else if (res
->end
> map_end
) {
605 reason
= "misaligned to iset";
608 busy
+= resource_size(res
);
609 } else if (res
->end
>= map_start
&& res
->end
<= map_end
) {
610 if (strncmp(res
->name
, "blk", 3) == 0) {
612 * If a BLK allocation overlaps the start of
613 * PMEM the entire interleave set may now only
616 blk_start
= map_start
;
618 busy
+= resource_size(res
);
619 } else if (map_start
> res
->start
&& map_start
< res
->end
) {
620 /* total eclipse of the mapping */
621 busy
+= nd_mapping
->size
;
622 blk_start
= map_start
;
626 *overlap
= map_end
+ 1 - blk_start
;
627 available
= blk_start
- map_start
;
628 if (busy
< available
)
629 return available
- busy
;
633 nd_dbg_dpa(nd_region
, ndd
, res
, "%s\n", reason
);
637 void nvdimm_free_dpa(struct nvdimm_drvdata
*ndd
, struct resource
*res
)
639 WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd
->dev
));
641 __release_region(&ndd
->dpa
, res
->start
, resource_size(res
));
644 struct resource
*nvdimm_allocate_dpa(struct nvdimm_drvdata
*ndd
,
645 struct nd_label_id
*label_id
, resource_size_t start
,
648 char *name
= kmemdup(label_id
, sizeof(*label_id
), GFP_KERNEL
);
649 struct resource
*res
;
654 WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd
->dev
));
655 res
= __request_region(&ndd
->dpa
, start
, n
, name
, 0);
662 * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id
663 * @ndd: container of dpa-resource-root + labels
664 * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid>
666 resource_size_t
nvdimm_allocated_dpa(struct nvdimm_drvdata
*ndd
,
667 struct nd_label_id
*label_id
)
669 resource_size_t allocated
= 0;
670 struct resource
*res
;
672 for_each_dpa_resource(ndd
, res
)
673 if (strcmp(res
->name
, label_id
->id
) == 0)
674 allocated
+= resource_size(res
);
679 static int count_dimms(struct device
*dev
, void *c
)
688 int nvdimm_bus_check_dimm_count(struct nvdimm_bus
*nvdimm_bus
, int dimm_count
)
691 /* Flush any possible dimm registration failures */
694 device_for_each_child(&nvdimm_bus
->dev
, &count
, count_dimms
);
695 dev_dbg(&nvdimm_bus
->dev
, "count: %d\n", count
);
696 if (count
!= dimm_count
)
700 EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count
);
702 void __exit
nvdimm_devs_exit(void)
704 ida_destroy(&dimm_ida
);