// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
#include <linux/memremap.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/io.h>
#include "dax-private.h"
#include "bus.h"

static DEFINE_MUTEX(dax_bus_lock);

/*
 * All changes to the dax region configuration occur with this lock held
 * for write.
 */
DECLARE_RWSEM(dax_region_rwsem);

/*
 * All changes to the dax device configuration occur with this lock held
 * for write.
 */
DECLARE_RWSEM(dax_dev_rwsem);

#define DAX_NAME_LEN 30
struct dax_id {
	struct list_head list;
	char dev_name[DAX_NAME_LEN];
};
static int dax_bus_uevent(const struct device *dev, struct kobj_uevent_env *env)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero.
	 */
	return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
}

#define to_dax_drv(__drv)	container_of_const(__drv, struct dax_device_driver, drv)
static struct dax_id *__dax_match_id(const struct dax_device_driver *dax_drv,
		const char *dev_name)
{
	struct dax_id *dax_id;

	lockdep_assert_held(&dax_bus_lock);

	list_for_each_entry(dax_id, &dax_drv->ids, list)
		if (sysfs_streq(dax_id->dev_name, dev_name))
			return dax_id;
	return NULL;
}
static int dax_match_id(const struct dax_device_driver *dax_drv, struct device *dev)
{
	int match;

	mutex_lock(&dax_bus_lock);
	match = !!__dax_match_id(dax_drv, dev_name(dev));
	mutex_unlock(&dax_bus_lock);

	return match;
}
static int dax_match_type(const struct dax_device_driver *dax_drv, struct device *dev)
{
	enum dax_driver_type type = DAXDRV_DEVICE_TYPE;
	struct dev_dax *dev_dax = to_dev_dax(dev);

	if (dev_dax->region->res.flags & IORESOURCE_DAX_KMEM)
		type = DAXDRV_KMEM_TYPE;

	if (dax_drv->type == type)
		return 1;

	/* default to device mode if dax_kmem is disabled */
	if (dax_drv->type == DAXDRV_DEVICE_TYPE &&
	    !IS_ENABLED(CONFIG_DEV_DAX_KMEM))
		return 1;

	return 0;
}

enum id_action {
	ID_REMOVE,
	ID_ADD,
};
static ssize_t do_id_store(struct device_driver *drv, const char *buf,
		size_t count, enum id_action action)
{
	struct dax_device_driver *dax_drv = to_dax_drv(drv);
	unsigned int region_id, id;
	char devname[DAX_NAME_LEN];
	struct dax_id *dax_id;
	ssize_t rc = count;
	int fields;

	fields = sscanf(buf, "dax%d.%d", &region_id, &id);
	if (fields != 2)
		return -EINVAL;
	sprintf(devname, "dax%d.%d", region_id, id);
	if (!sysfs_streq(buf, devname))
		return -EINVAL;

	mutex_lock(&dax_bus_lock);
	dax_id = __dax_match_id(dax_drv, buf);
	if (!dax_id) {
		if (action == ID_ADD) {
			dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
			if (dax_id) {
				strscpy(dax_id->dev_name, buf, DAX_NAME_LEN);
				list_add(&dax_id->list, &dax_drv->ids);
			} else
				rc = -ENOMEM;
		}
	} else if (action == ID_REMOVE) {
		list_del(&dax_id->list);
		kfree(dax_id);
	}
	mutex_unlock(&dax_bus_lock);

	if (rc < 0)
		return rc;
	if (action == ID_ADD)
		rc = driver_attach(drv);
	if (rc)
		return rc;
	return count;
}
static ssize_t new_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_ADD);
}
static DRIVER_ATTR_WO(new_id);

static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_REMOVE);
}
static DRIVER_ATTR_WO(remove_id);

static struct attribute *dax_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(dax_drv);
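
/*
 * For reference, new_id/remove_id are typically driven from userspace
 * to move a device-dax instance between dax drivers, e.g. handing
 * dax0.0 to the kmem driver for use as system memory:
 *
 *	# echo dax0.0 > /sys/bus/dax/drivers/device_dax/unbind
 *	# echo dax0.0 > /sys/bus/dax/drivers/kmem/new_id
 */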
static int dax_bus_match(struct device *dev, const struct device_driver *drv);
/*
 * Static dax regions are regions created by an external subsystem (e.g.
 * nvdimm) where a single range is assigned. Their boundaries are set by the
 * external subsystem and are usually limited to one physical memory range.
 * For example, for PMEM it is usually defined by NVDIMM Namespace boundaries
 * (i.e. a single contiguous range).
 *
 * On dynamic dax regions, the assigned region can be partitioned by dax core
 * into multiple subdivisions. A subdivision is represented as one
 * /dev/daxN.M device composed of one or more potentially discontiguous ranges.
 *
 * When allocating a dax region, drivers must set whether it's static
 * (IORESOURCE_DAX_STATIC). On static dax devices, the @pgmap is pre-assigned
 * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax
 * devices it is NULL but afterwards allocated by dax core on device ->probe().
 * Care is needed to make sure that dynamic dax devices are torn down with a
 * cleared @pgmap field (see kill_dev_dax()).
 */
static bool is_static(struct dax_region *dax_region)
{
	return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
}

bool static_dev_dax(struct dev_dax *dev_dax)
{
	return is_static(dev_dax->region);
}
EXPORT_SYMBOL_GPL(static_dev_dax);
static u64 dev_dax_size(struct dev_dax *dev_dax)
{
	u64 size = 0;
	int i;

	lockdep_assert_held(&dax_dev_rwsem);

	for (i = 0; i < dev_dax->nr_range; i++)
		size += range_len(&dev_dax->ranges[i].range);

	return size;
}
static int dax_bus_probe(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	int rc;
	u64 size;

	rc = down_read_interruptible(&dax_dev_rwsem);
	if (rc)
		return rc;
	size = dev_dax_size(dev_dax);
	up_read(&dax_dev_rwsem);

	if (size == 0 || dev_dax->id < 0)
		return -ENXIO;

	rc = dax_drv->probe(dev_dax);

	if (rc || is_static(dax_region))
		return rc;

	/*
	 * Track new seed creation only after successful probe of the
	 * previous seed.
	 */
	if (dax_region->seed == dev)
		dax_region->seed = NULL;

	return 0;
}
static void dax_bus_remove(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);

	if (dax_drv->remove)
		dax_drv->remove(dev_dax);
}
static const struct bus_type dax_bus_type = {
	.name = "dax",
	.uevent = dax_bus_uevent,
	.match = dax_bus_match,
	.probe = dax_bus_probe,
	.remove = dax_bus_remove,
	.drv_groups = dax_drv_groups,
};
static int dax_bus_match(struct device *dev, const struct device_driver *drv)
{
	const struct dax_device_driver *dax_drv = to_dax_drv(drv);

	if (dax_match_id(dax_drv, dev))
		return 1;
	return dax_match_type(dax_drv, dev);
}
/*
 * Rely on the fact that drvdata is set before the attributes are
 * registered, and that the attributes are unregistered before drvdata
 * is cleared to assume that drvdata is always valid.
 */
static ssize_t id_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%d\n", dax_region->id);
}
static DEVICE_ATTR_RO(id);
static ssize_t region_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%llu\n",
			(unsigned long long)resource_size(&dax_region->res));
}
static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
		region_size_show, NULL);
static ssize_t region_align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%u\n", dax_region->align);
}
static struct device_attribute dev_attr_region_align =
		__ATTR(align, 0400, region_align_show, NULL);
#define for_each_dax_region_resource(dax_region, res) \
	for (res = (dax_region)->res.child; res; res = res->sibling)

static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
{
	resource_size_t size = resource_size(&dax_region->res);
	struct resource *res;

	lockdep_assert_held(&dax_region_rwsem);

	for_each_dax_region_resource(dax_region, res)
		size -= resource_size(res);
	return size;
}
static ssize_t available_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long size;
	int rc;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	size = dax_region_avail_size(dax_region);
	up_read(&dax_region_rwsem);

	return sysfs_emit(buf, "%llu\n", size);
}
static DEVICE_ATTR_RO(available_size);
static ssize_t seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *seed;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	seed = dax_region->seed;
	rc = sysfs_emit(buf, "%s\n", seed ? dev_name(seed) : "");
	up_read(&dax_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RO(seed);
static ssize_t create_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *youngest;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	youngest = dax_region->youngest;
	rc = sysfs_emit(buf, "%s\n", youngest ? dev_name(youngest) : "");
	up_read(&dax_region_rwsem);

	return rc;
}

static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data);
static ssize_t create_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long avail;
	ssize_t rc;
	int val;

	if (is_static(dax_region))
		return -EINVAL;

	rc = kstrtoint(buf, 0, &val);
	if (rc)
		return rc;
	if (val != 1)
		return -EINVAL;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	avail = dax_region_avail_size(dax_region);
	if (avail == 0)
		rc = -ENOSPC;
	else {
		struct dev_dax_data data = {
			.dax_region = dax_region,
			.size = 0,
			.id = -1,
			.memmap_on_memory = false,
		};
		struct dev_dax *dev_dax = __devm_create_dev_dax(&data);

		if (IS_ERR(dev_dax))
			rc = PTR_ERR(dev_dax);
		else {
			/*
			 * In support of crafting multiple new devices
			 * simultaneously multiple seeds can be created,
			 * but only the first one that has not been
			 * successfully bound is tracked as the region
			 * seed.
			 */
			if (!dax_region->seed)
				dax_region->seed = &dev_dax->dev;
			dax_region->youngest = &dev_dax->dev;
			rc = len;
		}
	}
	up_write(&dax_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RW(create);
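
/*
 * Example: writing "1" to a dynamic region's create attribute
 * instantiates a new, 0-sized seed device that is then sized via its
 * size/mapping attributes (path illustrative; the dax_region group
 * lives on the region's parent device):
 *
 *	# echo 1 > /sys/bus/dax/devices/dax0.0/../dax_region/create
 */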
void kill_dev_dax(struct dev_dax *dev_dax)
{
	struct dax_device *dax_dev = dev_dax->dax_dev;
	struct inode *inode = dax_inode(dax_dev);

	kill_dax(dax_dev);
	unmap_mapping_range(inode->i_mapping, 0, 0, 1);

	/*
	 * Dynamic dax regions have the pgmap allocated via dev_kzalloc()
	 * and thus freed by devm. Clear the pgmap to not have stale pgmap
	 * ranges on probe() from previous reconfigurations of region devices.
	 */
	if (!static_dev_dax(dev_dax))
		dev_dax->pgmap = NULL;
}
EXPORT_SYMBOL_GPL(kill_dev_dax);
static void trim_dev_dax_range(struct dev_dax *dev_dax)
{
	int i = dev_dax->nr_range - 1;
	struct range *range = &dev_dax->ranges[i].range;
	struct dax_region *dax_region = dev_dax->region;

	lockdep_assert_held_write(&dax_region_rwsem);
	dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
		(unsigned long long)range->start,
		(unsigned long long)range->end);

	__release_region(&dax_region->res, range->start, range_len(range));
	if (--dev_dax->nr_range == 0) {
		kfree(dev_dax->ranges);
		dev_dax->ranges = NULL;
	}
}
static void free_dev_dax_ranges(struct dev_dax *dev_dax)
{
	while (dev_dax->nr_range)
		trim_dev_dax_range(dev_dax);
}
static void unregister_dev_dax(void *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	dev_dbg(dev, "%s\n", __func__);

	down_write(&dax_region_rwsem);
	kill_dev_dax(dev_dax);
	device_del(dev);
	free_dev_dax_ranges(dev_dax);
	put_device(dev);
	up_write(&dax_region_rwsem);
}
static void dax_region_free(struct kref *kref)
{
	struct dax_region *dax_region;

	dax_region = container_of(kref, struct dax_region, kref);
	kfree(dax_region);
}

static void dax_region_put(struct dax_region *dax_region)
{
	kref_put(&dax_region->kref, dax_region_free);
}
/* a return value >= 0 indicates this invocation invalidated the id */
static int __free_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region;
	int rc = dev_dax->id;

	lockdep_assert_held_write(&dax_dev_rwsem);

	if (!dev_dax->dyn_id || dev_dax->id < 0)
		return -1;
	dax_region = dev_dax->region;
	ida_free(&dax_region->ida, dev_dax->id);
	dax_region_put(dax_region);
	dev_dax->id = -1;
	return rc;
}
static int free_dev_dax_id(struct dev_dax *dev_dax)
{
	int rc;

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		return rc;
	rc = __free_dev_dax_id(dev_dax);
	up_write(&dax_dev_rwsem);
	return rc;
}
static int alloc_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;
	int id;

	id = ida_alloc(&dax_region->ida, GFP_KERNEL);
	if (id < 0)
		return id;
	kref_get(&dax_region->kref);
	dev_dax->dyn_id = true;
	dev_dax->id = id;
	return id;
}
static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct dev_dax *dev_dax;
	struct device *victim;
	bool do_del = false;
	int rc;

	if (is_static(dax_region))
		return -EINVAL;

	victim = device_find_child_by_name(dax_region->dev, buf);
	if (!victim)
		return -ENXIO;

	device_lock(dev);
	device_lock(victim);
	dev_dax = to_dev_dax(victim);
	down_write(&dax_dev_rwsem);
	if (victim->driver || dev_dax_size(dev_dax))
		rc = -EBUSY;
	else {
		/*
		 * Invalidate the device so it does not become active
		 * again, but always preserve device-id-0 so that
		 * /sys/bus/dax/ is guaranteed to be populated while any
		 * dax_region is registered.
		 */
		if (dev_dax->id > 0) {
			do_del = __free_dev_dax_id(dev_dax) >= 0;
			rc = len;
			if (dax_region->seed == victim)
				dax_region->seed = NULL;
			if (dax_region->youngest == victim)
				dax_region->youngest = NULL;
		} else
			rc = -EBUSY;
	}
	up_write(&dax_dev_rwsem);
	device_unlock(victim);

	/* won the race to invalidate the device, clean it up */
	if (do_del)
		devm_release_action(dev, unregister_dev_dax, victim);
	device_unlock(dev);
	put_device(victim);

	return rc;
}
static DEVICE_ATTR_WO(delete);
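
/*
 * Example: a dynamically created instance is deleted by writing its
 * name to the region's delete attribute (path illustrative):
 *
 *	# echo dax0.1 > /sys/bus/dax/devices/dax0.0/../dax_region/delete
 */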
static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
		int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dax_region *dax_region = dev_get_drvdata(dev);

	if (is_static(dax_region))
		if (a == &dev_attr_available_size.attr
				|| a == &dev_attr_create.attr
				|| a == &dev_attr_seed.attr
				|| a == &dev_attr_delete.attr)
			return 0;
	return a->mode;
}
static struct attribute *dax_region_attributes[] = {
	&dev_attr_available_size.attr,
	&dev_attr_region_size.attr,
	&dev_attr_region_align.attr,
	&dev_attr_create.attr,
	&dev_attr_seed.attr,
	&dev_attr_delete.attr,
	&dev_attr_id.attr,
	NULL,
};

static const struct attribute_group dax_region_attribute_group = {
	.name = "dax_region",
	.attrs = dax_region_attributes,
	.is_visible = dax_region_visible,
};
static const struct attribute_group *dax_region_attribute_groups[] = {
	&dax_region_attribute_group,
	NULL,
};
static void dax_region_unregister(void *region)
{
	struct dax_region *dax_region = region;

	sysfs_remove_groups(&dax_region->dev->kobj,
			dax_region_attribute_groups);
	dax_region_put(dax_region);
}
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
		struct range *range, int target_node, unsigned int align,
		unsigned long flags)
{
	struct dax_region *dax_region;

	/*
	 * The DAX core assumes that it can store its private data in
	 * parent->driver_data. This WARN is a reminder / safeguard for
	 * developers of device-dax drivers.
	 */
	if (dev_get_drvdata(parent)) {
		dev_WARN(parent, "dax core failed to setup private data\n");
		return NULL;
	}

	if (!IS_ALIGNED(range->start, align)
			|| !IS_ALIGNED(range_len(range), align))
		return NULL;

	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
	if (!dax_region)
		return NULL;

	dev_set_drvdata(parent, dax_region);
	kref_init(&dax_region->kref);
	dax_region->id = region_id;
	dax_region->align = align;
	dax_region->dev = parent;
	dax_region->target_node = target_node;
	ida_init(&dax_region->ida);
	dax_region->res = (struct resource) {
		.start = range->start,
		.end = range->end,
		.flags = IORESOURCE_MEM | flags,
	};

	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
		kfree(dax_region);
		return NULL;
	}

	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
		return NULL;
	return dax_region;
}
EXPORT_SYMBOL_GPL(alloc_dax_region);
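
/*
 * A minimal producer sketch (modeled on the pmem/hmem drivers; names
 * and error handling elided for brevity):
 *
 *	struct dax_region *dax_region;
 *	struct dev_dax_data data;
 *
 *	dax_region = alloc_dax_region(dev, region_id, &range, target_node,
 *			align, IORESOURCE_DAX_STATIC);
 *	data = (struct dev_dax_data) {
 *		.dax_region = dax_region,
 *		.id = 0,
 *		.size = range_len(&range),
 *	};
 *	dev_dax = devm_create_dev_dax(&data);
 */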
static void dax_mapping_release(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct device *parent = dev->parent;
	struct dev_dax *dev_dax = to_dev_dax(parent);

	ida_free(&dev_dax->ida, mapping->id);
	kfree(mapping);
	put_device(parent);
}
static void unregister_dax_mapping(void *data)
{
	struct device *dev = data;
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);

	dev_dbg(dev, "%s\n", __func__);

	dev_dax->ranges[mapping->range_id].mapping = NULL;
	mapping->range_id = -1;

	device_unregister(dev);
}
static struct dev_dax_range *get_dax_range(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
	int rc;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return NULL;
	if (mapping->range_id < 0) {
		up_write(&dax_region_rwsem);
		return NULL;
	}

	return &dev_dax->ranges[mapping->range_id];
}
static void put_dax_range(void)
{
	up_write(&dax_region_rwsem);
}
static ssize_t start_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.start);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(start, 0400, start_show, NULL);
static ssize_t end_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.end);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(end, 0400, end_show, NULL);
static ssize_t pgoff_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#lx\n", dax_range->pgoff);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);
static struct attribute *dax_mapping_attributes[] = {
	&dev_attr_start.attr,
	&dev_attr_end.attr,
	&dev_attr_page_offset.attr,
	NULL,
};

static const struct attribute_group dax_mapping_attribute_group = {
	.attrs = dax_mapping_attributes,
};

static const struct attribute_group *dax_mapping_attribute_groups[] = {
	&dax_mapping_attribute_group,
	NULL,
};
static const struct device_type dax_mapping_type = {
	.release = dax_mapping_release,
	.groups = dax_mapping_attribute_groups,
};
static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
{
	struct dax_region *dax_region = dev_dax->region;
	struct dax_mapping *mapping;
	struct device *dev;
	int rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
				"region disabled\n"))
		return -ENXIO;

	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
	if (!mapping)
		return -ENOMEM;
	mapping->range_id = range_id;
	mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
	if (mapping->id < 0) {
		kfree(mapping);
		return -ENOMEM;
	}
	dev_dax->ranges[range_id].mapping = mapping;
	dev = &mapping->dev;
	device_initialize(dev);
	dev->parent = &dev_dax->dev;
	get_device(dev->parent);
	dev->type = &dax_mapping_type;
	dev_set_name(dev, "mapping%d", mapping->id);
	rc = device_add(dev);
	if (rc) {
		put_device(dev);
		return rc;
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
			dev);
	if (rc)
		return rc;
	return 0;
}
static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
		resource_size_t size)
{
	struct dax_region *dax_region = dev_dax->region;
	struct resource *res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct dev_dax_range *ranges;
	unsigned long pgoff = 0;
	struct resource *alloc;
	int i, rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	/* handle the seed alloc special case */
	if (!size) {
		if (dev_WARN_ONCE(dev, dev_dax->nr_range,
					"0-size allocation must be first\n"))
			return -EBUSY;
		/* nr_range == 0 is elsewhere special cased as 0-size device */
		return 0;
	}

	alloc = __request_region(res, start, size, dev_name(dev), 0);
	if (!alloc)
		return -ENOMEM;

	ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
			* (dev_dax->nr_range + 1), GFP_KERNEL);
	if (!ranges) {
		__release_region(res, alloc->start, resource_size(alloc));
		return -ENOMEM;
	}

	for (i = 0; i < dev_dax->nr_range; i++)
		pgoff += PHYS_PFN(range_len(&ranges[i].range));
	dev_dax->ranges = ranges;
	ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
		.pgoff = pgoff,
		.range = {
			.start = alloc->start,
			.end = alloc->end,
		},
	};

	dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
			&alloc->start, &alloc->end);
	/*
	 * A dev_dax instance must be registered before mapping device
	 * children can be added. Defer to devm_create_dev_dax() to add
	 * the initial mapping device.
	 */
	if (!device_is_registered(&dev_dax->dev))
		return 0;

	rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
	if (rc)
		trim_dev_dax_range(dev_dax);

	return rc;
}
static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res,
		resource_size_t size)
{
	int last_range = dev_dax->nr_range - 1;
	struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
	bool is_shrink = resource_size(res) > size;
	struct range *range = &dax_range->range;
	struct device *dev = &dev_dax->dev;
	int rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
		return -EINVAL;

	rc = adjust_resource(res, range->start, size);
	if (rc)
		return rc;

	*range = (struct range) {
		.start = range->start,
		.end = range->start + size - 1,
	};

	dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
			last_range, (unsigned long long) range->start,
			(unsigned long long) range->end);

	return 0;
}
static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	unsigned long long size;
	int rc;

	rc = down_read_interruptible(&dax_dev_rwsem);
	if (rc)
		return rc;
	size = dev_dax_size(dev_dax);
	up_read(&dax_dev_rwsem);

	return sysfs_emit(buf, "%llu\n", size);
}
static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
{
	/*
	 * The minimum mapping granularity for a device instance is a
	 * single subsection, unless the arch says otherwise.
	 */
	return IS_ALIGNED(size, max_t(unsigned long,
				dev_dax->align, memremap_compat_align()));
}
static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
	struct dax_region *dax_region = dev_dax->region;
	struct device *dev = &dev_dax->dev;
	int i;

	for (i = dev_dax->nr_range - 1; i >= 0; i--) {
		struct range *range = &dev_dax->ranges[i].range;
		struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
		struct resource *adjust = NULL, *res;
		resource_size_t shrink;

		shrink = min_t(u64, to_shrink, range_len(range));
		if (shrink >= range_len(range)) {
			devm_release_action(dax_region->dev,
					unregister_dax_mapping, &mapping->dev);
			trim_dev_dax_range(dev_dax);
			to_shrink -= shrink;
			if (!to_shrink)
				break;
			continue;
		}

		for_each_dax_region_resource(dax_region, res)
			if (strcmp(res->name, dev_name(dev)) == 0
					&& res->start == range->start) {
				adjust = res;
				break;
			}

		if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
					"failed to find matching resource\n"))
			return -ENXIO;
		return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
				- shrink);
	}
	return 0;
}
/*
 * Only allow adjustments that preserve the relative pgoff of existing
 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
 */
static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
{
	struct dev_dax_range *last;
	int i;

	if (dev_dax->nr_range == 0)
		return false;
	if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
		return false;
	last = &dev_dax->ranges[dev_dax->nr_range - 1];
	if (last->range.start != res->start || last->range.end != res->end)
		return false;
	for (i = 0; i < dev_dax->nr_range - 1; i++) {
		struct dev_dax_range *dax_range = &dev_dax->ranges[i];

		if (dax_range->pgoff > last->pgoff)
			return false;
	}

	return true;
}
static ssize_t dev_dax_resize(struct dax_region *dax_region,
		struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
	resource_size_t dev_size = dev_dax_size(dev_dax);
	struct resource *region_res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct resource *res, *first;
	resource_size_t alloc = 0;
	int rc;

	if (dev->driver)
		return -EBUSY;
	if (size == dev_size)
		return 0;
	if (size > dev_size && size - dev_size > avail)
		return -ENOSPC;
	if (size < dev_size)
		return dev_dax_shrink(dev_dax, size);

	to_alloc = size - dev_size;
	if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
			"resize of %pa misaligned\n", &to_alloc))
		return -ENXIO;

	/*
	 * Expand the device into the unused portion of the region. This
	 * may involve adjusting the end of an existing resource, or
	 * allocating a new resource.
	 */
retry:
	first = region_res->child;
	if (!first)
		return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);

	rc = -ENOSPC;
	for (res = first; res; res = res->sibling) {
		struct resource *next = res->sibling;

		/* space at the beginning of the region */
		if (res == first && res->start > dax_region->res.start) {
			alloc = min(res->start - dax_region->res.start, to_alloc);
			rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
			break;
		}

		alloc = 0;
		/* space between allocations */
		if (next && next->start > res->end + 1)
			alloc = min(next->start - (res->end + 1), to_alloc);

		/* space at the end of the region */
		if (!alloc && !next && res->end < region_res->end)
			alloc = min(region_res->end - res->end, to_alloc);

		if (!alloc)
			continue;

		if (adjust_ok(dev_dax, res)) {
			rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
			break;
		}
		rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
		break;
	}
	if (rc)
		return rc;
	to_alloc -= alloc;
	if (to_alloc)
		goto retry;
	return 0;
}
static ssize_t size_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	ssize_t rc;
	unsigned long long val;
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	rc = kstrtoull(buf, 0, &val);
	if (rc)
		return rc;

	if (!alloc_is_aligned(dev_dax, val)) {
		dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
		return -EINVAL;
	}

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		rc = -ENXIO;
		goto err_unlock;
	}
	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		goto err_unlock;

	rc = dev_dax_resize(dax_region, dev_dax, val);

	up_write(&dax_dev_rwsem);
err_unlock:
	up_write(&dax_region_rwsem);

	if (rc == 0)
		return len;
	return rc;
}
static DEVICE_ATTR_RW(size);
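
/*
 * Example: growing a dynamic instance to 16 GiB. The value must satisfy
 * alloc_is_aligned(), i.e. be at least subsection aligned:
 *
 *	# echo 17179869184 > /sys/bus/dax/devices/dax0.1/size
 */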
static ssize_t range_parse(const char *opt, size_t len, struct range *range)
{
	unsigned long long addr = 0;
	char *start, *end, *str;
	ssize_t rc = -EINVAL;

	str = kstrdup(opt, GFP_KERNEL);
	if (!str)
		return rc;

	end = str;
	start = strsep(&end, "-");
	if (!start || !end)
		goto err;

	rc = kstrtoull(start, 16, &addr);
	if (rc)
		goto err;
	range->start = addr;

	rc = kstrtoull(end, 16, &addr);
	if (rc)
		goto err;
	range->end = addr;

err:
	kfree(str);
	return rc;
}
static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	size_t to_alloc;
	struct range r;
	ssize_t rc;

	rc = range_parse(buf, len, &r);
	if (rc)
		return rc;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		up_write(&dax_region_rwsem);
		return -ENXIO;
	}
	rc = down_write_killable(&dax_dev_rwsem);
	if (rc) {
		up_write(&dax_region_rwsem);
		return rc;
	}

	to_alloc = range_len(&r);
	if (alloc_is_aligned(dev_dax, to_alloc))
		rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
	up_write(&dax_dev_rwsem);
	up_write(&dax_region_rwsem);

	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_WO(mapping);
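
/*
 * Example: per range_parse() the mapping attribute takes a hexadecimal
 * "<start>-<end>" physical address range (values illustrative):
 *
 *	# echo 0x200000000-0x2ffffffff > /sys/bus/dax/devices/dax0.1/mapping
 */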
static ssize_t align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax->align);
}
static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
{
	struct device *dev = &dev_dax->dev;
	int i;

	for (i = 0; i < dev_dax->nr_range; i++) {
		size_t len = range_len(&dev_dax->ranges[i].range);

		if (!alloc_is_aligned(dev_dax, len)) {
			dev_dbg(dev, "%s: align %u invalid for range %d\n",
				__func__, dev_dax->align, i);
			return -EINVAL;
		}
	}

	return 0;
}
static ssize_t align_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long val, align_save;
	ssize_t rc;

	rc = kstrtoul(buf, 0, &val);
	if (rc)
		return -ENXIO;

	if (!dax_align_valid(val))
		return -EINVAL;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		up_write(&dax_region_rwsem);
		return -ENXIO;
	}

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc) {
		up_write(&dax_region_rwsem);
		return rc;
	}
	if (dev->driver) {
		rc = -EBUSY;
		goto out_unlock;
	}

	align_save = dev_dax->align;
	dev_dax->align = val;
	rc = dev_dax_validate_align(dev_dax);
	if (rc)
		dev_dax->align = align_save;
out_unlock:
	up_write(&dax_dev_rwsem);
	up_write(&dax_region_rwsem);
	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_RW(align);
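
/*
 * Example: valid alignments are the page table mapping sizes the
 * architecture supports (e.g. 4K/2M/1G on x86_64, see dax_align_valid());
 * selecting 2M:
 *
 *	# echo 2097152 > /sys/bus/dax/devices/dax0.1/align
 */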
static int dev_dax_target_node(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;

	return dax_region->target_node;
}
static ssize_t target_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax_target_node(dev_dax));
}
static DEVICE_ATTR_RO(target_node);
static ssize_t resource_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long long start;

	if (dev_dax->nr_range < 1)
		start = dax_region->res.start;
	else
		start = dev_dax->ranges[0].range.start;

	return sysfs_emit(buf, "%#llx\n", start);
}
static DEVICE_ATTR(resource, 0400, resource_show, NULL);
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero.
	 */
	return sysfs_emit(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
}
static DEVICE_ATTR_RO(modalias);
static ssize_t numa_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);
static ssize_t memmap_on_memory_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax->memmap_on_memory);
}

static ssize_t memmap_on_memory_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	bool val;
	int rc;

	rc = kstrtobool(buf, &val);
	if (rc)
		return rc;

	if (val == true && !mhp_supports_memmap_on_memory()) {
		dev_dbg(dev, "memmap_on_memory is not available\n");
		return -EOPNOTSUPP;
	}

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		return rc;

	if (dev_dax->memmap_on_memory != val && dev->driver &&
	    to_dax_drv(dev->driver)->type == DAXDRV_KMEM_TYPE) {
		up_write(&dax_dev_rwsem);
		return -EBUSY;
	}

	dev_dax->memmap_on_memory = val;
	up_write(&dax_dev_rwsem);

	return len;
}
static DEVICE_ATTR_RW(memmap_on_memory);
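
/*
 * Example: requesting that kmem place the memmap (struct pages) in the
 * hotplugged memory itself; must be set before binding to the kmem
 * driver, since the setting cannot change while kmem is attached:
 *
 *	# echo 1 > /sys/bus/dax/devices/dax0.1/memmap_on_memory
 */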
static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
		return 0;
	if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
		return 0;
	if (a == &dev_attr_mapping.attr && is_static(dax_region))
		return 0;
	if ((a == &dev_attr_align.attr ||
	     a == &dev_attr_size.attr) && is_static(dax_region))
		return 0444;
	return a->mode;
}
static struct attribute *dev_dax_attributes[] = {
	&dev_attr_modalias.attr,
	&dev_attr_size.attr,
	&dev_attr_mapping.attr,
	&dev_attr_target_node.attr,
	&dev_attr_align.attr,
	&dev_attr_resource.attr,
	&dev_attr_numa_node.attr,
	&dev_attr_memmap_on_memory.attr,
	NULL,
};
static const struct attribute_group dev_dax_attribute_group = {
	.attrs = dev_dax_attributes,
	.is_visible = dev_dax_visible,
};

static const struct attribute_group *dax_attribute_groups[] = {
	&dev_dax_attribute_group,
	NULL,
};
static void dev_dax_release(struct device *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_device *dax_dev = dev_dax->dax_dev;

	put_dax(dax_dev);
	free_dev_dax_id(dev_dax);
	kfree(dev_dax->pgmap);
	kfree(dev_dax);
}

static const struct device_type dev_dax_type = {
	.release = dev_dax_release,
	.groups = dax_attribute_groups,
};
static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data)
{
	struct dax_region *dax_region = data->dax_region;
	struct device *parent = dax_region->dev;
	struct dax_device *dax_dev;
	struct dev_dax *dev_dax;
	struct inode *inode;
	struct device *dev;
	int rc;

	dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
	if (!dev_dax)
		return ERR_PTR(-ENOMEM);

	dev_dax->region = dax_region;
	if (is_static(dax_region)) {
		if (dev_WARN_ONCE(parent, data->id < 0,
				"dynamic id specified to static region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		dev_dax->id = data->id;
	} else {
		if (dev_WARN_ONCE(parent, data->id >= 0,
				"static id specified to dynamic region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		rc = alloc_dev_dax_id(dev_dax);
		if (rc < 0)
			goto err_id;
	}

	dev = &dev_dax->dev;
	device_initialize(dev);
	dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);

	rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
	if (rc)
		goto err_range;

	if (data->pgmap) {
		dev_WARN_ONCE(parent, !is_static(dax_region),
			"custom dev_pagemap requires a static dax_region\n");

		dev_dax->pgmap = kmemdup(data->pgmap,
				sizeof(struct dev_pagemap), GFP_KERNEL);
		if (!dev_dax->pgmap) {
			rc = -ENOMEM;
			goto err_pgmap;
		}
	}

	/*
	 * No dax_operations since there is no access to this device outside of
	 * mmap of the resulting character device.
	 */
	dax_dev = alloc_dax(dev_dax, NULL);
	if (IS_ERR(dax_dev)) {
		rc = PTR_ERR(dax_dev);
		goto err_alloc_dax;
	}
	set_dax_synchronous(dax_dev);
	set_dax_nocache(dax_dev);
	set_dax_nomc(dax_dev);

	/* a device_dax instance is dead while the driver is not attached */
	kill_dax(dax_dev);

	dev_dax->dax_dev = dax_dev;
	dev_dax->target_node = dax_region->target_node;
	dev_dax->align = dax_region->align;
	ida_init(&dev_dax->ida);

	dev_dax->memmap_on_memory = data->memmap_on_memory;

	inode = dax_inode(dax_dev);
	dev->devt = inode->i_rdev;
	dev->bus = &dax_bus_type;
	dev->parent = parent;
	dev->type = &dev_dax_type;

	rc = device_add(dev);
	if (rc) {
		kill_dev_dax(dev_dax);
		put_device(dev);
		return ERR_PTR(rc);
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
	if (rc)
		return ERR_PTR(rc);

	/* register mapping device for the initial allocation range */
	if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
		rc = devm_register_dax_mapping(dev_dax, 0);
		if (rc)
			return ERR_PTR(rc);
	}

	return dev_dax;

err_alloc_dax:
	kfree(dev_dax->pgmap);
err_pgmap:
	free_dev_dax_ranges(dev_dax);
err_range:
	free_dev_dax_id(dev_dax);
err_id:
	kfree(dev_dax);

	return ERR_PTR(rc);
}
struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
{
	struct dev_dax *dev_dax;

	down_write(&dax_region_rwsem);
	dev_dax = __devm_create_dev_dax(data);
	up_write(&dax_region_rwsem);

	return dev_dax;
}
EXPORT_SYMBOL_GPL(devm_create_dev_dax);
int __dax_driver_register(struct dax_device_driver *dax_drv,
		struct module *module, const char *mod_name)
{
	struct device_driver *drv = &dax_drv->drv;

	/*
	 * dax_bus_probe() calls dax_drv->probe() unconditionally.
	 * So better be safe than sorry and ensure it is provided.
	 */
	if (!dax_drv->probe)
		return -EINVAL;

	INIT_LIST_HEAD(&dax_drv->ids);
	drv->owner = module;
	drv->name = mod_name;
	drv->mod_name = mod_name;
	drv->bus = &dax_bus_type;

	return driver_register(drv);
}
EXPORT_SYMBOL_GPL(__dax_driver_register);
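
/*
 * A minimal consumer sketch (cf. the device_dax and kmem drivers; the
 * dax_driver_register() wrapper in bus.h passes THIS_MODULE and
 * KBUILD_MODNAME to __dax_driver_register()):
 *
 *	static struct dax_device_driver example_dax_driver = {
 *		.probe = example_probe,
 *		.remove = example_remove,
 *		.type = DAXDRV_DEVICE_TYPE,
 *	};
 *	...
 *	rc = dax_driver_register(&example_dax_driver);
 */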
void dax_driver_unregister(struct dax_device_driver *dax_drv)
{
	struct device_driver *drv = &dax_drv->drv;
	struct dax_id *dax_id, *_id;

	mutex_lock(&dax_bus_lock);
	list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
		list_del(&dax_id->list);
		kfree(dax_id);
	}
	mutex_unlock(&dax_bus_lock);
	driver_unregister(drv);
}
EXPORT_SYMBOL_GPL(dax_driver_unregister);
int __init dax_bus_init(void)
{
	return bus_register(&dax_bus_type);
}

void __exit dax_bus_exit(void)
{
	bus_unregister(&dax_bus_type);
}