From 400188ae361a9d9a72a47a6cedaf2d2efcc84aa8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:18 +0100 Subject: [PATCH 01/42] kernfs: Acquire kernfs_rwsem in kernfs_notify_workfn(). kernfs_notify_workfn() dereferences kernfs_node::name and passes it later to fsnotify(). If the node is renamed then the previously observed name pointer becomes invalid. Acquire kernfs_root::kernfs_rwsem to block renames of the node. Acked-by: Tejun Heo Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-2-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 0eb320617d7b1..c4ffa8dc89ebc 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -911,6 +911,7 @@ static void kernfs_notify_workfn(struct work_struct *work) /* kick fsnotify */ down_read(&root->kernfs_supers_rwsem); + down_read(&root->kernfs_rwsem); list_for_each_entry(info, &kernfs_root(kn)->supers, node) { struct kernfs_node *parent; struct inode *p_inode = NULL; @@ -947,6 +948,7 @@ static void kernfs_notify_workfn(struct work_struct *work) iput(inode); } + up_read(&root->kernfs_rwsem); up_read(&root->kernfs_supers_rwsem); kernfs_put(kn); goto repeat; From 122ab92dee80582c39740609a627198dd5b6b595 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:19 +0100 Subject: [PATCH 02/42] kernfs: Acquire kernfs_rwsem in kernfs_get_parent_dentry(). kernfs_get_parent_dentry() passes kernfs_node::parent to kernfs_get_inode(). Acquire kernfs_root::kernfs_rwsem to ensure kernfs_node::parent isn't replaced during the operation. 
Acked-by: Tejun Heo Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-3-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 1358c21837f1a..b9b16e97bff18 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -145,7 +145,9 @@ static struct dentry *kernfs_fh_to_parent(struct super_block *sb, static struct dentry *kernfs_get_parent_dentry(struct dentry *child) { struct kernfs_node *kn = kernfs_dentry_node(child); + struct kernfs_root *root = kernfs_root(kn); + guard(rwsem_read)(&root->kernfs_rwsem); return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent)); } From 5b2fabf7fe8f745ff214ff003e6067b64f172271 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:20 +0100 Subject: [PATCH 03/42] kernfs: Acquire kernfs_rwsem in kernfs_node_dentry(). kernfs_node_dentry() passes kernfs_node::name to lookup_positive_unlocked(). Acquire kernfs_root::kernfs_rwsem to ensure the node is not renamed during the operation. 
Acked-by: Tejun Heo Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-4-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index b9b16e97bff18..4a0ff08d589ca 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -209,6 +209,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, { struct dentry *dentry; struct kernfs_node *knparent; + struct kernfs_root *root; BUG_ON(sb->s_op != &kernfs_sops); @@ -218,6 +219,9 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, if (!kn->parent) return dentry; + root = kernfs_root(kn); + guard(rwsem_read)(&root->kernfs_rwsem); + knparent = find_next_ancestor(kn, NULL); if (WARN_ON(!knparent)) { dput(dentry); From 9aab10a0249eab4ec77c6a5e4f66442610c12a09 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:21 +0100 Subject: [PATCH 04/42] kernfs: Don't re-lock kernfs_root::kernfs_rwsem in kernfs_fop_readdir(). The readdir operation iterates over all entries and invokes dir_emit() for every entry passing kernfs_node::name as argument. Since the name argument can change, and become invalid, the kernfs_root::kernfs_rwsem lock should not be dropped to prevent renames during the operation. The lock drop around dir_emit() has been initially introduced in commit 1e5289c97bba2 ("sysfs: Cache the last sysfs_dirent to improve readdir scalability v2") to avoid holding a global lock during a page fault. Dropping the lock has been wrong ever since rename support was added, and keeping it held is not a big burden since the lock is no longer global. Don't re-acquire kernfs_root::kernfs_rwsem while copying the name to the userspace buffer. 
Acked-by: Tejun Heo Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-5-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 5f0f8b95f44c0..43fbada678381 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1869,10 +1869,10 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) file->private_data = pos; kernfs_get(pos); - up_read(&root->kernfs_rwsem); - if (!dir_emit(ctx, name, len, ino, type)) + if (!dir_emit(ctx, name, len, ino, type)) { + up_read(&root->kernfs_rwsem); return 0; - down_read(&root->kernfs_rwsem); + } } up_read(&root->kernfs_rwsem); file->private_data = NULL; From 633488947ef66b194377411322dc9e12aab79b65 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:22 +0100 Subject: [PATCH 05/42] kernfs: Use RCU to access kernfs_node::parent. kernfs_rename_lock is used to obtain stable kernfs_node::{name|parent} pointer. This is a preparation to access kernfs_node::parent under RCU and ensure that the pointer remains stable under the RCU lifetime guarantees. For a complete path, as it is done in kernfs_path_from_node(), the kernfs_rename_lock is still required in order to obtain a stable parent relationship while computing the relevant node depth. This must not change while the nodes are inspected in order to build the path. If the kernfs user never moves the nodes (changes the parent) then the kernfs_rename_lock is not required and the RCU guarantees are sufficient. This "restriction" can be set with KERNFS_ROOT_INVARIANT_PARENT. Otherwise the lock is required. Rename kernfs_node::parent to kernfs_node::__parent to denote the RCU access and use RCU accessor while accessing the node. Make cgroup use KERNFS_ROOT_INVARIANT_PARENT since the parent here can not change. 
Acked-by: Tejun Heo Cc: Yonghong Song Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-6-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 65 +++++++++---- fs/kernfs/dir.c | 96 ++++++++++++------- fs/kernfs/kernfs-internal.h | 32 ++++++- fs/kernfs/mount.c | 10 +- fs/kernfs/symlink.c | 23 ++--- fs/sysfs/file.c | 24 +++-- include/linux/kernfs.h | 10 +- kernel/cgroup/cgroup-v1.c | 2 +- kernel/cgroup/cgroup.c | 24 ++++- .../selftests/bpf/progs/profiler.inc.h | 2 +- 10 files changed, 195 insertions(+), 93 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6419e04d8a7b2..55dcdeea1a1b4 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -956,10 +956,20 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of, return 0; } +static void *rdt_kn_parent_priv(struct kernfs_node *kn) +{ + /* + * The parent pointer is only valid within RCU section since it can be + * replaced. 
+ */ + guard(rcu)(); + return rcu_dereference(kn->__parent)->priv; +} + static int rdt_num_closids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); seq_printf(seq, "%u\n", s->num_closid); return 0; @@ -968,7 +978,7 @@ static int rdt_num_closids_show(struct kernfs_open_file *of, static int rdt_default_ctrl_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->default_ctrl); @@ -978,7 +988,7 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of, static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->cache.min_cbm_bits); @@ -988,7 +998,7 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, static int rdt_shareable_bits_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%x\n", r->cache.shareable_bits); @@ -1012,7 +1022,7 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of, static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); /* * Use unsigned long even though only 32 bits are used to ensure * test_bit() is used safely. 
@@ -1094,7 +1104,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, static int rdt_min_bw_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.min_bw); @@ -1104,7 +1114,7 @@ static int rdt_min_bw_show(struct kernfs_open_file *of, static int rdt_num_rmids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); seq_printf(seq, "%d\n", r->num_rmid); @@ -1114,7 +1124,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of, static int rdt_mon_features_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); struct mon_evt *mevt; list_for_each_entry(mevt, &r->evt_list, list) { @@ -1129,7 +1139,7 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, static int rdt_bw_gran_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.bw_gran); @@ -1139,7 +1149,7 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of, static int rdt_delay_linear_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->membw.delay_linear); @@ -1157,7 +1167,7 @@ static int max_threshold_occ_show(struct kernfs_open_file *of, static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema 
*s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) @@ -1222,7 +1232,7 @@ static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct resctrl_schema *s = of->kn->parent->priv; + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks); @@ -1634,7 +1644,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid static int mbm_total_bytes_config_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID); @@ -1644,7 +1654,7 @@ static int mbm_total_bytes_config_show(struct kernfs_open_file *of, static int mbm_local_bytes_config_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID); @@ -1750,7 +1760,7 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); int ret; /* Valid input requires a trailing newline */ @@ -1776,7 +1786,7 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_resource *r = of->kn->parent->priv; + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); int ret; /* Valid input requires a trailing newline */ @@ -2440,12 +2450,13 @@ static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) * resource. 
"info" and its subdirectories don't * have rdtgroup structures, so return NULL here. */ - if (kn == kn_info || kn->parent == kn_info) + if (kn == kn_info || + rcu_access_pointer(kn->__parent) == kn_info) return NULL; else return kn->priv; } else { - return kn->parent->priv; + return rdt_kn_parent_priv(kn); } } @@ -3771,9 +3782,18 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) return 0; } +static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn) +{ + /* + * Valid within the RCU section it was obtained or while rdtgroup_mutex + * is held. + */ + return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex)); +} + static int rdtgroup_rmdir(struct kernfs_node *kn) { - struct kernfs_node *parent_kn = kn->parent; + struct kernfs_node *parent_kn; struct rdtgroup *rdtgrp; cpumask_var_t tmpmask; int ret = 0; @@ -3786,6 +3806,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) ret = -EPERM; goto out; } + parent_kn = rdt_kn_parent(kn); /* * If the rdtgroup is a ctrl_mon group and parent directory @@ -3854,6 +3875,7 @@ static void mongrp_reparent(struct rdtgroup *rdtgrp, static int rdtgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name) { + struct kernfs_node *kn_parent; struct rdtgroup *new_prdtgrp; struct rdtgroup *rdtgrp; cpumask_var_t tmpmask; @@ -3888,8 +3910,9 @@ static int rdtgroup_rename(struct kernfs_node *kn, goto out; } - if (rdtgrp->type != RDTMON_GROUP || !kn->parent || - !is_mon_groups(kn->parent, kn->name)) { + kn_parent = rdt_kn_parent(kn); + if (rdtgrp->type != RDTMON_GROUP || !kn_parent || + !is_mon_groups(kn_parent, kn->name)) { rdt_last_cmd_puts("Source must be a MON group\n"); ret = -EPERM; goto out; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 43fbada678381..1d370c497e8a3 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -17,7 +17,7 @@ #include "kernfs-internal.h" -static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */ 
+DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */ /* * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to * call pr_cont() while holding rename_lock. Because sometimes pr_cont() @@ -56,7 +56,7 @@ static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) if (!kn) return strscpy(buf, "(null)", buflen); - return strscpy(buf, kn->parent ? kn->name : "/", buflen); + return strscpy(buf, rcu_access_pointer(kn->__parent) ? kn->name : "/", buflen); } /* kernfs_node_depth - compute depth from @from to @to */ @@ -64,9 +64,9 @@ static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to) { size_t depth = 0; - while (to->parent && to != from) { + while (rcu_dereference(to->__parent) && to != from) { depth++; - to = to->parent; + to = rcu_dereference(to->__parent); } return depth; } @@ -84,18 +84,18 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, db = kernfs_depth(rb->kn, b); while (da > db) { - a = a->parent; + a = rcu_dereference(a->__parent); da--; } while (db > da) { - b = b->parent; + b = rcu_dereference(b->__parent); db--; } /* worst case b and a will be the same at root */ while (b != a) { - b = b->parent; - a = a->parent; + b = rcu_dereference(b->__parent); + a = rcu_dereference(a->__parent); } return a; @@ -168,8 +168,9 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, /* Calculate how many bytes we need for the rest */ for (i = depth_to - 1; i >= 0; i--) { + for (kn = kn_to, j = 0; j < i; j++) - kn = kn->parent; + kn = rcu_dereference(kn->__parent); len += scnprintf(buf + len, buflen - len, "/%s", kn->name); } @@ -226,6 +227,7 @@ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, unsigned long flags; int ret; + guard(rcu)(); read_lock_irqsave(&kernfs_rename_lock, flags); ret = kernfs_path_from_node_locked(to, from, buf, buflen); read_unlock_irqrestore(&kernfs_rename_lock, flags); @@ -295,7 +297,7 @@ struct kernfs_node 
*kernfs_get_parent(struct kernfs_node *kn) unsigned long flags; read_lock_irqsave(&kernfs_rename_lock, flags); - parent = kn->parent; + parent = kernfs_parent(kn); kernfs_get(parent); read_unlock_irqrestore(&kernfs_rename_lock, flags); @@ -360,8 +362,12 @@ static int kernfs_sd_compare(const struct kernfs_node *left, */ static int kernfs_link_sibling(struct kernfs_node *kn) { - struct rb_node **node = &kn->parent->dir.children.rb_node; struct rb_node *parent = NULL; + struct kernfs_node *kn_parent; + struct rb_node **node; + + kn_parent = kernfs_parent(kn); + node = &kn_parent->dir.children.rb_node; while (*node) { struct kernfs_node *pos; @@ -380,13 +386,13 @@ static int kernfs_link_sibling(struct kernfs_node *kn) /* add new node and rebalance the tree */ rb_link_node(&kn->rb, parent, node); - rb_insert_color(&kn->rb, &kn->parent->dir.children); + rb_insert_color(&kn->rb, &kn_parent->dir.children); /* successfully added, account subdir number */ down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (kernfs_type(kn) == KERNFS_DIR) - kn->parent->dir.subdirs++; - kernfs_inc_rev(kn->parent); + kn_parent->dir.subdirs++; + kernfs_inc_rev(kn_parent); up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); return 0; @@ -407,16 +413,19 @@ static int kernfs_link_sibling(struct kernfs_node *kn) */ static bool kernfs_unlink_sibling(struct kernfs_node *kn) { + struct kernfs_node *kn_parent; + if (RB_EMPTY_NODE(&kn->rb)) return false; + kn_parent = kernfs_parent(kn); down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); if (kernfs_type(kn) == KERNFS_DIR) - kn->parent->dir.subdirs--; - kernfs_inc_rev(kn->parent); + kn_parent->dir.subdirs--; + kernfs_inc_rev(kn_parent); up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); - rb_erase(&kn->rb, &kn->parent->dir.children); + rb_erase(&kn->rb, &kn_parent->dir.children); RB_CLEAR_NODE(&kn->rb); return true; } @@ -562,7 +571,7 @@ void kernfs_put(struct kernfs_node *kn) * Moving/renaming is always done while holding reference. 
* kn->parent won't change beneath us. */ - parent = kn->parent; + parent = kernfs_parent(kn); WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, "kernfs_put: %s/%s: released with incorrect active_ref %d\n", @@ -701,7 +710,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, name, mode, uid, gid, flags); if (kn) { kernfs_get(parent); - kn->parent = parent; + rcu_assign_pointer(kn->__parent, parent); } return kn; } @@ -769,13 +778,14 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, */ int kernfs_add_one(struct kernfs_node *kn) { - struct kernfs_node *parent = kn->parent; - struct kernfs_root *root = kernfs_root(parent); + struct kernfs_root *root = kernfs_root(kn); struct kernfs_iattrs *ps_iattr; + struct kernfs_node *parent; bool has_ns; int ret; down_write(&root->kernfs_rwsem); + parent = kernfs_parent(kn); ret = -EINVAL; has_ns = kernfs_ns_enabled(parent); @@ -949,6 +959,11 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, return kn; } +unsigned int kernfs_root_flags(struct kernfs_node *kn) +{ + return kernfs_root(kn)->flags; +} + /** * kernfs_create_root - create a new kernfs hierarchy * @scops: optional syscall operations for the hierarchy @@ -1112,7 +1127,7 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { - struct kernfs_node *kn; + struct kernfs_node *kn, *parent; struct kernfs_root *root; if (flags & LOOKUP_RCU) @@ -1163,8 +1178,9 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, if (!kernfs_active(kn)) goto out_bad; + parent = kernfs_parent(kn); /* The kernfs node has been moved? 
*/ - if (kernfs_dentry_node(dentry->d_parent) != kn->parent) + if (kernfs_dentry_node(dentry->d_parent) != parent) goto out_bad; /* The kernfs node has been renamed */ @@ -1172,7 +1188,7 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, goto out_bad; /* The kernfs node has been moved to a different namespace */ - if (kn->parent && kernfs_ns_enabled(kn->parent) && + if (parent && kernfs_ns_enabled(parent) && kernfs_info(dentry->d_sb)->ns != kn->ns) goto out_bad; @@ -1365,7 +1381,7 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, return kernfs_leftmost_descendant(rb_to_kn(rbn)); /* no sibling left, visit parent */ - return pos->parent; + return kernfs_parent(pos); } static void kernfs_activate_one(struct kernfs_node *kn) @@ -1377,7 +1393,7 @@ static void kernfs_activate_one(struct kernfs_node *kn) if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING))) return; - WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb)); + WARN_ON_ONCE(rcu_access_pointer(kn->__parent) && RB_EMPTY_NODE(&kn->rb)); WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); atomic_sub(KN_DEACTIVATED_BIAS, &kn->active); @@ -1447,7 +1463,7 @@ void kernfs_show(struct kernfs_node *kn, bool show) static void __kernfs_remove(struct kernfs_node *kn) { - struct kernfs_node *pos; + struct kernfs_node *pos, *parent; /* Short-circuit if non-root @kn has already finished removal. */ if (!kn) @@ -1459,7 +1475,7 @@ static void __kernfs_remove(struct kernfs_node *kn) * This is for kernfs_remove_self() which plays with active ref * after removal. */ - if (kn->parent && RB_EMPTY_NODE(&kn->rb)) + if (kernfs_parent(kn) && RB_EMPTY_NODE(&kn->rb)) return; pr_debug("kernfs %s: removing\n", kn->name); @@ -1485,14 +1501,14 @@ static void __kernfs_remove(struct kernfs_node *kn) kernfs_get(pos); kernfs_drain(pos); - + parent = kernfs_parent(pos); /* * kernfs_unlink_sibling() succeeds once per node. 
Use it * to decide who's responsible for cleanups. */ - if (!pos->parent || kernfs_unlink_sibling(pos)) { + if (!parent || kernfs_unlink_sibling(pos)) { struct kernfs_iattrs *ps_iattr = - pos->parent ? pos->parent->iattr : NULL; + parent ? parent->iattr : NULL; /* update timestamps on the parent */ down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); @@ -1722,7 +1738,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, int error; /* can't move or rename root */ - if (!kn->parent) + if (!rcu_access_pointer(kn->__parent)) return -EINVAL; root = kernfs_root(kn); @@ -1733,8 +1749,15 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, (new_parent->flags & KERNFS_EMPTY_DIR)) goto out; + old_parent = kernfs_parent(kn); + if (root->flags & KERNFS_ROOT_INVARIANT_PARENT) { + error = -EINVAL; + if (WARN_ON_ONCE(old_parent != new_parent)) + goto out; + } + error = 0; - if ((kn->parent == new_parent) && (kn->ns == new_ns) && + if ((old_parent == new_parent) && (kn->ns == new_ns) && (strcmp(kn->name, new_name) == 0)) goto out; /* nothing to rename */ @@ -1761,8 +1784,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, /* rename_lock protects ->parent and ->name accessors */ write_lock_irq(&kernfs_rename_lock); - old_parent = kn->parent; - kn->parent = new_parent; + old_parent = kernfs_parent(kn); + rcu_assign_pointer(kn->__parent, new_parent); kn->ns = new_ns; if (new_name) { @@ -1795,7 +1818,8 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, { if (pos) { int valid = kernfs_active(pos) && - pos->parent == parent && hash == pos->hash; + rcu_access_pointer(pos->__parent) == parent && + hash == pos->hash; kernfs_put(pos); if (!valid) pos = NULL; diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index b42ee6547cdc1..c43bee18b79f7 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -19,6 +19,8 @@ #include #include +extern rwlock_t 
kernfs_rename_lock; + struct kernfs_iattrs { kuid_t ia_uid; kgid_t ia_gid; @@ -64,11 +66,14 @@ struct kernfs_root { * * Return: the kernfs_root @kn belongs to. */ -static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) +static inline struct kernfs_root *kernfs_root(const struct kernfs_node *kn) { + const struct kernfs_node *knp; /* if parent exists, it's always a dir; otherwise, @sd is a dir */ - if (kn->parent) - kn = kn->parent; + guard(rcu)(); + knp = rcu_dereference(kn->__parent); + if (knp) + kn = knp; return kn->dir.root; } @@ -97,6 +102,27 @@ struct kernfs_super_info { }; #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) +static inline bool kernfs_root_is_locked(const struct kernfs_node *kn) +{ + return lockdep_is_held(&kernfs_root(kn)->kernfs_rwsem); +} + +static inline struct kernfs_node *kernfs_parent(const struct kernfs_node *kn) +{ + /* + * The kernfs_node::__parent remains valid within a RCU section. The kn + * can be reparented (and renamed) which changes the entry. This can be + * avoided by locking kernfs_root::kernfs_rwsem or kernfs_rename_lock. + * Both locks can be used to obtain a reference on __parent. Once the + * reference count reaches 0 then the node is about to be freed + * and can not be renamed (or become a different parent) anymore. 
+ */ + return rcu_dereference_check(kn->__parent, + kernfs_root_is_locked(kn) || + lockdep_is_held(&kernfs_rename_lock) || + !atomic_read(&kn->count)); +} + static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry) { if (d_really_is_negative(dentry)) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 4a0ff08d589ca..2252b16e6ef0b 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -148,7 +148,7 @@ static struct dentry *kernfs_get_parent_dentry(struct dentry *child) struct kernfs_root *root = kernfs_root(kn); guard(rwsem_read)(&root->kernfs_rwsem); - return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent)); + return d_obtain_alias(kernfs_get_inode(child->d_sb, kernfs_parent(kn))); } static const struct export_operations kernfs_export_ops = { @@ -188,10 +188,10 @@ static struct kernfs_node *find_next_ancestor(struct kernfs_node *child, return NULL; } - while (child->parent != parent) { - if (!child->parent) + while (kernfs_parent(child) != parent) { + child = kernfs_parent(child); + if (!child) return NULL; - child = child->parent; } return child; @@ -216,7 +216,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, dentry = dget(sb->s_root); /* Check if this is the root kernfs_node */ - if (!kn->parent) + if (!rcu_access_pointer(kn->__parent)) return dentry; root = kernfs_root(kn); diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 45371a70caa71..05c62ca93c53d 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -62,10 +62,10 @@ static int kernfs_get_target_path(struct kernfs_node *parent, /* go up to the root, stop at the base */ base = parent; - while (base->parent) { - kn = target->parent; - while (kn->parent && base != kn) - kn = kn->parent; + while (kernfs_parent(base)) { + kn = kernfs_parent(target); + while (kernfs_parent(kn) && base != kn) + kn = kernfs_parent(kn); if (base == kn) break; @@ -75,14 +75,14 @@ static int kernfs_get_target_path(struct kernfs_node *parent, strcpy(s, "../"); s += 3; - 
base = base->parent; + base = kernfs_parent(base); } /* determine end of target string for reverse fillup */ kn = target; - while (kn->parent && kn != base) { + while (kernfs_parent(kn) && kn != base) { len += strlen(kn->name) + 1; - kn = kn->parent; + kn = kernfs_parent(kn); } /* check limits */ @@ -94,7 +94,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent, /* reverse fillup of target string from target to base */ kn = target; - while (kn->parent && kn != base) { + while (kernfs_parent(kn) && kn != base) { int slen = strlen(kn->name); len -= slen; @@ -102,7 +102,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent, if (len) s[--len] = '/'; - kn = kn->parent; + kn = kernfs_parent(kn); } return 0; @@ -111,12 +111,13 @@ static int kernfs_get_target_path(struct kernfs_node *parent, static int kernfs_getlink(struct inode *inode, char *path) { struct kernfs_node *kn = inode->i_private; - struct kernfs_node *parent = kn->parent; + struct kernfs_node *parent; struct kernfs_node *target = kn->symlink.target_kn; - struct kernfs_root *root = kernfs_root(parent); + struct kernfs_root *root = kernfs_root(kn); int error; down_read(&root->kernfs_rwsem); + parent = kernfs_parent(kn); error = kernfs_get_target_path(parent, target, path); up_read(&root->kernfs_rwsem); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 6931308876c4a..c3d3b079aedde 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -19,13 +19,19 @@ #include "sysfs.h" +static struct kobject *sysfs_file_kobj(struct kernfs_node *kn) +{ + guard(rcu)(); + return rcu_dereference(kn->__parent)->priv; +} + /* * Determine ktype->sysfs_ops for the given kernfs_node. This function * must be called while holding an active reference. 
*/ static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn) { - struct kobject *kobj = kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(kn); if (kn->flags & KERNFS_LOCKDEP) lockdep_assert_held(kn); @@ -40,7 +46,7 @@ static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn) static int sysfs_kf_seq_show(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); const struct sysfs_ops *ops = sysfs_file_ops(of->kn); ssize_t count; char *buf; @@ -78,7 +84,7 @@ static ssize_t sysfs_kf_bin_read(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { struct bin_attribute *battr = of->kn->priv; - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); loff_t size = file_inode(of->file)->i_size; if (!count) @@ -105,7 +111,7 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { const struct sysfs_ops *ops = sysfs_file_ops(of->kn); - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); ssize_t len; /* @@ -131,7 +137,7 @@ static ssize_t sysfs_kf_write(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { const struct sysfs_ops *ops = sysfs_file_ops(of->kn); - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); if (!count) return 0; @@ -144,7 +150,7 @@ static ssize_t sysfs_kf_bin_write(struct kernfs_open_file *of, char *buf, size_t count, loff_t pos) { struct bin_attribute *battr = of->kn->priv; - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); loff_t size = file_inode(of->file)->i_size; if (size) { @@ -168,7 +174,7 @@ static int sysfs_kf_bin_mmap(struct kernfs_open_file *of, struct vm_area_struct *vma) { struct bin_attribute *battr = of->kn->priv; - struct kobject *kobj = of->kn->parent->priv; 
+ struct kobject *kobj = sysfs_file_kobj(of->kn); return battr->mmap(of->file, kobj, battr, vma); } @@ -177,7 +183,7 @@ static loff_t sysfs_kf_bin_llseek(struct kernfs_open_file *of, loff_t offset, int whence) { struct bin_attribute *battr = of->kn->priv; - struct kobject *kobj = of->kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(of->kn); if (battr->llseek) return battr->llseek(of->file, kobj, battr, offset, whence); @@ -494,7 +500,7 @@ EXPORT_SYMBOL_GPL(sysfs_break_active_protection); */ void sysfs_unbreak_active_protection(struct kernfs_node *kn) { - struct kobject *kobj = kn->parent->priv; + struct kobject *kobj = sysfs_file_kobj(kn); kernfs_unbreak_active_protection(kn); kernfs_put(kn); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 87c79d076d6d7..5dda9a268e44c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -147,6 +147,11 @@ enum kernfs_root_flag { * Support user xattrs to be written to nodes rooted at this root. */ KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008, + + /* + * Renames must not change the parent node. + */ + KERNFS_ROOT_INVARIANT_PARENT = 0x0010, }; /* type-specific structures for kernfs_node union members */ @@ -199,8 +204,8 @@ struct kernfs_node { * never moved to a different parent, it is safe to access the * parent directly. 
*/ - struct kernfs_node *parent; const char *name; + struct kernfs_node __rcu *__parent; struct rb_node rb; @@ -416,6 +421,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); +unsigned int kernfs_root_flags(struct kernfs_node *kn); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, @@ -514,6 +520,8 @@ kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } +static inline unsigned int kernfs_root_flags(struct kernfs_node *kn) +{ return 0; } static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index e28d5f0d20ed0..c9752eb607ec9 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -844,7 +844,7 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent if (kernfs_type(kn) != KERNFS_DIR) return -ENOTDIR; - if (kn->parent != new_parent) + if (rcu_access_pointer(kn->__parent) != new_parent) return -EIO; /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d9061bd55436b..71819e58d70c9 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -633,9 +633,22 @@ int cgroup_task_count(const struct cgroup *cgrp) return count; } +static struct cgroup *kn_priv(struct kernfs_node *kn) +{ + struct kernfs_node *parent; + /* + * The parent can not be replaced due to KERNFS_ROOT_INVARIANT_PARENT. + * Therefore it is always safe to dereference this pointer outside of a + * RCU section. 
+ */ + parent = rcu_dereference_check(kn->__parent, + kernfs_root_flags(kn) & KERNFS_ROOT_INVARIANT_PARENT); + return parent->priv; +} + struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) { - struct cgroup *cgrp = of->kn->parent->priv; + struct cgroup *cgrp = kn_priv(of->kn); struct cftype *cft = of_cft(of); /* @@ -1612,7 +1625,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn) if (kernfs_type(kn) == KERNFS_DIR) cgrp = kn->priv; else - cgrp = kn->parent->priv; + cgrp = kn_priv(kn); cgroup_unlock(); @@ -1644,7 +1657,7 @@ struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline) if (kernfs_type(kn) == KERNFS_DIR) cgrp = kn->priv; else - cgrp = kn->parent->priv; + cgrp = kn_priv(kn); /* * We're gonna grab cgroup_mutex which nests outside kernfs @@ -2118,7 +2131,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) root->kf_root = kernfs_create_root(kf_sops, KERNFS_ROOT_CREATE_DEACTIVATED | KERNFS_ROOT_SUPPORT_EXPORTOP | - KERNFS_ROOT_SUPPORT_USER_XATTR, + KERNFS_ROOT_SUPPORT_USER_XATTR | + KERNFS_ROOT_INVARIANT_PARENT, root_cgrp); if (IS_ERR(root->kf_root)) { ret = PTR_ERR(root->kf_root); @@ -4119,7 +4133,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cgroup_file_ctx *ctx = of->priv; - struct cgroup *cgrp = of->kn->parent->priv; + struct cgroup *cgrp = kn_priv(of->kn); struct cftype *cft = of_cft(of); struct cgroup_subsys_state *css; int ret; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 8bd1ebd7d6afd..813143b4985dc 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -223,7 +223,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) { payload += filepart_length; } - cgroup_node = BPF_CORE_READ(cgroup_node, parent); + cgroup_node = 
BPF_CORE_READ(cgroup_node, __parent); } return payload; } From 741c10b096bc4dd79cd9f215b6ef173bb953e75c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 13 Feb 2025 15:50:23 +0100 Subject: [PATCH 06/42] kernfs: Use RCU to access kernfs_node::name. Using RCU lifetime rules to access kernfs_node::name can avoid the trouble with kernfs_rename_lock in kernfs_name() and kernfs_path_from_node() if the fs was created with KERNFS_ROOT_INVARIANT_PARENT. This is useful as it allows implementing kernfs_path_from_node() with only RCU protection, avoiding kernfs_rename_lock. The lock is only required if the __parent node can be changed and the function requires an unchanged hierarchy while it iterates from the node to its parent. The change is needed to allow the lookup of the node's path (kernfs_path_from_node()) from a context which always runs with preemption and/or interrupts disabled, even on PREEMPT_RT. The problem is that kernfs_rename_lock becomes a sleeping lock on PREEMPT_RT. I went through all ::name users and added the required access for the lookup with a few extensions: - rdtgroup_pseudo_lock_create() drops all locks and then uses the name later on. resctrl supports rename with different parents. Here I made a temporary copy of the name for use outside of the lock. - kernfs_rename_ns() accepts NULL as new_name. This simplifies sysfs_move_dir_ns() where it can pass NULL in order to reuse the current name. - kernfs_rename_ns() is only using kernfs_rename_lock if the parents are different. All users use either kernfs_rwsem (for stable path view) or just RCU for the lookup. The ::name is always freed via RCU. Use RCU lifetime guarantees to access kernfs_node::name.
Suggested-by: Tejun Heo Acked-by: Tejun Heo Reported-by: syzbot+6ea37e2e6ffccf41a7e6@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/67251dc6.050a0220.529b6.015e.GAE@google.com/ Reported-by: Hillf Danton Closes: https://lore.kernel.org/20241102001224.2789-1-hdanton@sina.com Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250213145023.2820193-7-bigeasy@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/resctrl/internal.h | 5 + arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 14 ++- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 10 +- fs/kernfs/dir.c | 113 ++++++++++++---------- fs/kernfs/file.c | 4 +- fs/kernfs/kernfs-internal.h | 5 + fs/kernfs/mount.c | 5 +- fs/kernfs/symlink.c | 7 +- fs/sysfs/dir.c | 2 +- include/linux/kernfs.h | 4 +- security/selinux/hooks.c | 7 +- 11 files changed, 105 insertions(+), 71 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 20c898f09b7e7..dd5d6b4bfcc22 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -507,6 +507,11 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, extern struct mutex rdtgroup_mutex; +static inline const char *rdt_kn_name(const struct kernfs_node *kn) +{ + return rcu_dereference_check(kn->name, lockdep_is_held(&rdtgroup_mutex)); +} + extern struct rdt_hw_resource rdt_resources_all[]; extern struct rdtgroup rdtgroup_default; extern struct dentry *debugfs_resctrl; diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 42cc162f7fc91..7a2db7fa41083 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -52,7 +52,8 @@ static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode) rdtgrp = dev_get_drvdata(dev); if (mode) *mode = 0600; - return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name); + 
guard(mutex)(&rdtgroup_mutex); + return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdt_kn_name(rdtgrp->kn)); } static const struct class pseudo_lock_class = { @@ -1293,6 +1294,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) struct task_struct *thread; unsigned int new_minor; struct device *dev; + char *kn_name __free(kfree) = NULL; int ret; ret = pseudo_lock_region_alloc(plr); @@ -1304,6 +1306,11 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) ret = -EINVAL; goto out_region; } + kn_name = kstrdup(rdt_kn_name(rdtgrp->kn), GFP_KERNEL); + if (!kn_name) { + ret = -ENOMEM; + goto out_cstates; + } plr->thread_done = 0; @@ -1348,8 +1355,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) mutex_unlock(&rdtgroup_mutex); if (!IS_ERR_OR_NULL(debugfs_resctrl)) { - plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name, - debugfs_resctrl); + plr->debugfs_dir = debugfs_create_dir(kn_name, debugfs_resctrl); if (!IS_ERR_OR_NULL(plr->debugfs_dir)) debugfs_create_file("pseudo_lock_measure", 0200, plr->debugfs_dir, rdtgrp, @@ -1358,7 +1364,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) dev = device_create(&pseudo_lock_class, NULL, MKDEV(pseudo_lock_major, new_minor), - rdtgrp, "%s", rdtgrp->kn->name); + rdtgrp, "%s", kn_name); mutex_lock(&rdtgroup_mutex); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 55dcdeea1a1b4..10afc4eaa467e 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -916,14 +916,14 @@ int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns, continue; seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? 
"/" : "", - rdtg->kn->name); + rdt_kn_name(rdtg->kn)); seq_puts(s, "mon:"); list_for_each_entry(crg, &rdtg->mon.crdtgrp_list, mon.crdtgrp_list) { if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid, crg->mon.rmid)) continue; - seq_printf(s, "%s", crg->kn->name); + seq_printf(s, "%s", rdt_kn_name(crg->kn)); break; } seq_putc(s, '\n'); @@ -3675,7 +3675,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, */ static bool is_mon_groups(struct kernfs_node *kn, const char *name) { - return (!strcmp(kn->name, "mon_groups") && + return (!strcmp(rdt_kn_name(kn), "mon_groups") && strcmp(name, "mon_groups")); } @@ -3824,7 +3824,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask); } } else if (rdtgrp->type == RDTMON_GROUP && - is_mon_groups(parent_kn, kn->name)) { + is_mon_groups(parent_kn, rdt_kn_name(kn))) { ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask); } else { ret = -EPERM; @@ -3912,7 +3912,7 @@ static int rdtgroup_rename(struct kernfs_node *kn, kn_parent = rdt_kn_parent(kn); if (rdtgrp->type != RDTMON_GROUP || !kn_parent || - !is_mon_groups(kn_parent, kn->name)) { + !is_mon_groups(kn_parent, rdt_kn_name(kn))) { rdt_last_cmd_puts("Source must be a MON group\n"); ret = -EPERM; goto out; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 1d370c497e8a3..c5a578c46759a 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -51,14 +51,6 @@ static bool kernfs_lockdep(struct kernfs_node *kn) #endif } -static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) -{ - if (!kn) - return strscpy(buf, "(null)", buflen); - - return strscpy(buf, rcu_access_pointer(kn->__parent) ? 
kn->name : "/", buflen); -} - /* kernfs_node_depth - compute depth from @from to @to */ static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to) { @@ -168,11 +160,13 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, /* Calculate how many bytes we need for the rest */ for (i = depth_to - 1; i >= 0; i--) { + const char *name; for (kn = kn_to, j = 0; j < i; j++) kn = rcu_dereference(kn->__parent); - len += scnprintf(buf + len, buflen - len, "/%s", kn->name); + name = rcu_dereference(kn->name); + len += scnprintf(buf + len, buflen - len, "/%s", name); } return len; @@ -196,13 +190,18 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, */ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) { - unsigned long flags; - int ret; + struct kernfs_node *kn_parent; - read_lock_irqsave(&kernfs_rename_lock, flags); - ret = kernfs_name_locked(kn, buf, buflen); - read_unlock_irqrestore(&kernfs_rename_lock, flags); - return ret; + if (!kn) + return strscpy(buf, "(null)", buflen); + + guard(rcu)(); + /* + * KERNFS_ROOT_INVARIANT_PARENT is ignored here. The name is RCU freed and + * the parent is either existing or not. + */ + kn_parent = rcu_dereference(kn->__parent); + return strscpy(buf, kn_parent ? 
rcu_dereference(kn->name) : "/", buflen); } /** @@ -224,14 +223,17 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, char *buf, size_t buflen) { - unsigned long flags; - int ret; + struct kernfs_root *root; guard(rcu)(); - read_lock_irqsave(&kernfs_rename_lock, flags); - ret = kernfs_path_from_node_locked(to, from, buf, buflen); - read_unlock_irqrestore(&kernfs_rename_lock, flags); - return ret; + if (to) { + root = kernfs_root(to); + if (!(root->flags & KERNFS_ROOT_INVARIANT_PARENT)) { + guard(read_lock_irqsave)(&kernfs_rename_lock); + return kernfs_path_from_node_locked(to, from, buf, buflen); + } + } + return kernfs_path_from_node_locked(to, from, buf, buflen); } EXPORT_SYMBOL_GPL(kernfs_path_from_node); @@ -338,13 +340,13 @@ static int kernfs_name_compare(unsigned int hash, const char *name, return -1; if (ns > kn->ns) return 1; - return strcmp(name, kn->name); + return strcmp(name, kernfs_rcu_name(kn)); } static int kernfs_sd_compare(const struct kernfs_node *left, const struct kernfs_node *right) { - return kernfs_name_compare(left->hash, left->name, left->ns, right); + return kernfs_name_compare(left->hash, kernfs_rcu_name(left), left->ns, right); } /** @@ -542,7 +544,8 @@ static void kernfs_free_rcu(struct rcu_head *rcu) { struct kernfs_node *kn = container_of(rcu, struct kernfs_node, rcu); - kfree_const(kn->name); + /* If the whole node goes away, then name can't be used outside */ + kfree_const(rcu_access_pointer(kn->name)); if (kn->iattr) { simple_xattrs_free(&kn->iattr->xattrs, NULL); @@ -575,7 +578,8 @@ void kernfs_put(struct kernfs_node *kn) WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, "kernfs_put: %s/%s: released with incorrect active_ref %d\n", - parent ? parent->name : "", kn->name, atomic_read(&kn->active)); + parent ? 
rcu_dereference(parent->name) : "", + rcu_dereference(kn->name), atomic_read(&kn->active)); if (kernfs_type(kn) == KERNFS_LINK) kernfs_put(kn->symlink.target_kn); @@ -652,7 +656,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, atomic_set(&kn->active, KN_DEACTIVATED_BIAS); RB_CLEAR_NODE(&kn->rb); - kn->name = name; + rcu_assign_pointer(kn->name, name); kn->mode = mode; kn->flags = flags; @@ -790,7 +794,8 @@ int kernfs_add_one(struct kernfs_node *kn) ret = -EINVAL; has_ns = kernfs_ns_enabled(parent); if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", - has_ns ? "required" : "invalid", parent->name, kn->name)) + has_ns ? "required" : "invalid", + kernfs_rcu_name(parent), kernfs_rcu_name(kn))) goto out_unlock; if (kernfs_type(parent) != KERNFS_DIR) @@ -800,7 +805,7 @@ int kernfs_add_one(struct kernfs_node *kn) if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR)) goto out_unlock; - kn->hash = kernfs_name_hash(kn->name, kn->ns); + kn->hash = kernfs_name_hash(kernfs_rcu_name(kn), kn->ns); ret = kernfs_link_sibling(kn); if (ret) @@ -856,7 +861,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, if (has_ns != (bool)ns) { WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", - has_ns ? "required" : "invalid", parent->name, name); + has_ns ? "required" : "invalid", kernfs_rcu_name(parent), name); return NULL; } @@ -1135,8 +1140,6 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, /* Negative hashed dentry? */ if (d_really_is_negative(dentry)) { - struct kernfs_node *parent; - /* If the kernfs parent node has changed discard and * proceed to ->lookup. 
* @@ -1184,7 +1187,7 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, goto out_bad; /* The kernfs node has been renamed */ - if (strcmp(dentry->d_name.name, kn->name) != 0) + if (strcmp(dentry->d_name.name, kernfs_rcu_name(kn)) != 0) goto out_bad; /* The kernfs node has been moved to a different namespace */ @@ -1478,7 +1481,7 @@ static void __kernfs_remove(struct kernfs_node *kn) if (kernfs_parent(kn) && RB_EMPTY_NODE(&kn->rb)) return; - pr_debug("kernfs %s: removing\n", kn->name); + pr_debug("kernfs %s: removing\n", kernfs_rcu_name(kn)); /* prevent new usage by marking all nodes removing and deactivating */ pos = NULL; @@ -1734,7 +1737,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, { struct kernfs_node *old_parent; struct kernfs_root *root; - const char *old_name = NULL; + const char *old_name; int error; /* can't move or rename root */ @@ -1757,8 +1760,11 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, } error = 0; + old_name = kernfs_rcu_name(kn); + if (!new_name) + new_name = old_name; if ((old_parent == new_parent) && (kn->ns == new_ns) && - (strcmp(kn->name, new_name) == 0)) + (strcmp(old_name, new_name) == 0)) goto out; /* nothing to rename */ error = -EEXIST; @@ -1766,7 +1772,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, goto out; /* rename kernfs_node */ - if (strcmp(kn->name, new_name) != 0) { + if (strcmp(old_name, new_name) != 0) { error = -ENOMEM; new_name = kstrdup_const(new_name, GFP_KERNEL); if (!new_name) @@ -1779,27 +1785,32 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, * Move to the appropriate place in the appropriate directories rbtree. 
*/ kernfs_unlink_sibling(kn); - kernfs_get(new_parent); - /* rename_lock protects ->parent and ->name accessors */ - write_lock_irq(&kernfs_rename_lock); + /* rename_lock protects ->parent accessors */ + if (old_parent != new_parent) { + kernfs_get(new_parent); + write_lock_irq(&kernfs_rename_lock); - old_parent = kernfs_parent(kn); - rcu_assign_pointer(kn->__parent, new_parent); + rcu_assign_pointer(kn->__parent, new_parent); - kn->ns = new_ns; - if (new_name) { - old_name = kn->name; - kn->name = new_name; - } + kn->ns = new_ns; + if (new_name) + rcu_assign_pointer(kn->name, new_name); - write_unlock_irq(&kernfs_rename_lock); + write_unlock_irq(&kernfs_rename_lock); + kernfs_put(old_parent); + } else { + /* name assignment is RCU protected, parent is the same */ + kn->ns = new_ns; + if (new_name) + rcu_assign_pointer(kn->name, new_name); + } - kn->hash = kernfs_name_hash(kn->name, kn->ns); + kn->hash = kernfs_name_hash(new_name ?: old_name, kn->ns); kernfs_link_sibling(kn); - kernfs_put(old_parent); - kfree_const(old_name); + if (new_name && !is_kernel_rodata((unsigned long)old_name)) + kfree_rcu_mightsleep(old_name); error = 0; out: @@ -1884,7 +1895,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos); pos; pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) { - const char *name = pos->name; + const char *name = kernfs_rcu_name(pos); unsigned int type = fs_umode_to_dtype(pos->mode); int len = strlen(name); ino_t ino = kernfs_ino(pos); diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index c4ffa8dc89ebc..66fe8fe41f060 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -915,6 +915,7 @@ static void kernfs_notify_workfn(struct work_struct *work) list_for_each_entry(info, &kernfs_root(kn)->supers, node) { struct kernfs_node *parent; struct inode *p_inode = NULL; + const char *kn_name; struct inode *inode; struct qstr name; @@ -928,7 +929,8 @@ static void kernfs_notify_workfn(struct 
work_struct *work) if (!inode) continue; - name = QSTR(kn->name); + kn_name = kernfs_rcu_name(kn); + name = QSTR(kn_name); parent = kernfs_get_parent(kn); if (parent) { p_inode = ilookup(info->sb, kernfs_ino(parent)); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index c43bee18b79f7..40a2a9cd819d0 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -107,6 +107,11 @@ static inline bool kernfs_root_is_locked(const struct kernfs_node *kn) return lockdep_is_held(&kernfs_root(kn)->kernfs_rwsem); } +static inline const char *kernfs_rcu_name(const struct kernfs_node *kn) +{ + return rcu_dereference_check(kn->name, kernfs_root_is_locked(kn)); +} + static inline struct kernfs_node *kernfs_parent(const struct kernfs_node *kn) { /* diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 2252b16e6ef0b..d1f512b7bf867 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -231,6 +231,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, do { struct dentry *dtmp; struct kernfs_node *kntmp; + const char *name; if (kn == knparent) return dentry; @@ -239,8 +240,8 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, dput(dentry); return ERR_PTR(-EINVAL); } - dtmp = lookup_positive_unlocked(kntmp->name, dentry, - strlen(kntmp->name)); + name = rcu_dereference(kntmp->name); + dtmp = lookup_positive_unlocked(name, dentry, strlen(name)); dput(dentry); if (IS_ERR(dtmp)) return dtmp; diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 05c62ca93c53d..0bd8a2143723d 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -81,7 +81,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent, /* determine end of target string for reverse fillup */ kn = target; while (kernfs_parent(kn) && kn != base) { - len += strlen(kn->name) + 1; + len += strlen(kernfs_rcu_name(kn)) + 1; kn = kernfs_parent(kn); } @@ -95,10 +95,11 @@ static int kernfs_get_target_path(struct kernfs_node *parent, /* reverse fillup of 
target string from target to base */ kn = target; while (kernfs_parent(kn) && kn != base) { - int slen = strlen(kn->name); + const char *name = kernfs_rcu_name(kn); + int slen = strlen(name); len -= slen; - memcpy(s + len, kn->name, slen); + memcpy(s + len, name, slen); if (len) s[--len] = '/'; diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 4df2afa551dc6..94e12efd92f21 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -123,7 +123,7 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, new_parent = new_parent_kobj && new_parent_kobj->sd ? new_parent_kobj->sd : sysfs_root_kn; - return kernfs_rename_ns(kn, new_parent, kn->name, new_ns); + return kernfs_rename_ns(kn, new_parent, NULL, new_ns); } /** diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5dda9a268e44c..b5a5f32fdfd1a 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -204,8 +204,8 @@ struct kernfs_node { * never moved to a different parent, it is safe to access the * parent directly. 
*/ - const char *name; struct kernfs_node __rcu *__parent; + const char __rcu *name; struct rb_node rb; @@ -400,7 +400,7 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) } int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); -int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, +int kernfs_path_from_node(struct kernfs_node *kn_to, struct kernfs_node *kn_from, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7b867dfec88ba..7dee9616147d2 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3584,10 +3584,13 @@ static int selinux_kernfs_init_security(struct kernfs_node *kn_dir, newsid = tsec->create_sid; } else { u16 secclass = inode_mode_to_security_class(kn->mode); + const char *kn_name; struct qstr q; - q.name = kn->name; - q.hash_len = hashlen_string(kn_dir, kn->name); + /* kn is fresh, can't be renamed, name goes not away */ + kn_name = rcu_dereference_check(kn->name, true); + q.name = kn_name; + q.hash_len = hashlen_string(kn_dir, kn_name); rc = security_transition_sid(tsec->sid, parent_sid, secclass, &q, From 6ef5b6fae304091593956be59065c0c8633ad9e8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 18 Feb 2025 17:39:38 +0100 Subject: [PATCH 07/42] kernfs: Drop kernfs_rwsem while invoking lookup_positive_unlocked(). syzbot reported two warnings: - kernfs_node::name was accessed outside of an RCU section, which triggered a warning. The kernfs_rwsem was held, so the access was okay, but the check did not see that. - lookup_positive_unlocked() was invoked while kernfs_rwsem was held; it calls kernfs_dop_revalidate(), which acquires kernfs_rwsem again. Both acquisitions are read locks, so the lock can be acquired twice. However, if a writer tries to acquire the lock after the first reader, then neither the writer nor the second reader can obtain the lock, so it deadlocks.
The reason for the lock is to ensure that kernfs_node::name remain stable during lookup_positive_unlocked()'s invocation. The function can not be invoked within a RCU section because it may sleep. Make a temporary copy of the kernfs_node::name under the lock so GFP_KERNEL can be used and use this instead. Reported-by: syzbot+ecccecbc636b455f9084@syzkaller.appspotmail.com Fixes: 5b2fabf7fe8f ("kernfs: Acquire kernfs_rwsem in kernfs_node_dentry().") Signed-off-by: Sebastian Andrzej Siewior Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250218163938.xmvjlJ0K@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index d1f512b7bf867..f1cea282aae32 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -220,12 +220,19 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, return dentry; root = kernfs_root(kn); - guard(rwsem_read)(&root->kernfs_rwsem); - - knparent = find_next_ancestor(kn, NULL); - if (WARN_ON(!knparent)) { - dput(dentry); + /* + * As long as kn is valid, its parent can not vanish. This is cgroup's + * kn so it not have its parent replaced. Therefore it is safe to use + * the ancestor node outside of the RCU or locked section. 
+ */ + if (WARN_ON_ONCE(!(root->flags & KERNFS_ROOT_INVARIANT_PARENT))) return ERR_PTR(-EINVAL); + scoped_guard(rcu) { + knparent = find_next_ancestor(kn, NULL); + if (WARN_ON(!knparent)) { + dput(dentry); + return ERR_PTR(-EINVAL); + } } do { @@ -235,14 +242,22 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, if (kn == knparent) return dentry; - kntmp = find_next_ancestor(kn, knparent); - if (WARN_ON(!kntmp)) { + + scoped_guard(rwsem_read, &root->kernfs_rwsem) { + kntmp = find_next_ancestor(kn, knparent); + if (WARN_ON(!kntmp)) { + dput(dentry); + return ERR_PTR(-EINVAL); + } + name = kstrdup(kernfs_rcu_name(kntmp), GFP_KERNEL); + } + if (!name) { dput(dentry); - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENOMEM); } - name = rcu_dereference(kntmp->name); dtmp = lookup_positive_unlocked(name, dentry, strlen(name)); dput(dentry); + kfree(name); if (IS_ERR(dtmp)) return dtmp; knparent = kntmp; From 8fd74a31eaf3def0d57264ada57fc981902aeadf Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sat, 8 Feb 2025 22:15:26 +0800 Subject: [PATCH 08/42] driver core: class: Remove needless return in void API class_remove_file() Remove return since both class_remove_file() and class_remove_file_ns() are void functions. 
Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250208-cls_rmv_return-v1-1-091b37945aac@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device/class.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device/class.h b/include/linux/device/class.h index 45ee3a6349993..65880e60c7208 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -193,7 +193,7 @@ static inline int __must_check class_create_file(const struct class *class, static inline void class_remove_file(const struct class *class, const struct class_attribute *attr) { - return class_remove_file_ns(class, attr, NULL); + class_remove_file_ns(class, attr, NULL); } /* Simple class attribute that is just a static string */ From a44073c28bc6d4118891d61e31c9fa9dc4333dc0 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sat, 8 Feb 2025 23:18:39 +0800 Subject: [PATCH 09/42] driver core: Remove needless return in void API device_remove_group() Remove return since both device_remove_group() and device_remove_groups() are void functions. 
Fixes: e323b2dddc1c ("driver core: add device_{add|remove}_group() helpers") Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250208-fix_device_remove_group-v1-1-8a5b0ac0ce5c@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device.h b/include/linux/device.h index 80a5b32689866..605b60254f6d8 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1268,7 +1268,7 @@ static inline void device_remove_group(struct device *dev, { const struct attribute_group *groups[] = { grp, NULL }; - return device_remove_groups(dev, groups); + device_remove_groups(dev, groups); } int __must_check devm_device_add_group(struct device *dev, From b1b620bfa984b8fb91a284b60df702346b4294a4 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 3 Feb 2025 09:50:00 +0700 Subject: [PATCH 10/42] kernel: Fix "select" wording on HZ_250 description HZ_250 config description contains alternative choice for NTSC media users (HZ_300), which is written as "selected 300Hz". This is incorrect, as it implies that HZ_300 is automatically selected whereas the user has chosen HZ_250 instead. Fix the wording to "select 300Hz". Signed-off-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20250203025000.17953-1-bagasdotme@gmail.com Signed-off-by: Greg Kroah-Hartman --- kernel/Kconfig.hz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index 38ef6d06888ef..ce1435cb08b1e 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -30,7 +30,7 @@ choice 250 Hz is a good compromise choice allowing server performance while also showing good interactive responsiveness even on SMP and NUMA systems. If you are going to be using NTSC video - or multimedia, selected 300Hz instead. + or multimedia, select 300Hz instead. 
config HZ_300 bool "300 HZ" From 6fb1ee255ed92b903b9b74e8483d05390cf9cfe6 Mon Sep 17 00:00:00 2001 From: Bharadwaj Raju Date: Tue, 4 Feb 2025 03:33:09 +0530 Subject: [PATCH 11/42] drivers/base/bus.c: fix spelling of "subsystem" Fix spelling, "subystem" -> "subsystem" Signed-off-by: Bharadwaj Raju Link: https://lore.kernel.org/r/20250203220312.1052986-1-bharadwaj.raju777@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 6b9e65a42cd2e..5ea3b03af9ba6 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -1291,7 +1291,7 @@ EXPORT_SYMBOL_GPL(subsys_system_register); * @groups: default attributes for the root device * * All 'virtual' subsystems have a /sys/devices/system/ root device - * with the name of the subystem. The root device can carry subsystem-wide + * with the name of the subsystem. The root device can carry subsystem-wide * attributes. All registered devices are below this single root device. * There's no restriction on device naming. This is for kernel software * constructs which need sysfs interface. From 0514059ca09e407614a02fb3687ea78e607e6526 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Sat, 8 Feb 2025 22:45:48 +0800 Subject: [PATCH 12/42] MAINTAINERS: Add driver core headers to DRIVER CORE maintainers According to get_maintainer.pl output, there are neither maintainer nor supporter for the following driver core headers: include/linux/device.h include/linux/device/ Add them to DRIVER CORE maintainers. 
Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/20250208-drv_core_hdr-v1-1-8205b0483e3f@quicinc.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index efee40ea589f7..23ab175943596 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7098,7 +7098,9 @@ F: Documentation/core-api/kobject.rst F: drivers/base/ F: fs/debugfs/ F: fs/sysfs/ +F: include/linux/device/ F: include/linux/debugfs.h +F: include/linux/device.h F: include/linux/fwnode.h F: include/linux/kobj* F: include/linux/property.h From 177cbd5249b1af0b92e47fbd480f277cf3a0598d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 5 Feb 2025 12:58:52 -0800 Subject: [PATCH 13/42] drivers: base: component: Allow more space for device name Some drivers use - as the aggregate device name which uses more than 20 chars, causing the status not to be aligned correctly. Example for mei_gsc_proxy on LNL: Before: aggregate_device name status ------------------------------------------------------------- 0000:00:16.0-0f73db04-97ab-4125-b893-e904ad0d5464 bound After: aggregate_device name status ----------------------------------------------------------------------- 0000:00:16.0-0f73db04-97ab-4125-b893-e904ad0d5464 bound Give it 10 more chars for proper alignment. 
Signed-off-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250205205851.2355820-2-lucas.demarchi@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/component.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/component.c b/drivers/base/component.c index 741497324d78a..747c5542c70fa 100644 --- a/drivers/base/component.c +++ b/drivers/base/component.c @@ -87,17 +87,17 @@ static int component_devices_show(struct seq_file *s, void *data) size_t i; mutex_lock(&component_mutex); - seq_printf(s, "%-40s %20s\n", "aggregate_device name", "status"); - seq_puts(s, "-------------------------------------------------------------\n"); - seq_printf(s, "%-40s %20s\n\n", + seq_printf(s, "%-50s %20s\n", "aggregate_device name", "status"); + seq_puts(s, "-----------------------------------------------------------------------\n"); + seq_printf(s, "%-50s %20s\n\n", dev_name(m->parent), m->bound ? "bound" : "not bound"); - seq_printf(s, "%-40s %20s\n", "device name", "status"); - seq_puts(s, "-------------------------------------------------------------\n"); + seq_printf(s, "%-50s %20s\n", "device name", "status"); + seq_puts(s, "-----------------------------------------------------------------------\n"); for (i = 0; i < match->num; i++) { struct component *component = match->compare[i].component; - seq_printf(s, "%-40s %20s\n", + seq_printf(s, "%-50s %20s\n", component ? dev_name(component->dev) : "(unknown)", component ? (component->bound ? "bound" : "not bound") : "not registered"); } From 1d2d45b62784e81b6e08ec0ab7cca4eaa23ff581 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 11 Feb 2025 14:24:09 +0100 Subject: [PATCH 14/42] driver core: location: Use str_yes_no() helper function Remove hard-coded strings by using the str_yes_no() helper function. 
Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20250211132409.700073-2-thorsten.blum@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/base/physical_location.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/physical_location.c b/drivers/base/physical_location.c index 5db06e825c94f..a5539e294d4df 100644 --- a/drivers/base/physical_location.c +++ b/drivers/base/physical_location.c @@ -7,6 +7,7 @@ #include <linux/acpi.h> #include <linux/sysfs.h> +#include <linux/string_choices.h> #include "physical_location.h" @@ -116,7 +117,7 @@ static ssize_t dock_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", - dev->physical_location->dock ? "yes" : "no"); + str_yes_no(dev->physical_location->dock)); } static DEVICE_ATTR_RO(dock); @@ -124,7 +125,7 @@ static ssize_t lid_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", - dev->physical_location->lid ? "yes" : "no"); + str_yes_no(dev->physical_location->lid)); } static DEVICE_ATTR_RO(lid); From 7de24e20a7aa83295e567982b0b29f3b53152759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 14 Jan 2025 22:25:14 +0100 Subject: [PATCH 15/42] cxl/port: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications.
Signed-off-by: Thomas Weißschuh Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20250114-sysfs-const-bin_attr-cxl-v1-1-5afa23fe2a52@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/port.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index d2bfd1ff54924..a35fc55528459 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -153,7 +153,7 @@ static int cxl_port_probe(struct device *dev) } static ssize_t CDAT_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t offset, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -170,7 +170,7 @@ static ssize_t CDAT_read(struct file *filp, struct kobject *kobj, port->cdat.length); } -static BIN_ATTR_ADMIN_RO(CDAT, 0); +static const BIN_ATTR_ADMIN_RO(CDAT, 0); static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, const struct bin_attribute *attr, int i) @@ -184,13 +184,13 @@ static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj, return 0; } -static struct bin_attribute *cxl_cdat_bin_attributes[] = { +static const struct bin_attribute *const cxl_cdat_bin_attributes[] = { &bin_attr_CDAT, NULL, }; -static struct attribute_group cxl_cdat_attribute_group = { - .bin_attrs = cxl_cdat_bin_attributes, +static const struct attribute_group cxl_cdat_attribute_group = { + .bin_attrs_new = cxl_cdat_bin_attributes, .is_bin_visible = cxl_port_bin_attr_is_visible, }; From 14e694dbf285e74af97d50212e3272d0d22ea653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 25 Jan 2025 11:12:14 +0100 Subject: [PATCH 16/42] firmware: dmi: Mark bin_attributes as __ro_after_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The attributes are only modified during the __init phase. Protect them against accidental or intentional modifications afterwards. 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250125-sysfs-const-bin_attr-dmi-v2-1-ece1895936f4@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/dmi_scan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index fde0656481cc9..70d39adf50dca 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -761,8 +761,8 @@ static void __init dmi_scan_machine(void) pr_info("DMI not present or invalid.\n"); } -static BIN_ATTR_SIMPLE_ADMIN_RO(smbios_entry_point); -static BIN_ATTR_SIMPLE_ADMIN_RO(DMI); +static __ro_after_init BIN_ATTR_SIMPLE_ADMIN_RO(smbios_entry_point); +static __ro_after_init BIN_ATTR_SIMPLE_ADMIN_RO(DMI); static int __init dmi_init(void) { From 80d3989b9ce3b05f7a12cc5c4c8c565fc4ed88f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 25 Jan 2025 11:12:15 +0100 Subject: [PATCH 17/42] firmware: dmi: Define bin_attributes through macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macro makes the code shorter and simplifies constification of the callback arguments. 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250125-sysfs-const-bin_attr-dmi-v2-2-ece1895936f4@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/dmi-sysfs.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c index 8d91997036e4c..6baa921a6664d 100644 --- a/drivers/firmware/dmi-sysfs.c +++ b/drivers/firmware/dmi-sysfs.c @@ -431,9 +431,9 @@ static ssize_t dmi_sel_raw_read_helper(struct dmi_sysfs_entry *entry, } } -static ssize_t dmi_sel_raw_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t pos, size_t count) +static ssize_t raw_event_log_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { struct dmi_sysfs_entry *entry = to_entry(kobj->parent); struct dmi_read_state state = { @@ -445,10 +445,7 @@ static ssize_t dmi_sel_raw_read(struct file *filp, struct kobject *kobj, return find_dmi_entry(entry, dmi_sel_raw_read_helper, &state); } -static struct bin_attribute dmi_sel_raw_attr = { - .attr = {.name = "raw_event_log", .mode = 0400}, - .read = dmi_sel_raw_read, -}; +static BIN_ATTR_ADMIN_RO(raw_event_log, 0); static int dmi_system_event_log(struct dmi_sysfs_entry *entry) { @@ -464,7 +461,7 @@ static int dmi_system_event_log(struct dmi_sysfs_entry *entry) if (ret) goto out_free; - ret = sysfs_create_bin_file(entry->child, &dmi_sel_raw_attr); + ret = sysfs_create_bin_file(entry->child, &bin_attr_raw_event_log); if (ret) goto out_del; @@ -537,10 +534,10 @@ static ssize_t dmi_entry_raw_read_helper(struct dmi_sysfs_entry *entry, &state->pos, dh, entry_length); } -static ssize_t dmi_entry_raw_read(struct file *filp, - struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t pos, size_t count) +static ssize_t raw_read(struct file *filp, + struct kobject *kobj, + struct bin_attribute *bin_attr, + 
char *buf, loff_t pos, size_t count) { struct dmi_sysfs_entry *entry = to_entry(kobj); struct dmi_read_state state = { @@ -552,10 +549,7 @@ static ssize_t dmi_entry_raw_read(struct file *filp, return find_dmi_entry(entry, dmi_entry_raw_read_helper, &state); } -static const struct bin_attribute dmi_entry_raw_attr = { - .attr = {.name = "raw", .mode = 0400}, - .read = dmi_entry_raw_read, -}; +static const BIN_ATTR_ADMIN_RO(raw, 0); static void dmi_sysfs_entry_release(struct kobject *kobj) { @@ -630,7 +624,7 @@ static void __init dmi_sysfs_register_handle(const struct dmi_header *dh, goto out_err; /* Create the raw binary file to access the entry */ - *ret = sysfs_create_bin_file(&entry->kobj, &dmi_entry_raw_attr); + *ret = sysfs_create_bin_file(&entry->kobj, &bin_attr_raw); if (*ret) goto out_err; From 1c83b02c91c1cc42c3030788f437fe403c7e3b37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 25 Jan 2025 11:12:16 +0100 Subject: [PATCH 18/42] firmware: dmi: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250125-sysfs-const-bin_attr-dmi-v2-3-ece1895936f4@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/dmi-sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c index 6baa921a6664d..9cc963b2edc0e 100644 --- a/drivers/firmware/dmi-sysfs.c +++ b/drivers/firmware/dmi-sysfs.c @@ -432,7 +432,7 @@ static ssize_t dmi_sel_raw_read_helper(struct dmi_sysfs_entry *entry, } static ssize_t raw_event_log_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct dmi_sysfs_entry *entry = to_entry(kobj->parent); @@ -445,7 +445,7 @@ static ssize_t raw_event_log_read(struct file *filp, struct kobject *kobj, return find_dmi_entry(entry, dmi_sel_raw_read_helper, &state); } -static BIN_ATTR_ADMIN_RO(raw_event_log, 0); +static const BIN_ATTR_ADMIN_RO(raw_event_log, 0); static int dmi_system_event_log(struct dmi_sysfs_entry *entry) { @@ -536,7 +536,7 @@ static ssize_t dmi_entry_raw_read_helper(struct dmi_sysfs_entry *entry, static ssize_t raw_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct dmi_sysfs_entry *entry = to_entry(kobj); From 7787bfb3b0ea62432d3ffcd31eb66daec4b462ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:34:47 +0100 Subject: [PATCH 19/42] drm/sysfs: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Reviewed-by: Jani Nikula Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-drm-v1-1-210f2b36b9bf@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_sysfs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index fb3bbb6adcd16..60c1f26edb6fa 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -261,7 +261,7 @@ static ssize_t enabled_show(struct device *device, } static ssize_t edid_show(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *connector_dev = kobj_to_dev(kobj); @@ -315,21 +315,21 @@ static struct attribute *connector_dev_attrs[] = { NULL }; -static struct bin_attribute edid_attr = { +static const struct bin_attribute edid_attr = { .attr.name = "edid", .attr.mode = 0444, .size = 0, - .read = edid_show, + .read_new = edid_show, }; -static struct bin_attribute *connector_bin_attrs[] = { +static const struct bin_attribute *const connector_bin_attrs[] = { &edid_attr, NULL }; static const struct attribute_group connector_dev_group = { .attrs = connector_dev_attrs, - .bin_attrs = connector_bin_attrs, + .bin_attrs_new = connector_bin_attrs, }; static const struct attribute_group *connector_dev_groups[] = { From cf3864d84fe98b7f7ff37a90156c3fc8f2c0067e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:34:48 +0100 Subject: [PATCH 20/42] drm/lima: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-drm-v1-2-210f2b36b9bf@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/lima/lima_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c index 2067c5b65c579..11ace5cebf4c8 100644 --- a/drivers/gpu/drm/lima/lima_drv.c +++ b/drivers/gpu/drm/lima/lima_drv.c @@ -310,7 +310,7 @@ static bool lima_read_block(struct lima_block_reader *reader, } static ssize_t lima_error_state_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -336,7 +336,7 @@ static ssize_t lima_error_state_read(struct file *filp, struct kobject *kobj, } static ssize_t lima_error_state_write(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -362,8 +362,8 @@ static const struct bin_attribute lima_error_state_attr = { .attr.name = "error", .attr.mode = 0600, .size = 0, - .read = lima_error_state_read, - .write = lima_error_state_write, + .read_new = lima_error_state_read, + .write_new = lima_error_state_write, }; static int lima_pdev_probe(struct platform_device *pdev) From e3626a456599304f59f827494746940ec025ad6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:34:49 +0100 Subject: [PATCH 21/42] drm/i915: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Reviewed-by: Andi Shyti Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-drm-v1-3-210f2b36b9bf@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++---- drivers/gpu/drm/i915/i915_sysfs.c | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 819ab933bb105..a6613eed33980 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -2490,7 +2490,7 @@ void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) } static ssize_t error_state_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -2526,7 +2526,7 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, } static ssize_t error_state_write(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *kdev = kobj_to_dev(kobj); @@ -2542,8 +2542,8 @@ static const struct bin_attribute error_state_attr = { .attr.name = "error", .attr.mode = S_IRUSR | S_IWUSR, .size = 0, - .read = error_state_read, - .write = error_state_write, + .read_new = error_state_read, + .write_new = error_state_write, }; void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 8775beab9cb84..f936e8f1f1294 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -60,7 +60,7 @@ static int l3_access_valid(struct drm_i915_private *i915, loff_t offset) static ssize_t i915_l3_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t 
offset, size_t count) { struct device *kdev = kobj_to_dev(kobj); @@ -88,7 +88,7 @@ i915_l3_read(struct file *filp, struct kobject *kobj, static ssize_t i915_l3_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t count) { struct device *kdev = kobj_to_dev(kobj); @@ -140,8 +140,8 @@ i915_l3_write(struct file *filp, struct kobject *kobj, static const struct bin_attribute dpf_attrs = { .attr = {.name = "l3_parity", .mode = (S_IRUSR | S_IWUSR)}, .size = GEN7_L3LOG_SIZE, - .read = i915_l3_read, - .write = i915_l3_write, + .read_new = i915_l3_read, + .write_new = i915_l3_write, .mmap = NULL, .private = (void *)0 }; @@ -149,8 +149,8 @@ static const struct bin_attribute dpf_attrs = { static const struct bin_attribute dpf_attrs_1 = { .attr = {.name = "l3_parity_slice_1", .mode = (S_IRUSR | S_IWUSR)}, .size = GEN7_L3LOG_SIZE, - .read = i915_l3_read, - .write = i915_l3_write, + .read_new = i915_l3_read, + .write_new = i915_l3_write, .mmap = NULL, .private = (void *)1 }; From 2d0f5001b61c4831d413d12c10caed0e99d73b25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:34:50 +0100 Subject: [PATCH 22/42] drm/amdgpu: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Reviewed-by: Alex Deucher Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-drm-v1-4-210f2b36b9bf@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 14 +++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 13 ++++++------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d100bb7a137cd..e6fa63f97687a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -225,7 +225,7 @@ static DEVICE_ATTR(pcie_replay_count, 0444, amdgpu_device_get_pcie_replay_count, NULL); static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t ppos, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -261,8 +261,8 @@ static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, return bytes_read; } -BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL, - AMDGPU_SYS_REG_STATE_END); +static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL, + AMDGPU_SYS_REG_STATE_END); int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e5fc80ed06eae..bb02846797eb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -4000,7 +4000,7 @@ int is_psp_fw_valid(struct psp_bin_desc bin) } static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -4036,7 +4036,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, 
struct kobject *kobj, } static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buffer, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -4088,11 +4088,11 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, * Writing to this file will stage an IFWI for update. Reading from this file * will trigger the update process. */ -static struct bin_attribute psp_vbflash_bin_attr = { +static const struct bin_attribute psp_vbflash_bin_attr = { .attr = {.name = "psp_vbflash", .mode = 0660}, .size = 0, - .write = amdgpu_psp_vbflash_write, - .read = amdgpu_psp_vbflash_read, + .write_new = amdgpu_psp_vbflash_write, + .read_new = amdgpu_psp_vbflash_read, }; /** @@ -4119,7 +4119,7 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev, } static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL); -static struct bin_attribute *bin_flash_attrs[] = { +static const struct bin_attribute *const bin_flash_attrs[] = { &psp_vbflash_bin_attr, NULL }; @@ -4155,7 +4155,7 @@ static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj, const struct attribute_group amdgpu_flash_attr_group = { .attrs = flash_attrs, - .bin_attrs = bin_flash_attrs, + .bin_attrs_new = bin_flash_attrs, .is_bin_visible = amdgpu_bin_flash_attr_is_visible, .is_visible = amdgpu_flash_attr_is_visible, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index f0924aa3f4e48..83a5f7180cde1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1733,7 +1733,7 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags) */ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, - struct kobject *kobj, struct bin_attribute *attr, + struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t ppos, size_t count) 
{ struct amdgpu_ras *con = @@ -2065,8 +2065,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) /* debugfs end */ /* ras fs */ -static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO, - amdgpu_ras_sysfs_badpages_read, NULL, 0); +static const BIN_ATTR(gpu_vram_bad_pages, S_IRUGO, + amdgpu_ras_sysfs_badpages_read, NULL, 0); static DEVICE_ATTR(features, S_IRUGO, amdgpu_ras_sysfs_features_read, NULL); static DEVICE_ATTR(version, 0444, @@ -2088,7 +2088,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) &con->event_state_attr.attr, NULL }; - struct bin_attribute *bin_attrs[] = { + const struct bin_attribute *bin_attrs[] = { NULL, NULL, }; @@ -2114,11 +2114,10 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) if (amdgpu_bad_page_threshold != 0) { /* add bad_page_features entry */ - bin_attr_gpu_vram_bad_pages.private = NULL; con->badpages_attr = bin_attr_gpu_vram_bad_pages; + sysfs_bin_attr_init(&con->badpages_attr); bin_attrs[0] = &con->badpages_attr; - group.bin_attrs = bin_attrs; - sysfs_bin_attr_init(bin_attrs[0]); + group.bin_attrs_new = bin_attrs; } r = sysfs_create_group(&adev->dev->kobj, &group); From 600aa8d31af9bf46c62ca0375cc2abb4f1d20c8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:34:51 +0100 Subject: [PATCH 23/42] drm/amd/display: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Reviewed-by: Harry Wentland Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-drm-v1-5-210f2b36b9bf@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index e339c7a8d541c..e27d077396327 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -614,7 +614,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) * incorrect/corrupted and we should correct our SRM by getting it from PSP */ static ssize_t srm_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buffer, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct hdcp_workqueue *work; @@ -638,7 +638,7 @@ static ssize_t srm_data_write(struct file *filp, struct kobject *kobj, } static ssize_t srm_data_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buffer, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct hdcp_workqueue *work; @@ -698,8 +698,8 @@ static ssize_t srm_data_read(struct file *filp, struct kobject *kobj, static const struct bin_attribute data_attr = { .attr = {.name = "hdcp_srm", .mode = 0664}, .size = PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, /* Limit SRM size */ - .write = srm_data_write, - .read = srm_data_read, + .write_new = srm_data_write, + .read_new = srm_data_read, }; struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, From 4ab0279857bb0b1c7a1ed61186527e33db693b6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 15 Dec 2024 15:10:53 +0100 Subject: [PATCH 24/42] fsi: core: Use const 'struct bin_attribute' callbacks 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now provides callback variants that explicitly take a const pointer. Make use of it to match the attribute definition. Signed-off-by: Thomas Weißschuh Reviewed-by: Eddie James Link: https://lore.kernel.org/r/20241215-sysfs-const-bin_attr-fsi-v1-1-b717f76a0146@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/fsi/fsi-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c index e2e1e9df61154..50e8736039fe6 100644 --- a/drivers/fsi/fsi-core.c +++ b/drivers/fsi/fsi-core.c @@ -554,7 +554,7 @@ static unsigned long aligned_access_size(size_t offset, size_t count) } static ssize_t fsi_slave_sysfs_raw_read(struct file *file, - struct kobject *kobj, struct bin_attribute *attr, char *buf, + struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct fsi_slave *slave = to_fsi_slave(kobj_to_dev(kobj)); @@ -581,7 +581,7 @@ static ssize_t fsi_slave_sysfs_raw_read(struct file *file, } static ssize_t fsi_slave_sysfs_raw_write(struct file *file, - struct kobject *kobj, struct bin_attribute *attr, + struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct fsi_slave *slave = to_fsi_slave(kobj_to_dev(kobj)); @@ -613,8 +613,8 @@ static const struct bin_attribute fsi_slave_raw_attr = { .mode = 0600, }, .size = 0, - .read = fsi_slave_sysfs_raw_read, - .write = fsi_slave_sysfs_raw_write, + .read_new = fsi_slave_sysfs_raw_read, + .write_new = fsi_slave_sysfs_raw_write, }; static void fsi_slave_release(struct device *dev) From f800cc58598eb4564cabcc4129d5d1a6f7f598b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 13:25:12 +0100 Subject: [PATCH 25/42] accel/habanalabs: constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-habanalabs-v1-1-b35463197efb@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/accel/habanalabs/common/sysfs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c index e9f8ccc0bbf9d..9d58efa2ff380 100644 --- a/drivers/accel/habanalabs/common/sysfs.c +++ b/drivers/accel/habanalabs/common/sysfs.c @@ -368,7 +368,7 @@ static ssize_t max_power_store(struct device *dev, } static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t offset, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t max_size) { struct device *dev = kobj_to_dev(kobj); @@ -443,10 +443,10 @@ static DEVICE_ATTR_RO(security_enabled); static DEVICE_ATTR_RO(module_id); static DEVICE_ATTR_RO(parent_device); -static struct bin_attribute bin_attr_eeprom = { +static const struct bin_attribute bin_attr_eeprom = { .attr = {.name = "eeprom", .mode = (0444)}, .size = PAGE_SIZE, - .read = eeprom_read_handler + .read_new = eeprom_read_handler }; static struct attribute *hl_dev_attrs[] = { @@ -472,14 +472,14 @@ static struct attribute *hl_dev_attrs[] = { NULL, }; -static struct bin_attribute *hl_dev_bin_attrs[] = { +static const struct bin_attribute *const hl_dev_bin_attrs[] = { &bin_attr_eeprom, NULL }; static struct attribute_group hl_dev_attr_group = { .attrs = hl_dev_attrs, - .bin_attrs = hl_dev_bin_attrs, + .bin_attrs_new = hl_dev_bin_attrs, }; static struct attribute_group hl_dev_clks_attr_group; From f9c883f0df2765c0cb1db3c306a36787ccb2055a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 22 Dec 2024 21:00:43 +0100 Subject: [PATCH 26/42] Input: goodix-berlin - constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241222-sysfs-const-bin_attr-input-v1-1-1229dbe5ae71@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/input/touchscreen/goodix_berlin_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/goodix_berlin_core.c b/drivers/input/touchscreen/goodix_berlin_core.c index 3fc03cf0ca23f..9b53d98055e93 100644 --- a/drivers/input/touchscreen/goodix_berlin_core.c +++ b/drivers/input/touchscreen/goodix_berlin_core.c @@ -673,7 +673,7 @@ static void goodix_berlin_power_off_act(void *data) } static ssize_t registers_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -686,7 +686,7 @@ static ssize_t registers_read(struct file *filp, struct kobject *kobj, } static ssize_t registers_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -698,15 +698,15 @@ static ssize_t registers_write(struct file *filp, struct kobject *kobj, return error ? 
error : count; } -static BIN_ATTR_ADMIN_RW(registers, 0); +static const BIN_ATTR_ADMIN_RW(registers, 0); -static struct bin_attribute *goodix_berlin_bin_attrs[] = { +static const struct bin_attribute *const goodix_berlin_bin_attrs[] = { &bin_attr_registers, NULL, }; static const struct attribute_group goodix_berlin_attr_group = { - .bin_attrs = goodix_berlin_bin_attrs, + .bin_attrs_new = goodix_berlin_bin_attrs, }; const struct attribute_group *goodix_berlin_groups[] = { From ae7a15fb2920844e61cc71199cd1a08795716c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 15 Dec 2024 15:15:48 +0100 Subject: [PATCH 27/42] efi/mokvar: Use const 'struct bin_attribute' callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now provides callback variants that explicitly take a const pointer. Use them so the non-const variants can be removed. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241215-sysfs-const-bin_attr-mokvar-v1-1-d5a3d1fff8d1@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/mokvar-table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c index 5ed0602c2f75f..59b090849a2a8 100644 --- a/drivers/firmware/efi/mokvar-table.c +++ b/drivers/firmware/efi/mokvar-table.c @@ -266,7 +266,7 @@ struct efi_mokvar_table_entry *efi_mokvar_entry_find(const char *name) * amount of data in this mokvar config table entry. 
*/ static ssize_t efi_mokvar_sysfs_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct efi_mokvar_table_entry *mokvar_entry = bin_attr->private; @@ -343,7 +343,7 @@ static int __init efi_mokvar_sysfs_init(void) mokvar_sysfs->bin_attr.attr.name = mokvar_entry->name; mokvar_sysfs->bin_attr.attr.mode = 0400; mokvar_sysfs->bin_attr.size = mokvar_entry->data_size; - mokvar_sysfs->bin_attr.read = efi_mokvar_sysfs_read; + mokvar_sysfs->bin_attr.read_new = efi_mokvar_sysfs_read; err = sysfs_create_bin_file(mokvar_kobj, &mokvar_sysfs->bin_attr); From 05a9896fa9e15466456a1b1dc9d2eacdf3551b79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 15 Dec 2024 14:18:59 +0100 Subject: [PATCH 28/42] pcmcia: cistpl: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241215-sysfs-const-bin_attr-pcmcia-v1-1-ebb82e47d834@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/pcmcia/cistpl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index d018f36f3a893..0c801e4ccc6c2 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -1540,7 +1540,7 @@ static ssize_t pccard_extract_cis(struct pcmcia_socket *s, char *buf, static ssize_t pccard_show_cis(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { unsigned int size = 0x200; @@ -1571,7 +1571,7 @@ static ssize_t pccard_show_cis(struct file *filp, struct kobject *kobj, static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct pcmcia_socket *s; @@ -1605,6 +1605,6 @@ static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj, const struct bin_attribute pccard_cis_attr = { .attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR }, .size = 0x200, - .read = pccard_show_cis, - .write = pccard_store_cis, + .read_new = pccard_show_cis, + .write_new = pccard_store_cis, }; From 10f10210f674a79b1214abae58077475f5de81b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 05:07:43 +0100 Subject: [PATCH 29/42] powerpc/secvar: Mark __init functions as such MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The setup functions are only called during the init phase of the kernel. They can be discarded and their memory reused after that. 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-powerpc-v1-1-bbed8906f476@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/secvar-sysfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index fbeb1cbac01b2..b7536fbe8c4f4 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -130,7 +130,7 @@ static const struct kobj_type secvar_ktype = { .default_groups = secvar_attr_groups, }; -static int update_kobj_size(void) +static __init int update_kobj_size(void) { u64 varsize; @@ -145,7 +145,7 @@ static int update_kobj_size(void) return 0; } -static int secvar_sysfs_config(struct kobject *kobj) +static __init int secvar_sysfs_config(struct kobject *kobj) { struct attribute_group config_group = { .name = "config", @@ -158,7 +158,7 @@ static int secvar_sysfs_config(struct kobject *kobj) return 0; } -static int add_var(const char *name) +static __init int add_var(const char *name) { struct kobject *kobj; int rc; @@ -181,7 +181,7 @@ static int add_var(const char *name) return 0; } -static int secvar_sysfs_load(void) +static __init int secvar_sysfs_load(void) { u64 namesize = 0; char *name; @@ -209,7 +209,7 @@ static int secvar_sysfs_load(void) return rc; } -static int secvar_sysfs_load_static(void) +static __init int secvar_sysfs_load_static(void) { const char * const *name_ptr = secvar_ops->var_names; int rc; @@ -224,7 +224,7 @@ static int secvar_sysfs_load_static(void) return 0; } -static int secvar_sysfs_init(void) +static __init int secvar_sysfs_init(void) { u64 max_size; int rc; From 982d13db108c9a30b98d1eb3445011dbc5616532 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 05:07:44 +0100 Subject: [PATCH 30/42] powerpc/secvar: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-powerpc-v1-2-bbed8906f476@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/secvar-sysfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index b7536fbe8c4f4..afb690a172b4e 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -52,7 +52,7 @@ static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, } static ssize_t data_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { char *data; @@ -85,7 +85,7 @@ static ssize_t data_read(struct file *filep, struct kobject *kobj, } static ssize_t update_write(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { int rc; @@ -104,11 +104,11 @@ static struct kobj_attribute format_attr = __ATTR_RO(format); static struct kobj_attribute size_attr = __ATTR_RO(size); -static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0); +static struct bin_attribute data_attr __ro_after_init = __BIN_ATTR_RO(data, 0); -static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0); +static struct bin_attribute update_attr __ro_after_init = __BIN_ATTR_WO(update, 0); -static struct bin_attribute *secvar_bin_attrs[] = { +static const struct bin_attribute *const secvar_bin_attrs[] = { &data_attr, &update_attr, NULL, @@ -121,7 +121,7 @@ static struct attribute *secvar_attrs[] = { static const struct attribute_group secvar_attr_group = 
{ .attrs = secvar_attrs, - .bin_attrs = secvar_bin_attrs, + .bin_attrs_new = secvar_bin_attrs, }; __ATTRIBUTE_GROUPS(secvar_attr); From f629576662e024f57cb2c8d4ca6f297db9b904a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 05:07:45 +0100 Subject: [PATCH 31/42] powerpc/powernv/ultravisor: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-powerpc-v1-3-bbed8906f476@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/ultravisor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c index 67c8c4b2d8b17..157d9a8134e44 100644 --- a/arch/powerpc/platforms/powernv/ultravisor.c +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -32,15 +32,15 @@ int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname, static struct memcons *uv_memcons; static ssize_t uv_msglog_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, + const struct bin_attribute *bin_attr, char *to, loff_t pos, size_t count) { return memcons_copy(uv_memcons, to, pos, count); } -static struct bin_attribute uv_msglog_attr = { +static struct bin_attribute uv_msglog_attr __ro_after_init = { .attr = {.name = "msglog", .mode = 0400}, - .read = uv_msglog_read + .read_new = uv_msglog_read }; static int __init uv_init(void) From f2b62c03a28279cbeeb34d12037740a7e7703a6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 05:07:46 +0100 Subject: [PATCH 32/42] powerpc/powernv/opal: Constify 'struct 
bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-powerpc-v1-4-bbed8906f476@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/opal-core.c | 10 +++++----- arch/powerpc/platforms/powernv/opal-dump.c | 4 ++-- arch/powerpc/platforms/powernv/opal-elog.c | 4 ++-- arch/powerpc/platforms/powernv/opal-flash.c | 4 ++-- arch/powerpc/platforms/powernv/opal-msglog.c | 6 +++--- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index c9a9b759cc928..d95a5f67211b7 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -159,7 +159,7 @@ static Elf64_Word *__init auxv_to_elf64_notes(Elf64_Word *buf, * Returns number of bytes read on success, -errno on failure. 
*/ static ssize_t read_opalcore(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, + const struct bin_attribute *bin_attr, char *to, loff_t pos, size_t count) { struct opalcore *m; @@ -206,9 +206,9 @@ static ssize_t read_opalcore(struct file *file, struct kobject *kobj, return (tpos - pos); } -static struct bin_attribute opal_core_attr = { +static struct bin_attribute opal_core_attr __ro_after_init = { .attr = {.name = "core", .mode = 0400}, - .read = read_opalcore + .read_new = read_opalcore }; /* @@ -599,7 +599,7 @@ static struct attribute *mpipl_attr[] = { NULL, }; -static struct bin_attribute *mpipl_bin_attr[] = { +static const struct bin_attribute *const mpipl_bin_attr[] = { &opal_core_attr, NULL, @@ -607,7 +607,7 @@ static struct bin_attribute *mpipl_bin_attr[] = { static const struct attribute_group mpipl_group = { .attrs = mpipl_attr, - .bin_attrs = mpipl_bin_attr, + .bin_attrs_new = mpipl_bin_attr, }; static int __init opalcore_init(void) diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 608e4b68c5ea9..27e25693cf399 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -286,7 +286,7 @@ static int64_t dump_read_data(struct dump_obj *dump) } static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { ssize_t rc; @@ -342,7 +342,7 @@ static void create_dump_obj(uint32_t id, size_t size, uint32_t type) dump->dump_attr.attr.name = "dump"; dump->dump_attr.attr.mode = 0400; dump->dump_attr.size = size; - dump->dump_attr.read = dump_attr_read; + dump->dump_attr.read_new = dump_attr_read; dump->id = id; dump->size = size; diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 5db1e733143bf..de33f354e9fdd 100644 --- 
a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -156,7 +156,7 @@ static const struct kobj_type elog_ktype = { #define OPAL_MAX_ERRLOG_SIZE 16384 static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { int opal_rc; @@ -203,7 +203,7 @@ static void create_elog_obj(uint64_t id, size_t size, uint64_t type) elog->raw_attr.attr.name = "raw"; elog->raw_attr.attr.mode = 0400; elog->raw_attr.size = size; - elog->raw_attr.read = raw_attr_read; + elog->raw_attr.read_new = raw_attr_read; elog->id = id; elog->size = size; diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index d5ea04e8e4c52..fd8c8621e9734 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -432,7 +432,7 @@ static int alloc_image_buf(char *buffer, size_t count) * and pre-allocate required memory. 
*/ static ssize_t image_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { int rc; @@ -493,7 +493,7 @@ static ssize_t image_data_write(struct file *filp, struct kobject *kobj, static const struct bin_attribute image_data_attr = { .attr = {.name = "image", .mode = 0200}, .size = MAX_IMAGE_SIZE, /* Limit image size */ - .write = image_data_write, + .write_new = image_data_write, }; static struct kobj_attribute validate_attribute = diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 22d6efe17b0d0..f1988d0ab45ce 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -94,15 +94,15 @@ ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) } static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, + const struct bin_attribute *bin_attr, char *to, loff_t pos, size_t count) { return opal_msglog_copy(to, pos, count); } -static struct bin_attribute opal_msglog_attr = { +static struct bin_attribute opal_msglog_attr __ro_after_init = { .attr = {.name = "msglog", .mode = 0400}, - .read = opal_msglog_read + .read_new = opal_msglog_read }; struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name) From 4aad348d0fa62f816ae3e9dfbfc6663443357c0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 05:07:47 +0100 Subject: [PATCH 33/42] powerpc/perf/hv-24x7: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-powerpc-v1-5-bbed8906f476@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/hv-24x7.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index d400fa391c276..b0768f3d28930 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -998,7 +998,7 @@ static int create_events_from_catalog(struct attribute ***events_, } static ssize_t catalog_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t offset, size_t count) { long hret; @@ -1108,14 +1108,14 @@ PAGE_0_ATTR(catalog_version, "%lld\n", (unsigned long long)be64_to_cpu(page_0->version)); PAGE_0_ATTR(catalog_len, "%lld\n", (unsigned long long)be32_to_cpu(page_0->length) * 4096); -static BIN_ATTR_RO(catalog, 0/* real length varies */); +static const BIN_ATTR_RO(catalog, 0/* real length varies */); static DEVICE_ATTR_RO(domains); static DEVICE_ATTR_RO(sockets); static DEVICE_ATTR_RO(chipspersocket); static DEVICE_ATTR_RO(coresperchip); static DEVICE_ATTR_RO(cpumask); -static struct bin_attribute *if_bin_attrs[] = { +static const struct bin_attribute *const if_bin_attrs[] = { &bin_attr_catalog, NULL, }; @@ -1141,7 +1141,7 @@ static struct attribute *if_attrs[] = { static const struct attribute_group if_group = { .name = "interface", - .bin_attrs = if_bin_attrs, + .bin_attrs_new = if_bin_attrs, .attrs = if_attrs, }; From 80f756cabfbf81d0d629b45eb8e3f9f0196728d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 14 Jan 2025 22:50:20 +0100 Subject: [PATCH 34/42] firmware: qemu_fw_cfg: constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only 
memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Acked-by: Gabriel Somlo Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20250114-sysfs-const-bin_attr-qemu_fw_cfg-v1-1-76f525a3ee72@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/qemu_fw_cfg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index d58da3e4500a5..2615fb780e3c4 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -460,7 +460,7 @@ static const struct kobj_type fw_cfg_sysfs_entry_ktype = { /* raw-read method and attribute */ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct fw_cfg_sysfs_entry *entry = to_entry(kobj); @@ -474,9 +474,9 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, struct kobject *kobj, return fw_cfg_read_blob(entry->select, buf, pos, count); } -static struct bin_attribute fw_cfg_sysfs_attr_raw = { +static const struct bin_attribute fw_cfg_sysfs_attr_raw = { .attr = { .name = "raw", .mode = S_IRUSR }, - .read = fw_cfg_sysfs_read_raw, + .read_new = fw_cfg_sysfs_read_raw, }; /* From 5d0fbf548cbfcd1d7559b2daddedbeaa212d477f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 13:22:54 +0100 Subject: [PATCH 35/42] rapidio: constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241216-sysfs-const-bin_attr-rapidio-v1-1-0f47f4719683@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/rapidio/rio-sysfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 90d3912105338..6f89b232f1d5d 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -114,7 +114,7 @@ static struct attribute *rio_dev_attrs[] = { static ssize_t rio_read_config(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct rio_dev *dev = to_rio_dev(kobj_to_dev(kobj)); @@ -185,7 +185,7 @@ rio_read_config(struct file *filp, struct kobject *kobj, static ssize_t rio_write_config(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct rio_dev *dev = to_rio_dev(kobj_to_dev(kobj)); @@ -241,17 +241,17 @@ rio_write_config(struct file *filp, struct kobject *kobj, return count; } -static struct bin_attribute rio_config_attr = { +static const struct bin_attribute rio_config_attr = { .attr = { .name = "config", .mode = S_IRUGO | S_IWUSR, }, .size = RIO_MAINT_SPACE_SZ, - .read = rio_read_config, - .write = rio_write_config, + .read_new = rio_read_config, + .write_new = rio_write_config, }; -static struct bin_attribute *rio_dev_bin_attrs[] = { +static const struct bin_attribute *const rio_dev_bin_attrs[] = { &rio_config_attr, NULL, }; @@ -278,7 +278,7 @@ static umode_t rio_dev_is_attr_visible(struct kobject *kobj, static const struct attribute_group rio_dev_group = { .attrs = rio_dev_attrs, .is_visible = rio_dev_is_attr_visible, - .bin_attrs = rio_dev_bin_attrs, + .bin_attrs_new = rio_dev_bin_attrs, }; const struct attribute_group *rio_dev_groups[] = { From e965efc4aa14d9195d26a956a6bff5041110a155 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 22 Nov 2024 11:31:15 +0100 Subject: [PATCH 36/42] efi: rci2: mark bin_attribute as __ro_after_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The attribute is only modified during __init phase. Protect it against accidental or intentional modifications afterwards. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241122-sysfs-const-bin_attr-rci2-v1-1-3db1ec9aa203@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/rci2-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/rci2-table.c b/drivers/firmware/efi/rci2-table.c index 4fd45d6f69a4d..c1bedd244817b 100644 --- a/drivers/firmware/efi/rci2-table.c +++ b/drivers/firmware/efi/rci2-table.c @@ -40,7 +40,7 @@ static u8 *rci2_base; static u32 rci2_table_len; unsigned long rci2_table_phys __ro_after_init = EFI_INVALID_TABLE_ADDR; -static BIN_ATTR_SIMPLE_ADMIN_RO(rci2); +static __ro_after_init BIN_ATTR_SIMPLE_ADMIN_RO(rci2); static u16 checksum(void) { From c5020c5be9d266f66fa5ba3286f0e8d2d2265970 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 21 Feb 2025 09:42:32 +0100 Subject: [PATCH 37/42] kernfs: Move dput() outside of the RCU section. Al Viro pointed out that dput() might sleep and must not be invoked within an RCU section. Keep only find_next_ancestor() within the RCU section. Correct the wording in the comment.
Fixes: 6ef5b6fae3040 ("kernfs: Drop kernfs_rwsem while invoking lookup_positive_unlocked().") Reported-by: Al Viro Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250221084232.xksA_IQ4@linutronix.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index f1cea282aae32..5124e196c2bfd 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -222,17 +222,17 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, root = kernfs_root(kn); /* * As long as kn is valid, its parent can not vanish. This is cgroup's - * kn so it not have its parent replaced. Therefore it is safe to use + * kn so it can't have its parent replaced. Therefore it is safe to use * the ancestor node outside of the RCU or locked section. */ if (WARN_ON_ONCE(!(root->flags & KERNFS_ROOT_INVARIANT_PARENT))) return ERR_PTR(-EINVAL); scoped_guard(rcu) { knparent = find_next_ancestor(kn, NULL); - if (WARN_ON(!knparent)) { - dput(dentry); - return ERR_PTR(-EINVAL); - } + } + if (WARN_ON(!knparent)) { + dput(dentry); + return ERR_PTR(-EINVAL); } do { From 354fd6e86fac60b7c1ce2e6c83d4e6bf8af95f59 Mon Sep 17 00:00:00 2001 From: Fiona Behrens Date: Mon, 17 Feb 2025 21:58:14 +0100 Subject: [PATCH 38/42] rust: io: rename `io::Io` accessors Rename the I/O accessors provided by `Io` to encode the type as number instead of letter. This is in preparation for Port I/O support to use a trait for generic accessors. Add a `c_fn` argument to the accessor generation macro to translate between rust and C names. 
Suggested-by: Danilo Krummrich Link: https://rust-for-linux.zulipchat.com/#narrow/channel/288089-General/topic/PIO.20support/near/499460541 Signed-off-by: Fiona Behrens Acked-by: Danilo Krummrich Acked-by: Daniel Almeida Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250217-io-generic-rename-v1-1-06d97a9e3179@kloenk.dev Signed-off-by: Greg Kroah-Hartman --- rust/kernel/io.rs | 66 ++++++++++++++++----------------- samples/rust/rust_driver_pci.rs | 12 +++--- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index d4a73e52e3ee6..72d80a6f131e3 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -98,9 +98,9 @@ impl IoRaw { ///# fn no_run() -> Result<(), Error> { /// // SAFETY: Invalid usage for example purposes. /// let iomem = unsafe { IoMem::<{ core::mem::size_of::() }>::new(0xBAAAAAAD)? }; -/// iomem.writel(0x42, 0x0); -/// assert!(iomem.try_writel(0x42, 0x0).is_ok()); -/// assert!(iomem.try_writel(0x42, 0x4).is_err()); +/// iomem.write32(0x42, 0x0); +/// assert!(iomem.try_write32(0x42, 0x0).is_ok()); +/// assert!(iomem.try_write32(0x42, 0x4).is_err()); /// # Ok(()) /// # } /// ``` @@ -108,7 +108,7 @@ impl IoRaw { pub struct Io(IoRaw); macro_rules! define_read { - ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => { + ($(#[$attr:meta])* $name:ident, $try_name:ident, $c_fn:ident -> $type_name:ty) => { /// Read IO data from a given offset known at compile time. /// /// Bound checks are performed on compile time, hence if the offset is not known at compile @@ -119,7 +119,7 @@ macro_rules! define_read { let addr = self.io_addr_assert::<$type_name>(offset); // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$name(addr as _) } + unsafe { bindings::$c_fn(addr as _) } } /// Read IO data from a given offset. @@ -131,13 +131,13 @@ macro_rules! 
define_read { let addr = self.io_addr::<$type_name>(offset)?; // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - Ok(unsafe { bindings::$name(addr as _) }) + Ok(unsafe { bindings::$c_fn(addr as _) }) } }; } macro_rules! define_write { - ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => { + ($(#[$attr:meta])* $name:ident, $try_name:ident, $c_fn:ident <- $type_name:ty) => { /// Write IO data from a given offset known at compile time. /// /// Bound checks are performed on compile time, hence if the offset is not known at compile @@ -148,7 +148,7 @@ macro_rules! define_write { let addr = self.io_addr_assert::<$type_name>(offset); // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$name(value, addr as _, ) } + unsafe { bindings::$c_fn(value, addr as _, ) } } /// Write IO data from a given offset. @@ -160,7 +160,7 @@ macro_rules! define_write { let addr = self.io_addr::<$type_name>(offset)?; // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. 
- unsafe { bindings::$name(value, addr as _) } + unsafe { bindings::$c_fn(value, addr as _) } Ok(()) } }; @@ -218,43 +218,43 @@ impl Io { self.addr() + offset } - define_read!(readb, try_readb, u8); - define_read!(readw, try_readw, u16); - define_read!(readl, try_readl, u32); + define_read!(read8, try_read8, readb -> u8); + define_read!(read16, try_read16, readw -> u16); + define_read!(read32, try_read32, readl -> u32); define_read!( #[cfg(CONFIG_64BIT)] - readq, - try_readq, - u64 + read64, + try_read64, + readq -> u64 ); - define_read!(readb_relaxed, try_readb_relaxed, u8); - define_read!(readw_relaxed, try_readw_relaxed, u16); - define_read!(readl_relaxed, try_readl_relaxed, u32); + define_read!(read8_relaxed, try_read8_relaxed, readb_relaxed -> u8); + define_read!(read16_relaxed, try_read16_relaxed, readw_relaxed -> u16); + define_read!(read32_relaxed, try_read32_relaxed, readl_relaxed -> u32); define_read!( #[cfg(CONFIG_64BIT)] - readq_relaxed, - try_readq_relaxed, - u64 + read64_relaxed, + try_read64_relaxed, + readq_relaxed -> u64 ); - define_write!(writeb, try_writeb, u8); - define_write!(writew, try_writew, u16); - define_write!(writel, try_writel, u32); + define_write!(write8, try_write8, writeb <- u8); + define_write!(write16, try_write16, writew <- u16); + define_write!(write32, try_write32, writel <- u32); define_write!( #[cfg(CONFIG_64BIT)] - writeq, - try_writeq, - u64 + write64, + try_write64, + writeq <- u64 ); - define_write!(writeb_relaxed, try_writeb_relaxed, u8); - define_write!(writew_relaxed, try_writew_relaxed, u16); - define_write!(writel_relaxed, try_writel_relaxed, u32); + define_write!(write8_relaxed, try_write8_relaxed, writeb_relaxed <- u8); + define_write!(write16_relaxed, try_write16_relaxed, writew_relaxed <- u16); + define_write!(write32_relaxed, try_write32_relaxed, writel_relaxed <- u32); define_write!( #[cfg(CONFIG_64BIT)] - writeq_relaxed, - try_writeq_relaxed, - u64 + write64_relaxed, + try_write64_relaxed, + writeq_relaxed <- 
u64 ); } diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs index 1fb6e44f33951..ddc52db71a82a 100644 --- a/samples/rust/rust_driver_pci.rs +++ b/samples/rust/rust_driver_pci.rs @@ -43,17 +43,17 @@ kernel::pci_device_table!( impl SampleDriver { fn testdev(index: &TestIndex, bar: &Bar0) -> Result { // Select the test. - bar.writeb(index.0, Regs::TEST); + bar.write8(index.0, Regs::TEST); - let offset = u32::from_le(bar.readl(Regs::OFFSET)) as usize; - let data = bar.readb(Regs::DATA); + let offset = u32::from_le(bar.read32(Regs::OFFSET)) as usize; + let data = bar.read8(Regs::DATA); // Write `data` to `offset` to increase `count` by one. // - // Note that we need `try_writeb`, since `offset` can't be checked at compile-time. - bar.try_writeb(data, offset)?; + // Note that we need `try_write8`, since `offset` can't be checked at compile-time. + bar.try_write8(data, offset)?; - Ok(bar.readl(Regs::COUNT)) + Ok(bar.read32(Regs::COUNT)) } } From 040b17ae0e15bd7100432e0a20c6557d463a8c9f Mon Sep 17 00:00:00 2001 From: Fiona Behrens Date: Mon, 24 Feb 2025 19:36:43 +0100 Subject: [PATCH 39/42] rust: io: fix devres test with new io accessor functions Fix doctest of `Devres` which still used `writeb` instead of `write8`. 
Fixes: 354fd6e86fac ("rust: io: rename `io::Io` accessors") Signed-off-by: Fiona Behrens Link: https://lore.kernel.org/r/20250224-rust-iowrite-read8-fix-v1-1-c6abee346897@kloenk.dev Signed-off-by: Greg Kroah-Hartman --- rust/kernel/devres.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 942376f6f3af4..ddb1ce4a78d94 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -92,7 +92,7 @@ struct DevresInner { /// let devres = Devres::new(&dev, iomem, GFP_KERNEL)?; /// /// let res = devres.try_access().ok_or(ENXIO)?; -/// res.writel(0x42, 0x0); +/// res.write8(0x42, 0x0); /// # Ok(()) /// # } /// ``` From 6853d9d13dbe596a73ae968e6fb27ba9680b2441 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 25 Feb 2025 16:29:00 -0500 Subject: [PATCH 40/42] rust/faux: Drop #[repr(transparent)] from faux::Registration I think this change got missed during review, we don't need #[repr(transparent)] since Registration just holds a single NonNull. This attribute had originally been added by me when I was still figuring out how the bindings should look like but got committed by mistake. So, just drop it. Signed-off-by: Lyude Paul Cc: Greg Kroah-Hartman Acked-by: Danilo Krummrich Reviewed-by: Fiona Behrens Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250225213112.872264-2-lyude@redhat.com Signed-off-by: Greg Kroah-Hartman --- rust/kernel/faux.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs index 5acc0c02d451f..41751403cd868 100644 --- a/rust/kernel/faux.rs +++ b/rust/kernel/faux.rs @@ -19,7 +19,6 @@ use core::ptr::{addr_of_mut, null, null_mut, NonNull}; /// `self.0` always holds a valid pointer to an initialized and registered [`struct faux_device`]. 
/// /// [`struct faux_device`]: srctree/include/linux/device/faux.h -#[repr(transparent)] pub struct Registration(NonNull<bindings::faux_device>); impl Registration { From 95cb0cb546c2892b7a31ff2fce6573f201a214b8 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 27 Feb 2025 14:35:06 -0500 Subject: [PATCH 41/42] rust/faux: Add missing parent argument to Registration::new() A little late in the review of the faux device interface, we added the ability to specify a parent device when creating new faux devices - but this never got ported over to the rust bindings. So, let's add the missing argument now so we don't have to convert other users later down the line. Signed-off-by: Lyude Paul Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250227193522.198344-1-lyude@redhat.com Signed-off-by: Greg Kroah-Hartman --- rust/kernel/faux.rs | 13 +++++++++++-- samples/rust/rust_driver_faux.rs | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs index 41751403cd868..3277f35c3f79b 100644 --- a/rust/kernel/faux.rs +++ b/rust/kernel/faux.rs @@ -23,11 +23,20 @@ pub struct Registration(NonNull<bindings::faux_device>); impl Registration { /// Create and register a new faux device with the given name. - pub fn new(name: &CStr) -> Result<Self> { + pub fn new(name: &CStr, parent: Option<&device::Device>) -> Result<Self> { // SAFETY: // - `name` is copied by this function into its own storage // - `faux_ops` is safe to leave NULL according to the C API - let dev = unsafe { bindings::faux_device_create(name.as_char_ptr(), null_mut(), null()) }; + // - `parent` can be either NULL or a pointer to a `struct device`, and `faux_device_create` + // will take a reference to `parent` using `device_add` - ensuring that it remains valid + // for the lifetime of the faux device. 
+ let dev = unsafe { + bindings::faux_device_create( + name.as_char_ptr(), + parent.map_or(null_mut(), |p| p.as_raw()), + null(), + ) + }; // The above function will return either a valid device, or NULL on failure // INVARIANT: The device will remain registered until faux_device_destroy() is called, which diff --git a/samples/rust/rust_driver_faux.rs b/samples/rust/rust_driver_faux.rs index 048c6cb98b29a..58a3a94121bff 100644 --- a/samples/rust/rust_driver_faux.rs +++ b/samples/rust/rust_driver_faux.rs @@ -20,7 +20,7 @@ impl Module for SampleModule { fn init(_module: &'static ThisModule) -> Result<Self> { pr_info!("Initialising Rust Faux Device Sample\n"); - let reg = faux::Registration::new(c_str!("rust-faux-sample-device"))?; + let reg = faux::Registration::new(c_str!("rust-faux-sample-device"), None)?; dev_info!(reg.as_ref(), "Hello from faux device!\n"); From 21b0dc55bed6d9b5dd5d1ad22b75d9d1c7426bbc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Feb 2025 07:35:46 +0100 Subject: [PATCH 42/42] driver core: faux: only create the device if probe() succeeds It's really hard to know if a faux device properly passes the callback to probe() without having to poke around in the faux_device structure and then clean up. Instead of having to have every user of the api do this logic, just do it in the faux device core itself. This makes the use of a custom probe() callback for a faux device much simpler overall. Suggested-by: Kurt Borja Cc: Rafael J. 
Wysocki Reviewed-by: Kurt Borja Reviewed-by: Danilo Krummrich Link: https://lore.kernel.org/r/2025022545-unroasted-common-fa0e@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/base/faux.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/base/faux.c b/drivers/base/faux.c index 531e9d789ee04..407c1d1aad50b 100644 --- a/drivers/base/faux.c +++ b/drivers/base/faux.c @@ -102,7 +102,9 @@ static void faux_device_release(struct device *dev) * * Note, when this function is called, the functions specified in struct * faux_ops can be called before the function returns, so be prepared for - * everything to be properly initialized before that point in time. + * everything to be properly initialized before that point in time. If the + * probe callback (if one is present) does NOT succeed, the creation of the + * device will fail and NULL will be returned. * * Return: * * NULL if an error happened with creating the device @@ -147,6 +149,17 @@ struct faux_device *faux_device_create_with_groups(const char *name, return NULL; } + /* + * Verify that we did bind the driver to the device (i.e. probe worked), + * if not, let's fail the creation as trying to guess if probe was + * successful is almost impossible to determine by the caller. + */ + if (!dev->driver) { + dev_err(dev, "probe did not succeed, tearing down the device\n"); + faux_device_destroy(faux_dev); + faux_dev = NULL; + } + return faux_dev; } EXPORT_SYMBOL_GPL(faux_device_create_with_groups);