From 304a2c4aad0fff887ce493e4197bf9cbaf394479 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 22 Sep 2023 04:36:00 +0000 Subject: [PATCH 0001/1397] locking/ww_mutex/test: Fix potential workqueue corruption [ Upstream commit bccdd808902f8c677317cec47c306e42b93b849e ] In some cases running with the test-ww_mutex code, I was seeing odd behavior where sometimes it seemed flush_workqueue was returning before all the work threads were finished. Often this would cause strange crashes as the mutexes would be freed while they were being used. Looking at the code, there is a lifetime problem as the controlling thread that spawns the work allocates the "struct stress" structures that are passed to the workqueue threads. Then when the workqueue threads are finished, they free the stress struct that was passed to them. Unfortunately the workqueue work_struct node is in the stress struct. Which means the work_struct is freed before the work thread returns and while flush_workqueue is waiting. It seems like a better idea to have the controlling thread both allocate and free the stress structures, so that we can be sure we don't corrupt the workqueue by freeing the structure prematurely. So this patch reworks the test to do so, and with this change I no longer see the early flush_workqueue returns. Signed-off-by: John Stultz Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230922043616.19282-3-jstultz@google.com Signed-off-by: Sasha Levin --- kernel/locking/test-ww_mutex.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 93cca6e69860..7c5a8f05497f 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -466,7 +466,6 @@ static void stress_inorder_work(struct work_struct *work) } while (!time_after(jiffies, stress->timeout)); kfree(order); - kfree(stress); } struct reorder_lock { @@ -531,7 +530,6 @@ static void stress_reorder_work(struct work_struct *work) list_for_each_entry_safe(ll, ln, &locks, link) kfree(ll); kfree(order); - kfree(stress); } static void stress_one_work(struct work_struct *work) @@ -552,8 +550,6 @@ static void stress_one_work(struct work_struct *work) break; } } while (!time_after(jiffies, stress->timeout)); - - kfree(stress); } #define STRESS_INORDER BIT(0) @@ -564,15 +560,24 @@ static void stress_one_work(struct work_struct *work) static int stress(int nlocks, int nthreads, unsigned int flags) { struct ww_mutex *locks; - int n; + struct stress *stress_array; + int n, count; locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL); if (!locks) return -ENOMEM; + stress_array = kmalloc_array(nthreads, sizeof(*stress_array), + GFP_KERNEL); + if (!stress_array) { + kfree(locks); + return -ENOMEM; + } + for (n = 0; n < nlocks; n++) ww_mutex_init(&locks[n], &ww_class); + count = 0; for (n = 0; nthreads; n++) { struct stress *stress; void (*fn)(struct work_struct *work); @@ -596,9 +601,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) if (!fn) continue; - stress = kmalloc(sizeof(*stress), GFP_KERNEL); - if (!stress) - break; + stress = &stress_array[count++]; INIT_WORK(&stress->work, fn); stress->locks = locks; @@ -613,6 +616,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) for (n = 0; n < nlocks; n++) ww_mutex_destroy(&locks[n]); + kfree(stress_array); kfree(locks); return 0; From d5e09e385e8abfdbbd9af3e2c83e4e8ca854f2ef Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 12 Sep 2023 13:04:29 +0100 Subject: [PATCH 0002/1397] btrfs: abort transaction on generation mismatch when marking eb as dirty [ Upstream commit 50564b651d01c19ce732819c5b3c3fd60707188e ] When marking an extent buffer as dirty, at btrfs_mark_buffer_dirty(), we check if its generation matches the running transaction and if not we just print a warning. Such mismatch is an indicator that something really went wrong and only printing a warning message (and stack trace) is not enough to prevent a corruption. Allowing a transaction to commit with such an extent buffer will trigger an error if we ever try to read it from disk due to a generation mismatch with its parent generation. So abort the current transaction with -EUCLEAN if we notice a generation mismatch. For this we need to pass a transaction handle to btrfs_mark_buffer_dirty() which is always available except in test code, in which case we can pass NULL since it operates on dummy extent buffers and all test roots have a single node/leaf (root node at level 0). Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/block-group.c | 4 +- fs/btrfs/ctree.c | 109 +++++++++++++++------------ fs/btrfs/ctree.h | 11 ++- fs/btrfs/delayed-inode.c | 2 +- fs/btrfs/dev-replace.c | 2 +- fs/btrfs/dir-item.c | 8 +- fs/btrfs/disk-io.c | 13 +++- fs/btrfs/disk-io.h | 3 +- fs/btrfs/extent-tree.c | 36 +++++---- fs/btrfs/file-item.c | 17 +++-- fs/btrfs/file.c | 34 ++++----- fs/btrfs/free-space-cache.c | 6 +- fs/btrfs/free-space-tree.c | 17 +++-- fs/btrfs/inode-item.c | 16 ++-- fs/btrfs/inode.c | 10 +-- fs/btrfs/ioctl.c | 4 +- fs/btrfs/qgroup.c | 14 ++-- fs/btrfs/relocation.c | 10 +-- fs/btrfs/root-tree.c | 4 +- fs/btrfs/tests/extent-buffer-tests.c | 6 +- fs/btrfs/tests/inode-tests.c | 12 ++- fs/btrfs/tree-log.c | 12 +-- fs/btrfs/uuid-tree.c | 6 +- fs/btrfs/volumes.c | 10 +-- fs/btrfs/xattr.c | 8 +- 25 files changed, 205 insertions(+), 169 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index b2e5107b7cec..5a97db988810 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2601,7 +2601,7 @@ static int insert_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); btrfs_set_dev_extent_length(leaf, extent, num_bytes); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); out: btrfs_free_path(path); return ret; @@ -3025,7 +3025,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans, cache->global_root_id); btrfs_set_stack_block_group_flags(&bgi, cache->flags); write_extent_buffer(leaf, &bgi, bi, sizeof(bgi)); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); fail: btrfs_release_path(path); /* diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 617d4827eec2..118ad4d2cbbe 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -359,7 +359,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, return ret; } - btrfs_mark_buffer_dirty(cow); + btrfs_mark_buffer_dirty(trans, cow); *cow_ret = cow; return 0; } @@ -627,7 +627,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, cow->start); btrfs_set_node_ptr_generation(parent, parent_slot, trans->transid); - btrfs_mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(trans, parent); if (last_ref) { ret = btrfs_tree_mod_log_free_eb(buf); if (ret) { @@ -643,7 +643,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (unlock_orig) btrfs_tree_unlock(buf); free_extent_buffer_stale(buf); - btrfs_mark_buffer_dirty(cow); + btrfs_mark_buffer_dirty(trans, cow); *cow_ret = cow; return 0; } @@ -1197,7 +1197,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, goto out; } btrfs_set_node_key(parent, &right_key, pslot + 1); - btrfs_mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(trans, parent); } } if (btrfs_header_nritems(mid) == 1) { @@ -1255,7 +1255,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, goto out; } btrfs_set_node_key(parent, &mid_key, pslot); - btrfs_mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(trans, parent); } /* update the path */ @@ -1362,7 +1362,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, return ret; } btrfs_set_node_key(parent, &disk_key, pslot); - btrfs_mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(trans, parent); if (btrfs_header_nritems(left) > orig_slot) { path->nodes[level] = left; path->slots[level + 1] -= 1; @@ -1422,7 +1422,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, return ret; } btrfs_set_node_key(parent, &disk_key, pslot + 1); - btrfs_mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(trans, parent); if (btrfs_header_nritems(mid) <= orig_slot) { path->nodes[level] = right; @@ -2678,7 +2678,8 @@ int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key, * higher levels * */ -static void fixup_low_keys(struct btrfs_path *path, +static void fixup_low_keys(struct btrfs_trans_handle *trans, + struct btrfs_path *path, struct btrfs_disk_key *key, int level) { int i; @@ -2695,7 +2696,7 @@ static void fixup_low_keys(struct btrfs_path *path, BTRFS_MOD_LOG_KEY_REPLACE); BUG_ON(ret < 0); btrfs_set_node_key(t, key, tslot); - btrfs_mark_buffer_dirty(path->nodes[i]); + btrfs_mark_buffer_dirty(trans, path->nodes[i]); if (tslot != 0) break; } @@ -2707,10 +2708,11 @@ static void fixup_low_keys(struct btrfs_path *path, * This function isn't completely safe. It's the caller's responsibility * that the new key won't break the order */ -void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, +void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, struct btrfs_path *path, const struct btrfs_key *new_key) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_disk_key disk_key; struct extent_buffer *eb; int slot; @@ -2748,9 +2750,9 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, btrfs_cpu_key_to_disk(&disk_key, new_key); btrfs_set_item_key(eb, &disk_key, slot); - btrfs_mark_buffer_dirty(eb); + btrfs_mark_buffer_dirty(trans, eb); if (slot == 0) - fixup_low_keys(path, &disk_key, 1); + fixup_low_keys(trans, path, &disk_key, 1); } /* @@ -2881,8 +2883,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, } btrfs_set_header_nritems(src, src_nritems - push_items); btrfs_set_header_nritems(dst, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src); - btrfs_mark_buffer_dirty(dst); + btrfs_mark_buffer_dirty(trans, src); + btrfs_mark_buffer_dirty(trans, dst); return ret; } @@ -2957,8 +2959,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(src, src_nritems - push_items); btrfs_set_header_nritems(dst, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src); - btrfs_mark_buffer_dirty(dst); + btrfs_mark_buffer_dirty(trans, src); + btrfs_mark_buffer_dirty(trans, dst); return ret; } @@ -3007,7 +3009,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_node_ptr_generation(c, 0, lower_gen); - btrfs_mark_buffer_dirty(c); + btrfs_mark_buffer_dirty(trans, c); old = root->node; ret = btrfs_tree_mod_log_insert_root(root->node, c, false); @@ -3079,7 +3081,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, WARN_ON(trans->transid == 0); btrfs_set_node_ptr_generation(lower, slot, trans->transid); btrfs_set_header_nritems(lower, nritems + 1); - btrfs_mark_buffer_dirty(lower); + btrfs_mark_buffer_dirty(trans, lower); return 0; } @@ -3158,8 +3160,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(split, c_nritems - mid); btrfs_set_header_nritems(c, mid); - btrfs_mark_buffer_dirty(c); - btrfs_mark_buffer_dirty(split); + btrfs_mark_buffer_dirty(trans, c); + btrfs_mark_buffer_dirty(trans, split); ret = insert_ptr(trans, path, &disk_key, split->start, path->slots[level + 1] + 1, level + 1); @@ -3325,15 +3327,15 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(left, left_nritems); if (left_nritems) - btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(trans, left); else btrfs_clear_buffer_dirty(trans, left); - btrfs_mark_buffer_dirty(right); + btrfs_mark_buffer_dirty(trans, right); btrfs_item_key(right, &disk_key, 0); btrfs_set_node_key(upper, &disk_key, slot + 1); - btrfs_mark_buffer_dirty(upper); + btrfs_mark_buffer_dirty(trans, upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { @@ -3545,14 +3547,14 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, btrfs_set_token_item_offset(&token, i, push_space); } - btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(trans, left); if (right_nritems) - btrfs_mark_buffer_dirty(right); + btrfs_mark_buffer_dirty(trans, right); else btrfs_clear_buffer_dirty(trans, right); btrfs_item_key(right, &disk_key, 0); - fixup_low_keys(path, &disk_key, 1); + fixup_low_keys(trans, path, &disk_key, 1); /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { @@ -3683,8 +3685,8 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, if (ret < 0) return ret; - btrfs_mark_buffer_dirty(right); - btrfs_mark_buffer_dirty(l); + btrfs_mark_buffer_dirty(trans, right); + btrfs_mark_buffer_dirty(trans, l); BUG_ON(path->slots[0] != slot); if (mid <= slot) { @@ -3925,7 +3927,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, path->nodes[0] = right; path->slots[0] = 0; if (path->slots[1] == 0) - fixup_low_keys(path, &disk_key, 1); + fixup_low_keys(trans, path, &disk_key, 1); } /* * We create a new leaf 'right' for the required ins_len and @@ -4024,7 +4026,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, return ret; } -static noinline int split_item(struct btrfs_path *path, +static noinline int split_item(struct btrfs_trans_handle *trans, + struct btrfs_path *path, const struct btrfs_key *new_key, unsigned long split_offset) { @@ -4083,7 +4086,7 @@ static noinline int split_item(struct btrfs_path *path, write_extent_buffer(leaf, buf + split_offset, btrfs_item_ptr_offset(leaf, slot), item_size - split_offset); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); BUG_ON(btrfs_leaf_free_space(leaf) < 0); kfree(buf); @@ -4117,7 +4120,7 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, if (ret) return ret; - ret = split_item(path, new_key, split_offset); + ret = split_item(trans, path, new_key, split_offset); return ret; } @@ -4127,7 +4130,8 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, * off the end of the item or if we shift the item to chop bytes off * the front. */ -void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end) +void btrfs_truncate_item(struct btrfs_trans_handle *trans, + struct btrfs_path *path, u32 new_size, int from_end) { int slot; struct extent_buffer *leaf; @@ -4203,11 +4207,11 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end) btrfs_set_disk_key_offset(&disk_key, offset + size_diff); btrfs_set_item_key(leaf, &disk_key, slot); if (slot == 0) - fixup_low_keys(path, &disk_key, 1); + fixup_low_keys(trans, path, &disk_key, 1); } btrfs_set_item_size(leaf, slot, new_size); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); if (btrfs_leaf_free_space(leaf) < 0) { btrfs_print_leaf(leaf); @@ -4218,7 +4222,8 @@ void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end) /* * make the item pointed to by the path bigger, data_size is the added size. */ -void btrfs_extend_item(struct btrfs_path *path, u32 data_size) +void btrfs_extend_item(struct btrfs_trans_handle *trans, + struct btrfs_path *path, u32 data_size) { int slot; struct extent_buffer *leaf; @@ -4268,7 +4273,7 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size) data_end = old_data; old_size = btrfs_item_size(leaf, slot); btrfs_set_item_size(leaf, slot, old_size + data_size); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); if (btrfs_leaf_free_space(leaf) < 0) { btrfs_print_leaf(leaf); @@ -4279,6 +4284,7 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size) /* * Make space in the node before inserting one or more items. * + * @trans: transaction handle * @root: root we are inserting items to * @path: points to the leaf/slot where we are going to insert new items * @batch: information about the batch of items to insert @@ -4286,7 +4292,8 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size) * Main purpose is to save stack depth by doing the bulk of the work in a * function that doesn't call btrfs_search_slot */ -static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, +static void setup_items_for_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, const struct btrfs_item_batch *batch) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -4306,7 +4313,7 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p */ if (path->slots[0] == 0) { btrfs_cpu_key_to_disk(&disk_key, &batch->keys[0]); - fixup_low_keys(path, &disk_key, 1); + fixup_low_keys(trans, path, &disk_key, 1); } btrfs_unlock_up_safe(path, 1); @@ -4365,7 +4372,7 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p } btrfs_set_header_nritems(leaf, nritems + batch->nr); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); if (btrfs_leaf_free_space(leaf) < 0) { btrfs_print_leaf(leaf); @@ -4376,12 +4383,14 @@ static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *p /* * Insert a new item into a leaf. * + * @trans: Transaction handle. * @root: The root of the btree. * @path: A path pointing to the target leaf and slot. * @key: The key of the new item. * @data_size: The size of the data associated with the new key. */ -void btrfs_setup_item_for_insert(struct btrfs_root *root, +void btrfs_setup_item_for_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, const struct btrfs_key *key, u32 data_size) @@ -4393,7 +4402,7 @@ void btrfs_setup_item_for_insert(struct btrfs_root *root, batch.total_data_size = data_size; batch.nr = 1; - setup_items_for_insert(root, path, &batch); + setup_items_for_insert(trans, root, path, &batch); } /* @@ -4419,7 +4428,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, slot = path->slots[0]; BUG_ON(slot < 0); - setup_items_for_insert(root, path, batch); + setup_items_for_insert(trans, root, path, batch); return 0; } @@ -4444,7 +4453,7 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, leaf = path->nodes[0]; ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); write_extent_buffer(leaf, data, ptr, data_size); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); } btrfs_free_path(path); return ret; @@ -4475,7 +4484,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans, return ret; path->slots[0]++; - btrfs_setup_item_for_insert(root, path, new_key, item_size); + btrfs_setup_item_for_insert(trans, root, path, new_key, item_size); leaf = path->nodes[0]; memcpy_extent_buffer(leaf, btrfs_item_ptr_offset(leaf, path->slots[0]), @@ -4533,9 +4542,9 @@ int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_disk_key disk_key; btrfs_node_key(parent, &disk_key, 0); - fixup_low_keys(path, &disk_key, level + 1); + fixup_low_keys(trans, path, &disk_key, level + 1); } - btrfs_mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(trans, parent); return 0; } @@ -4632,7 +4641,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_disk_key disk_key; btrfs_item_key(leaf, &disk_key, 0); - fixup_low_keys(path, &disk_key, 1); + fixup_low_keys(trans, path, &disk_key, 1); } /* @@ -4697,11 +4706,11 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, * dirtied this buffer */ if (path->nodes[0] == leaf) - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); free_extent_buffer(leaf); } } else { - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); } } return ret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ff40acd63a37..06333a74d6c4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -518,7 +518,7 @@ int btrfs_previous_item(struct btrfs_root *root, int type); int btrfs_previous_extent_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid); -void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, +void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, struct btrfs_path *path, const struct btrfs_key *new_key); struct extent_buffer *btrfs_root_node(struct btrfs_root *root); @@ -545,8 +545,10 @@ int btrfs_block_can_be_shared(struct btrfs_trans_handle *trans, struct extent_buffer *buf); int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); -void btrfs_extend_item(struct btrfs_path *path, u32 data_size); -void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end); +void btrfs_extend_item(struct btrfs_trans_handle *trans, + struct btrfs_path *path, u32 data_size); +void btrfs_truncate_item(struct btrfs_trans_handle *trans, + struct btrfs_path *path, u32 new_size, int from_end); int btrfs_split_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -610,7 +612,8 @@ struct btrfs_item_batch { int nr; }; -void btrfs_setup_item_for_insert(struct btrfs_root *root, +void btrfs_setup_item_for_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, const struct btrfs_key *key, u32 data_size); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 90aaedce1548..16f9e5f474cc 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1030,7 +1030,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, struct btrfs_inode_item); write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item, sizeof(struct btrfs_inode_item)); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags)) goto out; diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index fff22ed55c42..fe6ba17a0509 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -442,7 +442,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans) dev_replace->item_needs_writeback = 0; up_write(&dev_replace->rwsem); - btrfs_mark_buffer_dirty(eb); + btrfs_mark_buffer_dirty(trans, eb); out: btrfs_free_path(path); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 082eb0e19598..9c07d5c3e5ad 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -38,7 +38,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle di = btrfs_match_dir_item_name(fs_info, path, name, name_len); if (di) return ERR_PTR(-EEXIST); - btrfs_extend_item(path, data_size); + btrfs_extend_item(trans, path, data_size); } else if (ret < 0) return ERR_PTR(ret); WARN_ON(ret > 0); @@ -93,7 +93,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, write_extent_buffer(leaf, name, name_ptr, name_len); write_extent_buffer(leaf, data, data_ptr, data_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); return ret; } @@ -153,7 +153,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, name_ptr = (unsigned long)(dir_item + 1); write_extent_buffer(leaf, name->name, name_ptr, name->len); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); second_insert: /* FIXME, use some real flag for selecting the extra index */ @@ -439,7 +439,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, start = btrfs_item_ptr_offset(leaf, path->slots[0]); memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_len - (ptr + sub_item_len - start)); - btrfs_truncate_item(path, item_len - sub_item_len, 1); + btrfs_truncate_item(trans, path, item_len - sub_item_len, 1); } return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 68f60d50e1fd..6b78517b1fd5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -867,7 +867,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, } root->node = leaf; - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); root->commit_root = btrfs_root_node(root); set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); @@ -942,7 +942,7 @@ int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans, root->node = leaf; - btrfs_mark_buffer_dirty(root->node); + btrfs_mark_buffer_dirty(trans, root->node); btrfs_tree_unlock(root->node); return 0; @@ -4423,7 +4423,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_close_devices(fs_info->fs_devices); } -void btrfs_mark_buffer_dirty(struct extent_buffer *buf) +void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans, + struct extent_buffer *buf) { struct btrfs_fs_info *fs_info = buf->fs_info; u64 transid = btrfs_header_generation(buf); @@ -4437,10 +4438,14 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags))) return; #endif + /* This is an active transaction (its state < TRANS_STATE_UNBLOCKED). */ + ASSERT(trans->transid == fs_info->generation); btrfs_assert_tree_write_locked(buf); - if (transid != fs_info->generation) + if (transid != fs_info->generation) { WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n", buf->start, transid, fs_info->generation); + btrfs_abort_transaction(trans, -EUCLEAN); + } set_extent_buffer_dirty(buf); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY /* diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 02b645744a82..50dab8f639dc 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -104,7 +104,8 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root) } void btrfs_put_root(struct btrfs_root *root); -void btrfs_mark_buffer_dirty(struct extent_buffer *buf); +void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans, + struct extent_buffer *buf); int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); int btrfs_read_extent_buffer(struct extent_buffer *buf, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fc313fce5bbd..91fe57e87583 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -575,7 +575,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, btrfs_set_extent_data_ref_count(leaf, ref, num_refs); } } - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); ret = 0; fail: btrfs_release_path(path); @@ -623,7 +623,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); else if (key.type == BTRFS_SHARED_DATA_REF_KEY) btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); } return ret; } @@ -976,7 +976,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, * helper to add new inline back ref */ static noinline_for_stack -void setup_inline_extent_backref(struct btrfs_fs_info *fs_info, +void setup_inline_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, u64 parent, u64 root_objectid, @@ -999,7 +999,7 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info, type = extent_ref_type(parent, owner); size = btrfs_extent_inline_ref_size(type); - btrfs_extend_item(path, size); + btrfs_extend_item(trans, path, size); ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, ei); @@ -1033,7 +1033,7 @@ void setup_inline_extent_backref(struct btrfs_fs_info *fs_info, } else { btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); } - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); } static int lookup_extent_backref(struct btrfs_trans_handle *trans, @@ -1066,7 +1066,9 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans, /* * helper to update/remove inline back ref */ -static noinline_for_stack int update_inline_extent_backref(struct btrfs_path *path, +static noinline_for_stack int update_inline_extent_backref( + struct btrfs_trans_handle *trans, + struct btrfs_path *path, struct btrfs_extent_inline_ref *iref, int refs_to_mod, struct btrfs_delayed_extent_op *extent_op) @@ -1174,9 +1176,9 @@ static noinline_for_stack int update_inline_extent_backref(struct btrfs_path *pa memmove_extent_buffer(leaf, ptr, ptr + size, end - ptr - size); item_size -= size; - btrfs_truncate_item(path, item_size, 1); + btrfs_truncate_item(trans, path, item_size, 1); } - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); return 0; } @@ -1206,9 +1208,10 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans, bytenr, num_bytes, root_objectid, path->slots[0]); return -EUCLEAN; } - ret = update_inline_extent_backref(path, iref, refs_to_add, extent_op); + ret = update_inline_extent_backref(trans, path, iref, + refs_to_add, extent_op); } else if (ret == -ENOENT) { - setup_inline_extent_backref(trans->fs_info, path, iref, parent, + setup_inline_extent_backref(trans, path, iref, parent, root_objectid, owner, offset, refs_to_add, extent_op); ret = 0; @@ -1226,7 +1229,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, BUG_ON(!is_data && refs_to_drop != 1); if (iref) - ret = update_inline_extent_backref(path, iref, -refs_to_drop, NULL); + ret = update_inline_extent_backref(trans, path, iref, + -refs_to_drop, NULL); else if (is_data) ret = remove_extent_data_ref(trans, root, path, refs_to_drop); else @@ -1510,7 +1514,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (extent_op) __run_delayed_extent_op(extent_op, leaf, item); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); /* now insert the actual backref */ @@ -1678,7 +1682,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); __run_delayed_extent_op(extent_op, leaf, ei); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); out: btrfs_free_path(path); return err; @@ -3151,7 +3155,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } } else { btrfs_set_extent_refs(leaf, ei, refs); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); } if (found_extent) { ret = remove_extent_backref(trans, extent_root, path, @@ -4659,7 +4663,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); } - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); btrfs_free_path(path); return alloc_reserved_extent(trans, ins->objectid, ins->offset); @@ -4734,7 +4738,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root); } - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_free_path(path); return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 1ce5dd154499..45cae356e89b 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -194,7 +194,7 @@ int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_encryption(leaf, item, 0); btrfs_set_file_extent_other_encoding(leaf, item, 0); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); out: btrfs_free_path(path); return ret; @@ -811,11 +811,12 @@ blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio) * This calls btrfs_truncate_item with the correct args based on the overlap, * and fixes up the key as required. */ -static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info, +static noinline void truncate_one_csum(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_key *key, u64 bytenr, u64 len) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct extent_buffer *leaf; const u32 csum_size = fs_info->csum_size; u64 csum_end; @@ -836,7 +837,7 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info, */ u32 new_size = (bytenr - key->offset) >> blocksize_bits; new_size *= csum_size; - btrfs_truncate_item(path, new_size, 1); + btrfs_truncate_item(trans, path, new_size, 1); } else if (key->offset >= bytenr && csum_end > end_byte && end_byte > key->offset) { /* @@ -848,10 +849,10 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info, u32 new_size = (csum_end - end_byte) >> blocksize_bits; new_size *= csum_size; - btrfs_truncate_item(path, new_size, 0); + btrfs_truncate_item(trans, path, new_size, 0); key->offset = end_byte; - btrfs_set_item_key_safe(fs_info, path, key); + btrfs_set_item_key_safe(trans, path, key); } else { BUG(); } @@ -994,7 +995,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, key.offset = end_byte - 1; } else { - truncate_one_csum(fs_info, path, &key, bytenr, len); + truncate_one_csum(trans, path, &key, bytenr, len); if (key.offset < bytenr) break; } @@ -1202,7 +1203,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, diff /= csum_size; diff *= csum_size; - btrfs_extend_item(path, diff); + btrfs_extend_item(trans, path, diff); ret = 0; goto csum; } @@ -1249,7 +1250,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, ins_size /= csum_size; total_bytes += ins_size * fs_info->sectorsize; - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); if (total_bytes < sums->len) { btrfs_release_path(path); cond_resched(); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 361535c71c0f..23a145ca9457 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -368,7 +368,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, btrfs_set_file_extent_offset(leaf, fi, extent_offset); btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - args->start); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); if (update_refs && disk_bytenr > 0) { btrfs_init_generic_ref(&ref, @@ -405,13 +405,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, memcpy(&new_key, &key, sizeof(new_key)); new_key.offset = args->end; - btrfs_set_item_key_safe(fs_info, path, &new_key); + btrfs_set_item_key_safe(trans, path, &new_key); extent_offset += args->end - key.offset; btrfs_set_file_extent_offset(leaf, fi, extent_offset); btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - args->end); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); if (update_refs && disk_bytenr > 0) args->bytes_found += args->end - key.offset; break; @@ -431,7 +431,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, btrfs_set_file_extent_num_bytes(leaf, fi, args->start - key.offset); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); if (update_refs && disk_bytenr > 0) args->bytes_found += extent_end - args->start; if (args->end == extent_end) @@ -536,7 +536,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, if (btrfs_comp_cpu_keys(&key, &slot_key) > 0) path->slots[0]++; } - btrfs_setup_item_for_insert(root, path, &key, args->extent_item_size); + btrfs_setup_item_for_insert(trans, root, path, &key, + args->extent_item_size); args->extent_inserted = true; } @@ -593,7 +594,6 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot, int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u64 start, u64 end) { - struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root = inode->root; struct extent_buffer *leaf; struct btrfs_path *path; @@ -664,7 +664,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, ino, bytenr, orig_offset, &other_start, &other_end)) { new_key.offset = end; - btrfs_set_item_key_safe(fs_info, path, &new_key); + btrfs_set_item_key_safe(trans, path, &new_key); fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_generation(leaf, fi, @@ -679,7 +679,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, trans->transid); btrfs_set_file_extent_num_bytes(leaf, fi, end - other_start); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); goto out; } } @@ -698,7 +698,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, trans->transid); path->slots[0]++; new_key.offset = start; - btrfs_set_item_key_safe(fs_info, path, &new_key); + btrfs_set_item_key_safe(trans, path, &new_key); fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -708,7 +708,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, other_end - start); btrfs_set_file_extent_offset(leaf, fi, start - orig_offset); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); goto out; } } @@ -742,7 +742,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, btrfs_set_file_extent_offset(leaf, fi, split - orig_offset); btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr, num_bytes, 0); @@ -814,7 +814,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_generation(leaf, fi, trans->transid); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); } else { fi = btrfs_item_ptr(leaf, del_slot - 1, struct btrfs_file_extent_item); @@ -823,7 +823,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, btrfs_set_file_extent_generation(leaf, fi, trans->transid); btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); ret = btrfs_del_items(trans, root, path, del_slot, del_nr); if (ret < 0) { @@ -2104,7 +2104,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_offset(leaf, fi, 0); btrfs_set_file_extent_generation(leaf, fi, trans->transid); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); goto out; } @@ -2112,7 +2112,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, u64 num_bytes; key.offset = offset; - btrfs_set_item_key_safe(fs_info, path, &key); + btrfs_set_item_key_safe(trans, path, &key); fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - @@ -2121,7 +2121,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); btrfs_set_file_extent_offset(leaf, fi, 0); btrfs_set_file_extent_generation(leaf, fi, trans->transid); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); goto out; } btrfs_release_path(path); @@ -2273,7 +2273,7 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_num_bytes(leaf, extent, replace_len); if (extent_info->is_new_extent) btrfs_set_file_extent_generation(leaf, extent, trans->transid); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); ret = btrfs_inode_set_file_extent_range(inode, extent_info->file_offset, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 27fad70451aa..8dd8ef760321 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -195,7 +195,7 @@ static int __create_free_space_inode(struct btrfs_root *root, btrfs_set_inode_nlink(leaf, inode_item, 1); btrfs_set_inode_transid(leaf, inode_item, trans->transid); btrfs_set_inode_block_group(leaf, inode_item, offset); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); key.objectid = BTRFS_FREE_SPACE_OBJECTID; @@ -213,7 +213,7 @@ static int __create_free_space_inode(struct btrfs_root *root, struct btrfs_free_space_header); memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header)); btrfs_set_free_space_key(leaf, header, &disk_key); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); return 0; @@ -1185,7 +1185,7 @@ update_cache_item(struct btrfs_trans_handle *trans, btrfs_set_free_space_entries(leaf, header, entries); btrfs_set_free_space_bitmaps(leaf, header, bitmaps); btrfs_set_free_space_generation(leaf, header, trans->transid); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); return 0; diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index c0e734082dcc..7b598b070700 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -89,7 +89,7 @@ static int add_new_free_space_info(struct btrfs_trans_handle *trans, struct btrfs_free_space_info); btrfs_set_free_space_extent_count(leaf, info, 0); btrfs_set_free_space_flags(leaf, info, 0); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); ret = 0; out: @@ -287,7 +287,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, flags |= BTRFS_FREE_SPACE_USING_BITMAPS; btrfs_set_free_space_flags(leaf, info, flags); expected_extent_count = btrfs_free_space_extent_count(leaf, info); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); if (extent_count != expected_extent_count) { @@ -324,7 +324,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); write_extent_buffer(leaf, bitmap_cursor, ptr, data_size); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); i += extent_size; @@ -430,7 +430,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS; btrfs_set_free_space_flags(leaf, info, flags); expected_extent_count = btrfs_free_space_extent_count(leaf, info); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); nrbits = block_group->length >> block_group->fs_info->sectorsize_bits; @@ -495,7 +495,7 @@ static int update_free_space_extent_count(struct btrfs_trans_handle *trans, extent_count += new_extents; btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); btrfs_release_path(path); if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) && @@ -533,7 +533,8 @@ int free_space_test_bit(struct btrfs_block_group *block_group, return !!extent_buffer_test_bit(leaf, ptr, i); } -static void free_space_set_bits(struct btrfs_block_group *block_group, +static void free_space_set_bits(struct btrfs_trans_handle *trans, + struct btrfs_block_group *block_group, struct btrfs_path *path, u64 *start, u64 *size, int bit) { @@ -563,7 +564,7 @@ static void free_space_set_bits(struct btrfs_block_group *block_group, extent_buffer_bitmap_set(leaf, ptr, first, last - first); else extent_buffer_bitmap_clear(leaf, ptr, first, last - first); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); *size -= end - *start; *start = end; @@ -656,7 +657,7 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, cur_start = start; cur_size = size; while (1) { - free_space_set_bits(block_group, path, &cur_start, &cur_size, + free_space_set_bits(trans, block_group, path, &cur_start, &cur_size, !remove); if (cur_size == 0) break; diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 4c322b720a80..d3ff97374d48 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -167,7 +167,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, memmove_extent_buffer(leaf, ptr, ptr + del_len, item_size - (ptr + del_len - item_start)); - btrfs_truncate_item(path, item_size - del_len, 1); + btrfs_truncate_item(trans, path, item_size - del_len, 1); out: btrfs_free_path(path); @@ -229,7 +229,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_size - (ptr + sub_item_len - item_start)); - btrfs_truncate_item(path, item_size - sub_item_len, 1); + btrfs_truncate_item(trans, path, item_size - sub_item_len, 1); out: btrfs_free_path(path); @@ -282,7 +282,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, name)) goto out; - btrfs_extend_item(path, ins_len); + btrfs_extend_item(trans, path, ins_len); ret = 0; } if (ret < 0) @@ -299,7 +299,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, ptr = (unsigned long)&extref->name; write_extent_buffer(path->nodes[0], name->name, ptr, name->len); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); out: btrfs_free_path(path); @@ -338,7 +338,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, goto out; old_size = btrfs_item_size(path->nodes[0], path->slots[0]); - btrfs_extend_item(path, ins_len); + btrfs_extend_item(trans, path, ins_len); ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_ref); ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); @@ -364,7 +364,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, ptr = (unsigned long)(ref + 1); } write_extent_buffer(path->nodes[0], name->name, ptr, name->len); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); out: btrfs_free_path(path); @@ -591,7 +591,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, num_dec = (orig_num_bytes - extent_num_bytes); if (extent_start != 0) control->sub_bytes += num_dec; - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); } else { extent_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); @@ -617,7 +617,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, btrfs_set_file_extent_ram_bytes(leaf, fi, size); size = btrfs_file_extent_calc_inline_size(size); - btrfs_truncate_item(path, size, 1); + btrfs_truncate_item(trans, path, size, 1); } else if (!del_item) { /* * We have to bail so the last_size is set to diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7814b9d654ce..2c61f9da4ab4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -573,7 +573,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, kunmap_local(kaddr); put_page(page); } - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); /* @@ -2912,7 +2912,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(struct btrfs_file_extent_item)); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); /* @@ -3981,7 +3981,7 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item); fill_inode_item(trans, leaf, inode_item, &inode->vfs_inode); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_set_inode_last_trans(trans, inode); ret = 0; failed: @@ -6310,7 +6310,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans, } } - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); /* * We don't need the path anymore, plus inheriting properties, adding * ACLs, security xattrs, orphan item or adding the link, will result in @@ -9446,7 +9446,7 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir, ptr = btrfs_file_extent_inline_start(ei); write_extent_buffer(leaf, symname, ptr, name_len); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_free_path(path); d_instantiate_new(dentry, inode); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 200dd780bc06..4cb4065453dd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -663,7 +663,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap, goto out; } - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); inode_item = &root_item->inode; btrfs_set_stack_inode_generation(inode_item, 1); @@ -2947,7 +2947,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); btrfs_release_path(path); btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b99230db3c82..bdaebb9fc689 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -622,7 +622,7 @@ static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); btrfs_free_path(path); return ret; @@ -700,7 +700,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); @@ -719,7 +719,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); ret = 0; out: @@ -808,7 +808,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); - btrfs_mark_buffer_dirty(l); + btrfs_mark_buffer_dirty(trans, l); out: btrfs_free_path(path); @@ -854,7 +854,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); - btrfs_mark_buffer_dirty(l); + btrfs_mark_buffer_dirty(trans, l); out: btrfs_free_path(path); @@ -896,7 +896,7 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans) btrfs_set_qgroup_status_rescan(l, ptr, fs_info->qgroup_rescan_progress.objectid); - btrfs_mark_buffer_dirty(l); + btrfs_mark_buffer_dirty(trans, l); out: btrfs_free_path(path); @@ -1069,7 +1069,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) BTRFS_QGROUP_STATUS_FLAGS_MASK); btrfs_set_qgroup_status_rescan(leaf, ptr, 0); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); key.objectid = 0; key.type = BTRFS_ROOT_REF_KEY; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c6d4bb8cbe29..4eaac3ae5c36 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1181,7 +1181,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, } } if (dirty) - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); if (inode) btrfs_add_delayed_iput(BTRFS_I(inode)); return ret; @@ -1374,13 +1374,13 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, */ btrfs_set_node_blockptr(parent, slot, new_bytenr); btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen); - btrfs_mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(trans, parent); btrfs_set_node_blockptr(path->nodes[level], path->slots[level], old_bytenr); btrfs_set_node_ptr_generation(path->nodes[level], path->slots[level], old_ptr_gen); - btrfs_mark_buffer_dirty(path->nodes[level]); + btrfs_mark_buffer_dirty(trans, path->nodes[level]); btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr, blocksize, path->nodes[level]->start); @@ -2517,7 +2517,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, node->eb->start); btrfs_set_node_ptr_generation(upper->eb, slot, trans->transid); - btrfs_mark_buffer_dirty(upper->eb); + btrfs_mark_buffer_dirty(trans, upper->eb); btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, node->eb->start, blocksize, @@ -3835,7 +3835,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); out: btrfs_free_path(path); return ret; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 859874579456..5b0f1bccc409 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -191,7 +191,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_root_generation_v2(item, btrfs_root_generation(item)); write_extent_buffer(l, item, ptr, sizeof(*item)); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); out: btrfs_free_path(path); return ret; @@ -438,7 +438,7 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, btrfs_set_root_ref_name_len(leaf, ref, name->len); ptr = (unsigned long)(ref + 1); write_extent_buffer(leaf, name->name, ptr, name->len); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); if (key.type == BTRFS_ROOT_BACKREF_KEY) { btrfs_release_path(path); diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index 5ef0b90e25c3..6a43a64ba55a 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -61,7 +61,11 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) key.type = BTRFS_EXTENT_CSUM_KEY; key.offset = 0; - btrfs_setup_item_for_insert(root, path, &key, value_len); + /* + * Passing a NULL trans handle is fine here, we have a dummy root eb + * and the tree is a single node (level 0). + */ + btrfs_setup_item_for_insert(NULL, root, path, &key, value_len); write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0), value_len); diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 05b03f5eab83..492d69d2fa73 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -34,7 +34,11 @@ static void insert_extent(struct btrfs_root *root, u64 start, u64 len, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = start; - btrfs_setup_item_for_insert(root, &path, &key, value_len); + /* + * Passing a NULL trans handle is fine here, we have a dummy root eb + * and the tree is a single node (level 0). + */ + btrfs_setup_item_for_insert(NULL, root, &path, &key, value_len); fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); btrfs_set_file_extent_generation(leaf, fi, 1); btrfs_set_file_extent_type(leaf, fi, type); @@ -64,7 +68,11 @@ static void insert_inode_item_key(struct btrfs_root *root) key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - btrfs_setup_item_for_insert(root, &path, &key, value_len); + /* + * Passing a NULL trans handle is fine here, we have a dummy root eb + * and the tree is a single node (level 0). + */ + btrfs_setup_item_for_insert(NULL, root, &path, &key, value_len); } /* diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cbb17b542131..9fb64af608d1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -504,9 +504,9 @@ static int overwrite_item(struct btrfs_trans_handle *trans, found_size = btrfs_item_size(path->nodes[0], path->slots[0]); if (found_size > item_size) - btrfs_truncate_item(path, item_size, 1); + btrfs_truncate_item(trans, path, item_size, 1); else if (found_size < item_size) - btrfs_extend_item(path, item_size - found_size); + btrfs_extend_item(trans, path, item_size - found_size); } else if (ret) { return ret; } @@ -574,7 +574,7 @@ static int overwrite_item(struct btrfs_trans_handle *trans, } } no_copy: - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); btrfs_release_path(path); return 0; } @@ -3530,7 +3530,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, last_offset = max(last_offset, curr_end); } btrfs_set_dir_log_end(path->nodes[0], item, last_offset); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(trans, path->nodes[0]); btrfs_release_path(path); return 0; } @@ -4488,7 +4488,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, dst_index++; } - btrfs_mark_buffer_dirty(dst_path->nodes[0]); + btrfs_mark_buffer_dirty(trans, dst_path->nodes[0]); btrfs_release_path(dst_path); out: kfree(ins_data); @@ -4693,7 +4693,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, write_extent_buffer(leaf, &fi, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(fi)); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 7c7001f42b14..5be74f9e47eb 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -124,7 +124,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, * An item with that type already exists. * Extend the item and store the new subid at the end. */ - btrfs_extend_item(path, sizeof(subid_le)); + btrfs_extend_item(trans, path, sizeof(subid_le)); eb = path->nodes[0]; slot = path->slots[0]; offset = btrfs_item_ptr_offset(eb, slot); @@ -139,7 +139,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, ret = 0; subid_le = cpu_to_le64(subid_cpu); write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le)); - btrfs_mark_buffer_dirty(eb); + btrfs_mark_buffer_dirty(trans, eb); out: btrfs_free_path(path); @@ -221,7 +221,7 @@ int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, move_src = offset + sizeof(subid); move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot)); memmove_extent_buffer(eb, move_dst, move_src, move_len); - btrfs_truncate_item(path, item_size - sizeof(subid), 1); + btrfs_truncate_item(trans, path, item_size - sizeof(subid), 1); out: btrfs_free_path(path); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b9ef6f54635c..c26d3499a289 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1894,7 +1894,7 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, ptr = btrfs_device_fsid(dev_item); write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid, ptr, BTRFS_FSID_SIZE); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); ret = 0; out: @@ -2597,7 +2597,7 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) if (device->fs_devices->seeding) { btrfs_set_device_generation(leaf, dev_item, device->generation); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); } path->slots[0]++; @@ -2895,7 +2895,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, btrfs_device_get_disk_total_bytes(device)); btrfs_set_device_bytes_used(leaf, dev_item, btrfs_device_get_bytes_used(device)); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); out: btrfs_free_path(path); @@ -3483,7 +3483,7 @@ static int insert_balance_item(struct btrfs_fs_info *fs_info, btrfs_set_balance_flags(leaf, item, bctl->flags); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); out: btrfs_free_path(path); err = btrfs_commit_transaction(trans); @@ -7534,7 +7534,7 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) btrfs_set_dev_stats_value(eb, ptr, i, btrfs_dev_stat_read(device, i)); - btrfs_mark_buffer_dirty(eb); + btrfs_mark_buffer_dirty(trans, eb); out: btrfs_free_path(path); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 96828a13dd43..b906f809650e 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -188,15 +188,15 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, if (old_data_len + name_len + sizeof(*di) == item_size) { /* No other xattrs packed in the same leaf item. */ if (size > old_data_len) - btrfs_extend_item(path, size - old_data_len); + btrfs_extend_item(trans, path, size - old_data_len); else if (size < old_data_len) - btrfs_truncate_item(path, data_size, 1); + btrfs_truncate_item(trans, path, data_size, 1); } else { /* There are other xattrs packed in the same item. */ ret = btrfs_delete_one_dir_name(trans, root, path, di); if (ret) goto out; - btrfs_extend_item(path, data_size); + btrfs_extend_item(trans, path, data_size); } ptr = btrfs_item_ptr(leaf, slot, char); @@ -205,7 +205,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, btrfs_set_dir_data_len(leaf, di, size); data_ptr = ((unsigned long)(di + 1)) + name_len; write_extent_buffer(leaf, value, data_ptr, size); - btrfs_mark_buffer_dirty(leaf); + btrfs_mark_buffer_dirty(trans, leaf); } else { /* * Insert, and we had space for the xattr, so path->slots[0] is From aa7f1827953100cdde0795289a80c6c077bfe437 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 12 Feb 2021 20:11:25 -0500 Subject: [PATCH 0003/1397] lib/generic-radix-tree.c: Don't overflow in peek() [ Upstream commit 9492261ff2460252cf2d8de89cdf854c7e2b28a0 ] When we started spreading new inode numbers throughout most of the 64 bit inode space, that triggered some corner case bugs, in particular some integer overflows related to the radix tree code. Oops. Signed-off-by: Kent Overstreet Signed-off-by: Sasha Levin --- include/linux/generic-radix-tree.h | 7 +++++++ lib/generic-radix-tree.c | 17 ++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index 107613f7d792..f6cd0f909d9f 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -184,6 +185,12 @@ void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t); static inline void __genradix_iter_advance(struct genradix_iter *iter, size_t obj_size) { + if (iter->offset + obj_size < iter->offset) { + iter->offset = SIZE_MAX; + iter->pos = SIZE_MAX; + return; + } + iter->offset += obj_size; if (!is_power_of_2(obj_size) && diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index f25eb111c051..7dfa88282b00 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -166,6 +166,10 @@ void *__genradix_iter_peek(struct genradix_iter *iter, struct genradix_root *r; struct genradix_node *n; unsigned level, i; + + if (iter->offset == SIZE_MAX) + return NULL; + restart: r = READ_ONCE(radix->root); if (!r) @@ -184,10 +188,17 @@ void *__genradix_iter_peek(struct genradix_iter *iter, (GENRADIX_ARY - 1); while (!n->children[i]) { + size_t objs_per_ptr = genradix_depth_size(level); + + if (iter->offset + objs_per_ptr < iter->offset) { + iter->offset = SIZE_MAX; + iter->pos = SIZE_MAX; + return NULL; + } + i++; - iter->offset = round_down(iter->offset + - genradix_depth_size(level), - genradix_depth_size(level)); + iter->offset = round_down(iter->offset + objs_per_ptr, + objs_per_ptr); iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page; if (i == GENRADIX_ARY) From e2d8abf5afc6957ee89704f5f33b9faa844fcf94 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 17 Oct 2023 09:59:46 -0700 Subject: [PATCH 0004/1397] x86/retpoline: Make sure there are no unconverted return thunks due to KCSAN [ Upstream commit 2d7ce49f58dc95495b3e22e45d2be7de909b2c63 ] Enabling CONFIG_KCSAN leads to unconverted, default return thunks to remain after patching. As David Kaplan describes in his debugging of the issue, it is caused by a couple of KCSAN-generated constructors which aren't processed by objtool: "When KCSAN is enabled, GCC generates lots of constructor functions named _sub_I_00099_0 which call __tsan_init and then return. The returns in these are generally annotated normally by objtool and fixed up at runtime. But objtool runs on vmlinux.o and vmlinux.o does not include a couple of object files that are in vmlinux, like init/version-timestamp.o and .vmlinux.export.o, both of which contain _sub_I_00099_0 functions. As a result, the returns in these functions are not annotated, and the panic occurs when we call one of them in do_ctors and it uses the default return thunk. This difference can be seen by counting the number of these functions in the object files: $ objdump -d vmlinux.o|grep -c "<_sub_I_00099_0>:" 2601 $ objdump -d vmlinux|grep -c "<_sub_I_00099_0>:" 2603 If these functions are only run during kernel boot, there is no speculation concern." Fix it by disabling KCSAN on version-timestamp.o and .vmlinux.export.o so the extra functions don't get generated. KASAN and GCOV are already disabled for those files. [ bp: Massage commit message. ] Closes: https://lore.kernel.org/lkml/20231016214810.GA3942238@dev-arch.thelio-3990X/ Reported-by: Nathan Chancellor Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Nick Desaulniers Acked-by: Marco Elver Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/20231017165946.v4i2d4exyqwqq3bx@treble Signed-off-by: Sasha Levin --- init/Makefile | 1 + scripts/Makefile.vmlinux | 1 + 2 files changed, 2 insertions(+) diff --git a/init/Makefile b/init/Makefile index ec557ada3c12..cbac576c57d6 100644 --- a/init/Makefile +++ b/init/Makefile @@ -60,4 +60,5 @@ include/generated/utsversion.h: FORCE $(obj)/version-timestamp.o: include/generated/utsversion.h CFLAGS_version-timestamp.o := -include include/generated/utsversion.h KASAN_SANITIZE_version-timestamp.o := n +KCSAN_SANITIZE_version-timestamp.o := n GCOV_PROFILE_version-timestamp.o := n diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 3cd6ca15f390..c9f3e03124d7 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -19,6 +19,7 @@ quiet_cmd_cc_o_c = CC $@ ifdef CONFIG_MODULES KASAN_SANITIZE_.vmlinux.export.o := n +KCSAN_SANITIZE_.vmlinux.export.o := n GCOV_PROFILE_.vmlinux.export.o := n targets += .vmlinux.export.o vmlinux: .vmlinux.export.o From 2e905e608e38cf7f8dcddcf8a6036e91a78444cb Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Thu, 7 Sep 2023 08:43:07 +0800 Subject: [PATCH 0005/1397] perf/core: Bail out early if the request AUX area is out of bound [ Upstream commit 54aee5f15b83437f23b2b2469bcf21bdd9823916 ] When perf-record with a large AUX area, e.g 4GB, it fails with: #perf record -C 0 -m ,4G -e arm_spe_0// -- sleep 1 failed to mmap with 12 (Cannot allocate memory) and it reveals a WARNING with __alloc_pages(): ------------[ cut here ]------------ WARNING: CPU: 44 PID: 17573 at mm/page_alloc.c:5568 __alloc_pages+0x1ec/0x248 Call trace: __alloc_pages+0x1ec/0x248 __kmalloc_large_node+0xc0/0x1f8 __kmalloc_node+0x134/0x1e8 rb_alloc_aux+0xe0/0x298 perf_mmap+0x440/0x660 mmap_region+0x308/0x8a8 do_mmap+0x3c0/0x528 vm_mmap_pgoff+0xf4/0x1b8 ksys_mmap_pgoff+0x18c/0x218 __arm64_sys_mmap+0x38/0x58 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x58/0x188 do_el0_svc+0x34/0x50 el0_svc+0x34/0x108 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x1a4/0x1a8 'rb->aux_pages' allocated by kcalloc() is a pointer array which is used to maintains AUX trace pages. The allocated page for this array is physically contiguous (and virtually contiguous) with an order of 0..MAX_ORDER. If the size of pointer array crosses the limitation set by MAX_ORDER, it reveals a WARNING. So bail out early with -ENOMEM if the request AUX area is out of bound, e.g.: #perf record -C 0 -m ,4G -e arm_spe_0// -- sleep 1 failed to mmap with 12 (Cannot allocate memory) Signed-off-by: Shuai Xue Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/ring_buffer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index fb1e180b5f0a..e8d82c2f07d0 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -700,6 +700,12 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, watermark = 0; } + /* + * kcalloc_node() is unable to allocate buffer if the size is larger + * than: PAGE_SIZE << MAX_ORDER; directly bail out in this case. + */ + if (get_order((unsigned long)nr_pages * sizeof(void *)) > MAX_ORDER) + return -ENOMEM; rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL, node); if (!rb->aux_pages) From 74f6aedbe6f80eb13feda356d6bd962c6296d5b7 Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Mon, 4 Sep 2023 15:21:14 +0300 Subject: [PATCH 0006/1397] srcu: Fix srcu_struct node grpmask overflow on 64-bit systems [ Upstream commit d8d5b7bf6f2105883bbd91bbd4d5b67e4e3dff71 ] The value of a bitwise expression 1 << (cpu - sdp->mynode->grplo) is subject to overflow due to a failure to cast operands to a larger data type before performing the bitwise operation. The maximum result of this subtraction is defined by the RCU_FANOUT_LEAF Kconfig option, which on 64-bit systems defaults to 16 (resulting in a maximum shift of 15), but which can be set up as high as 64 (resulting in a maximum shift of 63). A value of 31 can result in sign extension, resulting in 0xffffffff80000000 instead of the desired 0x80000000. A value of 32 or greater triggers undefined behavior per the C standard. This bug has not been known to cause issues because almost all kernels take the default CONFIG_RCU_FANOUT_LEAF=16. Furthermore, as long as a given compiler gives a deterministic non-zero result for 1<=32, the code correctly invokes all SRCU callbacks, albeit wasting CPU time along the way. This commit therefore substitutes the correct 1UL for the buggy 1. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Denis Arefev Reviewed-by: Mathieu Desnoyers Reviewed-by: Joel Fernandes (Google) Cc: David Laight Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Sasha Levin --- kernel/rcu/srcutree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 253ed509b6ab..3affae97230d 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -223,7 +223,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) snp->grplo = cpu; snp->grphi = cpu; } - sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); + sdp->grpmask = 1UL << (cpu - sdp->mynode->grplo); } smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_WAIT_BARRIER); return true; @@ -833,7 +833,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp int cpu; for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { - if (!(mask & (1 << (cpu - snp->grplo)))) + if (!(mask & (1UL << (cpu - snp->grplo)))) continue; srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay); } From 2a79a7e8b6417d58e803b2c0005dac40a45d3637 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Wed, 2 Aug 2023 08:32:52 +0200 Subject: [PATCH 0007/1397] selftests/lkdtm: Disable CONFIG_UBSAN_TRAP in test config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cf77bf698887c3b9ebed76dea492b07a3c2c7632 ] The lkdtm selftest config fragment enables CONFIG_UBSAN_TRAP to make the ARRAY_BOUNDS test kill the calling process when an out-of-bound access is detected by UBSAN. However, after this [1] commit, UBSAN is triggered under many new scenarios that weren't detected before, such as in struct definitions with fixed-size trailing arrays used as flexible arrays. As a result, CONFIG_UBSAN_TRAP=y has become a very aggressive option to enable except for specific situations. `make kselftest-merge` applies CONFIG_UBSAN_TRAP=y to the kernel config for all selftests, which makes many of them fail because of system hangs during boot. This change removes the config option from the lkdtm kselftest and configures the ARRAY_BOUNDS test to look for UBSAN reports rather than relying on the calling process being killed. [1] commit 2d47c6956ab3 ("ubsan: Tighten UBSAN_BOUNDS on GCC")' Signed-off-by: Ricardo Cañuelo Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230802063252.1917997-1-ricardo.canuelo@collabora.com Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- tools/testing/selftests/lkdtm/config | 1 - tools/testing/selftests/lkdtm/tests.txt | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config index 5d52f64dfb43..7afe05e8c4d7 100644 --- a/tools/testing/selftests/lkdtm/config +++ b/tools/testing/selftests/lkdtm/config @@ -9,7 +9,6 @@ CONFIG_INIT_ON_FREE_DEFAULT_ON=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y CONFIG_UBSAN=y CONFIG_UBSAN_BOUNDS=y -CONFIG_UBSAN_TRAP=y CONFIG_STACKPROTECTOR_STRONG=y CONFIG_SLUB_DEBUG=y CONFIG_SLUB_DEBUG_ON=y diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 607b8d7e3ea3..2f3a1b96da6e 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -7,7 +7,7 @@ EXCEPTION #EXHAUST_STACK Corrupts memory on failure #CORRUPT_STACK Crashes entire system on success #CORRUPT_STACK_STRONG Crashes entire system on success -ARRAY_BOUNDS +ARRAY_BOUNDS call trace:|UBSAN: array-index-out-of-bounds CORRUPT_LIST_ADD list_add corruption CORRUPT_LIST_DEL list_del corruption STACK_GUARD_PAGE_LEADING From 07a2284773c9afe0644dc235f8523b88ea2789f3 Mon Sep 17 00:00:00 2001 From: Jacky Bai Date: Mon, 9 Oct 2023 16:39:22 +0800 Subject: [PATCH 0008/1397] clocksource/drivers/timer-imx-gpt: Fix potential memory leak [ Upstream commit 8051a993ce222a5158bccc6ac22ace9253dd71cb ] Fix coverity Issue CID 250382: Resource leak (RESOURCE_LEAK). Add kfree when error return. Signed-off-by: Jacky Bai Reviewed-by: Peng Fan Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20231009083922.1942971-1-ping.bai@nxp.com Signed-off-by: Sasha Levin --- drivers/clocksource/timer-imx-gpt.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index 28ab4f1a7c71..6a878d227a13 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ b/drivers/clocksource/timer-imx-gpt.c @@ -434,12 +434,16 @@ static int __init mxc_timer_init_dt(struct device_node *np, enum imx_gpt_type t return -ENOMEM; imxtm->base = of_iomap(np, 0); - if (!imxtm->base) - return -ENXIO; + if (!imxtm->base) { + ret = -ENXIO; + goto err_kfree; + } imxtm->irq = irq_of_parse_and_map(np, 0); - if (imxtm->irq <= 0) - return -EINVAL; + if (imxtm->irq <= 0) { + ret = -EINVAL; + goto err_kfree; + } imxtm->clk_ipg = of_clk_get_by_name(np, "ipg"); @@ -452,11 +456,15 @@ static int __init mxc_timer_init_dt(struct device_node *np, enum imx_gpt_type t ret = _mxc_timer_init(imxtm); if (ret) - return ret; + goto err_kfree; initialized = 1; return 0; + +err_kfree: + kfree(imxtm); + return ret; } static int __init imx1_timer_init_dt(struct device_node *np) From b3b5c2730458c6d9b4a0ce634659284ec94f0441 Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Sat, 7 Oct 2023 18:17:13 +0200 Subject: [PATCH 0009/1397] clocksource/drivers/timer-atmel-tcb: Fix initialization on SAM9 hardware [ Upstream commit 6d3bc4c02d59996d1d3180d8ed409a9d7d5900e0 ] On SAM9 hardware two cascaded 16 bit timers are used to form a 32 bit high resolution timer that is used as scheduler clock when the kernel has been configured that way (CONFIG_ATMEL_CLOCKSOURCE_TCB). The driver initially triggers a reset-to-zero of the two timers but this reset is only performed on the next rising clock. For the first timer this is ok - it will be in the next 60ns (16MHz clock). For the chained second timer this will only happen after the first timer overflows, i.e. after 2^16 clocks (~4ms with a 16MHz clock). So with other words the scheduler clock resets to 0 after the first 2^16 clock cycles. It looks like that the scheduler does not like this and behaves wrongly over its lifetime, e.g. some tasks are scheduled with a long delay. Why that is and if there are additional requirements for this behaviour has not been further analysed. There is a simple fix for resetting the second timer as well when the first timer is reset and this is to set the ATMEL_TC_ASWTRG_SET bit in the Channel Mode register (CMR) of the first timer. This will also rise the TIOA line (clock input of the second timer) when a software trigger respective SYNC is issued. Signed-off-by: Ronald Wahl Acked-by: Alexandre Belloni Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20231007161803.31342-1-rwahl@gmx.de Signed-off-by: Sasha Levin --- drivers/clocksource/timer-atmel-tcb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/timer-atmel-tcb.c b/drivers/clocksource/timer-atmel-tcb.c index 27af17c99590..2a90c92a9182 100644 --- a/drivers/clocksource/timer-atmel-tcb.c +++ b/drivers/clocksource/timer-atmel-tcb.c @@ -315,6 +315,7 @@ static void __init tcb_setup_dual_chan(struct atmel_tc *tc, int mck_divisor_idx) writel(mck_divisor_idx /* likely divide-by-8 */ | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP /* free-run */ + | ATMEL_TC_ASWTRG_SET /* TIOA0 rises at software trigger */ | ATMEL_TC_ACPA_SET /* TIOA0 rises at 0 */ | ATMEL_TC_ACPC_CLEAR, /* (duty cycle 50%) */ tcaddr + ATMEL_TC_REG(0, CMR)); From f62c43d64d5a1673b7b99161c2a324928d1f68db Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 4 Oct 2023 01:29:00 +0200 Subject: [PATCH 0010/1397] srcu: Only accelerate on enqueue time [ Upstream commit 8a77f38bcd28d3c22ab7dd8eff3f299d43c00411 ] Acceleration in SRCU happens on enqueue time for each new callback. This operation is expected not to fail and therefore any similar attempt from other places shouldn't find any remaining callbacks to accelerate. Moreover accelerations performed beyond enqueue time are error prone because rcu_seq_snap() then may return the snapshot for a new grace period that is not going to be started. Remove these dangerous and needless accelerations and introduce instead assertions reporting leaking unaccelerated callbacks beyond enqueue time. Co-developed-by: Yong He Signed-off-by: Yong He Co-developed-by: Joel Fernandes (Google) Signed-off-by: Joel Fernandes (Google) Co-developed-by: Neeraj upadhyay Signed-off-by: Neeraj upadhyay Reviewed-by: Like Xu Signed-off-by: Frederic Weisbecker Signed-off-by: Sasha Levin --- kernel/rcu/srcutree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 3affae97230d..25285893e44e 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -782,8 +782,7 @@ static void srcu_gp_start(struct srcu_struct *ssp) spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); - (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq)); + WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL)); spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies); WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, 0); @@ -1719,6 +1718,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) ssp = sdp->ssp; rcu_cblist_init(&ready_cbs); spin_lock_irq_rcu_node(sdp); + WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL)); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); if (sdp->srcu_cblist_invoking || @@ -1747,8 +1747,6 @@ static void srcu_invoke_callbacks(struct work_struct *work) */ spin_lock_irq_rcu_node(sdp); rcu_segcblist_add_len(&sdp->srcu_cblist, -len); - (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq)); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); spin_unlock_irq_rcu_node(sdp); From f6cc3d85cb80d79578b9616e3fc6ab8e8d9aaa55 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 21 Aug 2023 16:04:09 -0400 Subject: [PATCH 0011/1397] smp,csd: Throw an error if a CSD lock is stuck for too long [ Upstream commit 94b3f0b5af2c7af69e3d6e0cdd9b0ea535f22186 ] The CSD lock seems to get stuck in 2 "modes". When it gets stuck temporarily, it usually gets released in a few seconds, and sometimes up to one or two minutes. If the CSD lock stays stuck for more than several minutes, it never seems to get unstuck, and gradually more and more things in the system end up also getting stuck. In the latter case, we should just give up, so the system can dump out a little more information about what went wrong, and, with panic_on_oops and a kdump kernel loaded, dump a whole bunch more information about what might have gone wrong. In addition, there is an smp.panic_on_ipistall kernel boot parameter that by default retains the old behavior, but when set enables the panic after the CSD lock has been stuck for more than the specified number of milliseconds, as in 300,000 for five minutes. [ paulmck: Apply Imran Khan feedback. ] [ paulmck: Apply Leonardo Bras feedback. ] Link: https://lore.kernel.org/lkml/bc7cc8b0-f587-4451-8bcd-0daae627bcc7@paulmck-laptop/ Signed-off-by: Rik van Riel Signed-off-by: Paul E. McKenney Reviewed-by: Imran Khan Reviewed-by: Leonardo Bras Cc: Peter Zijlstra Cc: Valentin Schneider Cc: Juergen Gross Cc: Jonathan Corbet Cc: Randy Dunlap Signed-off-by: Sasha Levin --- Documentation/admin-guide/kernel-parameters.txt | 7 +++++++ kernel/smp.c | 13 ++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0a1731a0f0ef..41644336e358 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5858,6 +5858,13 @@ This feature may be more efficiently disabled using the csdlock_debug- kernel parameter. + smp.panic_on_ipistall= [KNL] + If a csd_lock_timeout extends for more than + the specified number of milliseconds, panic the + system. By default, let CSD-lock acquisition + take as long as they take. Specifying 300,000 + for this value provides a 5-minute timeout. + smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port smsc-ircc2.ircc_sir= [HW] SIR base I/O port diff --git a/kernel/smp.c b/kernel/smp.c index 8455a53465af..695eb13a276d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -170,6 +170,8 @@ static DEFINE_PER_CPU(void *, cur_csd_info); static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */ module_param(csd_lock_timeout, ulong, 0444); +static int panic_on_ipistall; /* CSD panic timeout in milliseconds, 300000 for five minutes. */ +module_param(panic_on_ipistall, int, 0444); static atomic_t csd_bug_count = ATOMIC_INIT(0); @@ -230,6 +232,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * } ts2 = sched_clock(); + /* How long since we last checked for a stuck CSD lock.*/ ts_delta = ts2 - *ts1; if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0)) return false; @@ -243,9 +246,17 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * else cpux = cpu; cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */ + /* How long since this CSD lock was stuck. */ + ts_delta = ts2 - ts0; pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n", - firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0, + firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts_delta, cpu, csd->func, csd->info); + /* + * If the CSD lock is still stuck after 5 minutes, it is unlikely + * to become unstuck. Use a signed comparison to avoid triggering + * on underflows when the TSC is out of sync between sockets. + */ + BUG_ON(panic_on_ipistall > 0 && (s64)ts_delta > ((s64)panic_on_ipistall * NSEC_PER_MSEC)); if (cpu_cur_csd && csd != cpu_cur_csd) { pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n", *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)), From 3073f6df783d9d75f7f69f73e16c7ef85d6cfb63 Mon Sep 17 00:00:00 2001 From: Ran Xiaokai Date: Tue, 17 Oct 2023 17:09:53 +0800 Subject: [PATCH 0012/1397] cpu/hotplug: Don't offline the last non-isolated CPU [ Upstream commit 38685e2a0476127db766f81b1c06019ddc4c9ffa ] If a system has isolated CPUs via the "isolcpus=" command line parameter, then an attempt to offline the last housekeeping CPU will result in a WARN_ON() when rebuilding the scheduler domains and a subsequent panic due to and unhandled empty CPU mas in partition_sched_domains_locked(). cpuset_hotplug_workfn() rebuild_sched_domains_locked() ndoms = generate_sched_domains(&doms, &attr); cpumask_and(doms[0], top_cpuset.effective_cpus, housekeeping_cpumask(HK_FLAG_DOMAIN)); Thus results in an empty CPU mask which triggers the warning and then the subsequent crash: WARNING: CPU: 4 PID: 80 at kernel/sched/topology.c:2366 build_sched_domains+0x120c/0x1408 Call trace: build_sched_domains+0x120c/0x1408 partition_sched_domains_locked+0x234/0x880 rebuild_sched_domains_locked+0x37c/0x798 rebuild_sched_domains+0x30/0x58 cpuset_hotplug_workfn+0x2a8/0x930 Unable to handle kernel paging request at virtual address fffe80027ab37080 partition_sched_domains_locked+0x318/0x880 rebuild_sched_domains_locked+0x37c/0x798 Aside of the resulting crash, it does not make any sense to offline the last last housekeeping CPU. Prevent this by masking out the non-housekeeping CPUs when selecting a target CPU for initiating the CPU unplug operation via the work queue. Suggested-by: Thomas Gleixner Signed-off-by: Ran Xiaokai Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/202310171709530660462@zte.com.cn Signed-off-by: Sasha Levin --- kernel/cpu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 1a189da3bdac..303cb0591b4b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1523,11 +1523,14 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target) /* * Ensure that the control task does not run on the to be offlined * CPU to prevent a deadlock against cfs_b->period_timer. + * Also keep at least one housekeeping cpu onlined to avoid generating + * an empty sched_domain span. */ - cpu = cpumask_any_but(cpu_online_mask, cpu); - if (cpu >= nr_cpu_ids) - return -EBUSY; - return work_on_cpu(cpu, __cpu_down_maps_locked, &work); + for_each_cpu_and(cpu, cpu_online_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) { + if (cpu != work.cpu) + return work_on_cpu(cpu, __cpu_down_maps_locked, &work); + } + return -EBUSY; } static int cpu_down(unsigned int cpu, enum cpuhp_state target) From be2355b7763cee677b76e962aa98ba294c5402f2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 24 Sep 2023 17:07:02 +0200 Subject: [PATCH 0013/1397] workqueue: Provide one lock class key per work_on_cpu() callsite [ Upstream commit 265f3ed077036f053981f5eea0b5b43e7c5b39ff ] All callers of work_on_cpu() share the same lock class key for all the functions queued. As a result the workqueue related locking scenario for a function A may be spuriously accounted as an inversion against the locking scenario of function B such as in the following model: long A(void *arg) { mutex_lock(&mutex); mutex_unlock(&mutex); } long B(void *arg) { } void launchA(void) { work_on_cpu(0, A, NULL); } void launchB(void) { mutex_lock(&mutex); work_on_cpu(1, B, NULL); mutex_unlock(&mutex); } launchA and launchB running concurrently have no chance to deadlock. However the above can be reported by lockdep as a possible locking inversion because the works containing A() and B() are treated as belonging to the same locking class. The following shows an existing example of such a spurious lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 6.6.0-rc1-00065-g934ebd6e5359 #35409 Not tainted ------------------------------------------------------ kworker/0:1/9 is trying to acquire lock: ffffffff9bc72f30 (cpu_hotplug_lock){++++}-{0:0}, at: _cpu_down+0x57/0x2b0 but task is already holding lock: ffff9e3bc0057e60 ((work_completion)(&wfc.work)){+.+.}-{0:0}, at: process_scheduled_works+0x216/0x500 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 ((work_completion)(&wfc.work)){+.+.}-{0:0}: __flush_work+0x83/0x4e0 work_on_cpu+0x97/0xc0 rcu_nocb_cpu_offload+0x62/0xb0 rcu_nocb_toggle+0xd0/0x1d0 kthread+0xe6/0x120 ret_from_fork+0x2f/0x40 ret_from_fork_asm+0x1b/0x30 -> #1 (rcu_state.barrier_mutex){+.+.}-{3:3}: __mutex_lock+0x81/0xc80 rcu_nocb_cpu_deoffload+0x38/0xb0 rcu_nocb_toggle+0x144/0x1d0 kthread+0xe6/0x120 ret_from_fork+0x2f/0x40 ret_from_fork_asm+0x1b/0x30 -> #0 (cpu_hotplug_lock){++++}-{0:0}: __lock_acquire+0x1538/0x2500 lock_acquire+0xbf/0x2a0 percpu_down_write+0x31/0x200 _cpu_down+0x57/0x2b0 __cpu_down_maps_locked+0x10/0x20 work_for_cpu_fn+0x15/0x20 process_scheduled_works+0x2a7/0x500 worker_thread+0x173/0x330 kthread+0xe6/0x120 ret_from_fork+0x2f/0x40 ret_from_fork_asm+0x1b/0x30 other info that might help us debug this: Chain exists of: cpu_hotplug_lock --> rcu_state.barrier_mutex --> (work_completion)(&wfc.work) Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((work_completion)(&wfc.work)); lock(rcu_state.barrier_mutex); lock((work_completion)(&wfc.work)); lock(cpu_hotplug_lock); *** DEADLOCK *** 2 locks held by kworker/0:1/9: #0: ffff900481068b38 ((wq_completion)events){+.+.}-{0:0}, at: process_scheduled_works+0x212/0x500 #1: ffff9e3bc0057e60 ((work_completion)(&wfc.work)){+.+.}-{0:0}, at: process_scheduled_works+0x216/0x500 stack backtrace: CPU: 0 PID: 9 Comm: kworker/0:1 Not tainted 6.6.0-rc1-00065-g934ebd6e5359 #35409 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 Workqueue: events work_for_cpu_fn Call Trace: rcu-torture: rcu_torture_read_exit: Start of episode dump_stack_lvl+0x4a/0x80 check_noncircular+0x132/0x150 __lock_acquire+0x1538/0x2500 lock_acquire+0xbf/0x2a0 ? _cpu_down+0x57/0x2b0 percpu_down_write+0x31/0x200 ? _cpu_down+0x57/0x2b0 _cpu_down+0x57/0x2b0 __cpu_down_maps_locked+0x10/0x20 work_for_cpu_fn+0x15/0x20 process_scheduled_works+0x2a7/0x500 worker_thread+0x173/0x330 ? __pfx_worker_thread+0x10/0x10 kthread+0xe6/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2f/0x40 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Signed-off-by: Frederic Weisbecker Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- include/linux/workqueue.h | 46 +++++++++++++++++++++++++++++++++------ kernel/workqueue.c | 20 ++++++++++------- 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1c1d06804d45..24b1e5070f4d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -274,18 +274,16 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * to generate better code. */ #ifdef CONFIG_LOCKDEP -#define __INIT_WORK(_work, _func, _onstack) \ +#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ - static struct lock_class_key __key; \ - \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ - lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, &__key, 0); \ + lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, (_key), 0); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) #else -#define __INIT_WORK(_work, _func, _onstack) \ +#define __INIT_WORK_KEY(_work, _func, _onstack, _key) \ do { \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ @@ -294,12 +292,22 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } } while (0) #endif +#define __INIT_WORK(_work, _func, _onstack) \ + do { \ + static __maybe_unused struct lock_class_key __key; \ + \ + __INIT_WORK_KEY(_work, _func, _onstack, &__key); \ + } while (0) + #define INIT_WORK(_work, _func) \ __INIT_WORK((_work), (_func), 0) #define INIT_WORK_ONSTACK(_work, _func) \ __INIT_WORK((_work), (_func), 1) +#define INIT_WORK_ONSTACK_KEY(_work, _func, _key) \ + __INIT_WORK_KEY((_work), (_func), 1, _key) + #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ @@ -693,8 +701,32 @@ static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) return fn(arg); } #else -long work_on_cpu(int cpu, long (*fn)(void *), void *arg); -long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg); +long work_on_cpu_key(int cpu, long (*fn)(void *), + void *arg, struct lock_class_key *key); +/* + * A new key is defined for each caller to make sure the work + * associated with the function doesn't share its locking class. + */ +#define work_on_cpu(_cpu, _fn, _arg) \ +({ \ + static struct lock_class_key __key; \ + \ + work_on_cpu_key(_cpu, _fn, _arg, &__key); \ +}) + +long work_on_cpu_safe_key(int cpu, long (*fn)(void *), + void *arg, struct lock_class_key *key); + +/* + * A new key is defined for each caller to make sure the work + * associated with the function doesn't share its locking class. + */ +#define work_on_cpu_safe(_cpu, _fn, _arg) \ +({ \ + static struct lock_class_key __key; \ + \ + work_on_cpu_safe_key(_cpu, _fn, _arg, &__key); \ +}) #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a3522b70218d..0f682da96e1c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5622,50 +5622,54 @@ static void work_for_cpu_fn(struct work_struct *work) } /** - * work_on_cpu - run a function in thread context on a particular cpu + * work_on_cpu_key - run a function in thread context on a particular cpu * @cpu: the cpu to run on * @fn: the function to run * @arg: the function arg + * @key: The lock class key for lock debugging purposes * * It is up to the caller to ensure that the cpu doesn't go offline. * The caller must not hold any locks which would prevent @fn from completing. * * Return: The value @fn returns. */ -long work_on_cpu(int cpu, long (*fn)(void *), void *arg) +long work_on_cpu_key(int cpu, long (*fn)(void *), + void *arg, struct lock_class_key *key) { struct work_for_cpu wfc = { .fn = fn, .arg = arg }; - INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); + INIT_WORK_ONSTACK_KEY(&wfc.work, work_for_cpu_fn, key); schedule_work_on(cpu, &wfc.work); flush_work(&wfc.work); destroy_work_on_stack(&wfc.work); return wfc.ret; } -EXPORT_SYMBOL_GPL(work_on_cpu); +EXPORT_SYMBOL_GPL(work_on_cpu_key); /** - * work_on_cpu_safe - run a function in thread context on a particular cpu + * work_on_cpu_safe_key - run a function in thread context on a particular cpu * @cpu: the cpu to run on * @fn: the function to run * @arg: the function argument + * @key: The lock class key for lock debugging purposes * * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold * any locks which would prevent @fn from completing. * * Return: The value @fn returns. */ -long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) +long work_on_cpu_safe_key(int cpu, long (*fn)(void *), + void *arg, struct lock_class_key *key) { long ret = -ENODEV; cpus_read_lock(); if (cpu_online(cpu)) - ret = work_on_cpu(cpu, fn, arg); + ret = work_on_cpu_key(cpu, fn, arg, key); cpus_read_unlock(); return ret; } -EXPORT_SYMBOL_GPL(work_on_cpu_safe); +EXPORT_SYMBOL_GPL(work_on_cpu_safe_key); #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER From 0f32b0b2bdadda65d23e09dc56067a38c2f224db Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Wed, 18 Oct 2023 12:42:50 +0200 Subject: [PATCH 0014/1397] x86/mm: Drop the 4 MB restriction on minimal NUMA node memory size [ Upstream commit a1e2b8b36820d8c91275f207e77e91645b7c6836 ] Qi Zheng reported crashes in a production environment and provided a simplified example as a reproducer: | For example, if we use Qemu to start a two NUMA node kernel, | one of the nodes has 2M memory (less than NODE_MIN_SIZE), | and the other node has 2G, then we will encounter the | following panic: | | BUG: kernel NULL pointer dereference, address: 0000000000000000 | <...> | RIP: 0010:_raw_spin_lock_irqsave+0x22/0x40 | <...> | Call Trace: | | deactivate_slab() | bootstrap() | kmem_cache_init() | start_kernel() | secondary_startup_64_no_verify() The crashes happen because of inconsistency between the nodemask that has nodes with less than 4MB as memoryless, and the actual memory fed into the core mm. The commit: 9391a3f9c7f1 ("[PATCH] x86_64: Clear more state when ignoring empty node in SRAT parsing") ... that introduced minimal size of a NUMA node does not explain why a node size cannot be less than 4MB and what boot failures this restriction might fix. Fixes have been submitted to the core MM code to tighten up the memory topologies it accepts and to not crash on weird input: mm: page_alloc: skip memoryless nodes entirely mm: memory_hotplug: drop memoryless node from fallback lists Andrew has accepted them into the -mm tree, but there are no stable SHA1's yet. This patch drops the limitation for minimal node size on x86: - which works around the crash without the fixes to the core MM. - makes x86 topologies less weird, - removes an arbitrary and undocumented limitation on NUMA topologies. [ mingo: Improved changelog clarity. ] Reported-by: Qi Zheng Tested-by: Mario Casquero Signed-off-by: Mike Rapoport (IBM) Signed-off-by: Ingo Molnar Acked-by: David Hildenbrand Acked-by: Michal Hocko Cc: Dave Hansen Cc: Rik van Riel Link: https://lore.kernel.org/r/ZS+2qqjEO5/867br@gmail.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/numa.h | 7 ------- arch/x86/mm/numa.c | 7 ------- 2 files changed, 14 deletions(-) diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index e3bae2b60a0d..ef2844d69173 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -12,13 +12,6 @@ #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) -/* - * Too small node sizes may confuse the VM badly. Usually they - * result from BIOS bugs. So dont recognize nodes as standalone - * NUMA entities that have less than this amount of RAM listed: - */ -#define NODE_MIN_SIZE (4*1024*1024) - extern int numa_off; /* diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index c01c5506fd4a..aa39d678fe81 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -602,13 +602,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) if (start >= end) continue; - /* - * Don't confuse VM with a node that doesn't have the - * minimum amount of memory: - */ - if (end && (end - start) < NODE_MIN_SIZE) - continue; - alloc_node_data(nid); } From 03f9498afa52493decd0514fe899f8da9d812552 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 29 Aug 2023 12:45:31 +0300 Subject: [PATCH 0015/1397] wifi: plfxlc: fix clang-specific fortify warning [ Upstream commit a763e92c78615ea838f5b9a841398b1d4adb968e ] When compiling with clang 16.0.6 and CONFIG_FORTIFY_SOURCE=y, I've noticed the following (somewhat confusing due to absence of an actual source code location): In file included from drivers/net/wireless/purelifi/plfxlc/mac.c:6: In file included from ./include/linux/netdevice.h:24: In file included from ./include/linux/timer.h:6: In file included from ./include/linux/ktime.h:24: In file included from ./include/linux/time.h:60: In file included from ./include/linux/time32.h:13: In file included from ./include/linux/timex.h:67: In file included from ./arch/x86/include/asm/timex.h:5: In file included from ./arch/x86/include/asm/processor.h:23: In file included from ./arch/x86/include/asm/msr.h:11: In file included from ./arch/x86/include/asm/cpumask.h:5: In file included from ./include/linux/cpumask.h:12: In file included from ./include/linux/bitmap.h:11: In file included from ./include/linux/string.h:254: ./include/linux/fortify-string.h:592:4: warning: call to '__read_overflow2_field' declared with 'warning' attribute: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning] __read_overflow2_field(q_size_field, size); The compiler actually complains on 'plfxlc_get_et_strings()' where fortification logic inteprets call to 'memcpy()' as an attempt to copy the whole 'et_strings' array from its first member and so issues an overread warning. This warning may be silenced by passing an address of the whole array and not the first member to 'memcpy()'. Signed-off-by: Dmitry Antipov Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230829094541.234751-1-dmantipov@yandex.ru Signed-off-by: Sasha Levin --- drivers/net/wireless/purelifi/plfxlc/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.c b/drivers/net/wireless/purelifi/plfxlc/mac.c index 94ee831b5de3..506d2f31efb5 100644 --- a/drivers/net/wireless/purelifi/plfxlc/mac.c +++ b/drivers/net/wireless/purelifi/plfxlc/mac.c @@ -666,7 +666,7 @@ static void plfxlc_get_et_strings(struct ieee80211_hw *hw, u32 sset, u8 *data) { if (sset == ETH_SS_STATS) - memcpy(data, *et_strings, sizeof(et_strings)); + memcpy(data, et_strings, sizeof(et_strings)); } static void plfxlc_get_et_stats(struct ieee80211_hw *hw, From 499aafa2ceef1e2269360c3c039a12688c6a2e7b Mon Sep 17 00:00:00 2001 From: Harshitha Prem Date: Sat, 26 Aug 2023 08:42:43 +0300 Subject: [PATCH 0016/1397] wifi: ath12k: Ignore fragments from uninitialized peer in dp [ Upstream commit bbc86757ca62423c3b6bd8f7176da1ff43450769 ] When max virtual ap interfaces are configured in all the bands with ACS and hostapd restart is done every 60s, a crash is observed at random times. In the above scenario, a fragmented packet is received for self peer, for which rx_tid and rx_frags are not initialized in datapath. While handling this fragment, crash is observed as the rx_frag list is uninitialized and when we walk in ath12k_dp_rx_h_sort_frags, skb null leads to exception. To address this, before processing received fragments we check dp_setup_done flag is set to ensure that peer has completed its dp peer setup for fragment queue, else ignore processing the fragments. Call trace: PC points to "ath12k_dp_process_rx_err+0x4e8/0xfcc [ath12k]" LR points to "ath12k_dp_process_rx_err+0x480/0xfcc [ath12k]". The Backtrace obtained is as follows: ath12k_dp_process_rx_err+0x4e8/0xfcc [ath12k] ath12k_dp_service_srng+0x78/0x260 [ath12k] ath12k_pci_write32+0x990/0xb0c [ath12k] __napi_poll+0x30/0xa4 net_rx_action+0x118/0x270 __do_softirq+0x10c/0x244 irq_exit+0x64/0xb4 __handle_domain_irq+0x88/0xac gic_handle_irq+0x74/0xbc el1_irq+0xf0/0x1c0 arch_cpu_idle+0x10/0x18 do_idle+0x104/0x248 cpu_startup_entry+0x20/0x64 rest_init+0xd0/0xdc arch_call_rest_init+0xc/0x14 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230821130343.29495-2-quic_hprem@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp.c | 1 + drivers/net/wireless/ath/ath12k/dp_rx.c | 9 +++++++++ drivers/net/wireless/ath/ath12k/peer.h | 3 +++ 3 files changed, 13 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index f933896f2a68..6893466f61f0 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -38,6 +38,7 @@ void ath12k_dp_peer_cleanup(struct ath12k *ar, int vdev_id, const u8 *addr) ath12k_dp_rx_peer_tid_cleanup(ar, peer); crypto_free_shash(peer->tfm_mmic); + peer->dp_setup_done = false; spin_unlock_bh(&ab->base_lock); } diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 17da39bd3ab4..690a0107f0d6 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -2748,6 +2748,7 @@ int ath12k_dp_rx_peer_frag_setup(struct ath12k *ar, const u8 *peer_mac, int vdev } peer->tfm_mmic = tfm; + peer->dp_setup_done = true; spin_unlock_bh(&ab->base_lock); return 0; @@ -3214,6 +3215,14 @@ static int ath12k_dp_rx_frag_h_mpdu(struct ath12k *ar, ret = -ENOENT; goto out_unlock; } + + if (!peer->dp_setup_done) { + ath12k_warn(ab, "The peer %pM [%d] has uninitialized datapath\n", + peer->addr, peer_id); + ret = -ENOENT; + goto out_unlock; + } + rx_tid = &peer->rx_tid[tid]; if ((!skb_queue_empty(&rx_tid->rx_frags) && seqno != rx_tid->cur_sn) || diff --git a/drivers/net/wireless/ath/ath12k/peer.h b/drivers/net/wireless/ath/ath12k/peer.h index b296dc0e2f67..c6edb24cbedd 100644 --- a/drivers/net/wireless/ath/ath12k/peer.h +++ b/drivers/net/wireless/ath/ath12k/peer.h @@ -44,6 +44,9 @@ struct ath12k_peer { struct ppdu_user_delayba ppdu_stats_delayba; bool delayba_flag; bool is_authorized; + + /* protected by ab->data_lock */ + bool dp_setup_done; }; void ath12k_peer_unmap_event(struct ath12k_base *ab, u16 peer_id); From 90e7d2ad8d91a50e138ecbfcdebeef874dc46ee5 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 29 Aug 2023 12:41:01 +0300 Subject: [PATCH 0017/1397] wifi: mac80211_hwsim: fix clang-specific fortify warning [ Upstream commit cbaccdc42483c65016f1bae89128c08dc17cfb2a ] When compiling with clang 16.0.6 and CONFIG_FORTIFY_SOURCE=y, I've noticed the following (somewhat confusing due to absence of an actual source code location): In file included from drivers/net/wireless/virtual/mac80211_hwsim.c:18: In file included from ./include/linux/slab.h:16: In file included from ./include/linux/gfp.h:7: In file included from ./include/linux/mmzone.h:8: In file included from ./include/linux/spinlock.h:56: In file included from ./include/linux/preempt.h:79: In file included from ./arch/x86/include/asm/preempt.h:9: In file included from ./include/linux/thread_info.h:60: In file included from ./arch/x86/include/asm/thread_info.h:53: In file included from ./arch/x86/include/asm/cpufeature.h:5: In file included from ./arch/x86/include/asm/processor.h:23: In file included from ./arch/x86/include/asm/msr.h:11: In file included from ./arch/x86/include/asm/cpumask.h:5: In file included from ./include/linux/cpumask.h:12: In file included from ./include/linux/bitmap.h:11: In file included from ./include/linux/string.h:254: ./include/linux/fortify-string.h:592:4: warning: call to '__read_overflow2_field' declared with 'warning' attribute: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning] __read_overflow2_field(q_size_field, size); The compiler actually complains on 'mac80211_hwsim_get_et_strings()' where fortification logic inteprets call to 'memcpy()' as an attempt to copy the whole 'mac80211_hwsim_gstrings_stats' array from its first member and so issues an overread warning. This warning may be silenced by passing an address of the whole array and not the first member to 'memcpy()'. Signed-off-by: Dmitry Antipov Link: https://lore.kernel.org/r/20230829094140.234636-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/virtual/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 1f524030b186..f5a0880da3fc 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -3170,7 +3170,7 @@ static void mac80211_hwsim_get_et_strings(struct ieee80211_hw *hw, u32 sset, u8 *data) { if (sset == ETH_SS_STATS) - memcpy(data, *mac80211_hwsim_gstrings_stats, + memcpy(data, mac80211_hwsim_gstrings_stats, sizeof(mac80211_hwsim_gstrings_stats)); } From 5a94cffe90e20e8fade0b9abd4370bd671fe87c7 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 3 Feb 2023 10:36:36 +0800 Subject: [PATCH 0018/1397] wifi: mac80211: don't return unset power in ieee80211_get_tx_power() [ Upstream commit e160ab85166e77347d0cbe5149045cb25e83937f ] We can get a UBSAN warning if ieee80211_get_tx_power() returns the INT_MIN value mac80211 internally uses for "unset power level". UBSAN: signed-integer-overflow in net/wireless/nl80211.c:3816:5 -2147483648 * 100 cannot be represented in type 'int' CPU: 0 PID: 20433 Comm: insmod Tainted: G WC OE Call Trace: dump_stack+0x74/0x92 ubsan_epilogue+0x9/0x50 handle_overflow+0x8d/0xd0 __ubsan_handle_mul_overflow+0xe/0x10 nl80211_send_iface+0x688/0x6b0 [cfg80211] [...] cfg80211_register_wdev+0x78/0xb0 [cfg80211] cfg80211_netdev_notifier_call+0x200/0x620 [cfg80211] [...] ieee80211_if_add+0x60e/0x8f0 [mac80211] ieee80211_register_hw+0xda5/0x1170 [mac80211] In this case, simply return an error instead, to indicate that no data is available. Cc: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://lore.kernel.org/r/20230203023636.4418-1-pkshih@realtek.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/cfg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0e3a1753a51c..715da615f035 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3121,6 +3121,10 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, else *dbm = sdata->vif.bss_conf.txpower; + /* INT_MIN indicates no power level was set yet */ + if (*dbm == INT_MIN) + return -EINVAL; + return 0; } From 32f08b7b430ee01ec47d730f961a3306c1c7b6fb Mon Sep 17 00:00:00 2001 From: Sieng-Piaw Liew Date: Tue, 12 Sep 2023 09:07:11 +0800 Subject: [PATCH 0019/1397] atl1c: Work around the DMA RX overflow issue [ Upstream commit 86565682e9053e5deb128193ea9e88531bbae9cf ] This is based on alx driver commit 881d0327db37 ("net: alx: Work around the DMA RX overflow issue"). The alx and atl1c drivers had RX overflow error which was why a custom allocator was created to avoid certain addresses. The simpler workaround then created for alx driver, but not for atl1c due to lack of tester. Instead of using a custom allocator, check the allocated skb address and use skb_reserve() to move away from problematic 0x...fc0 address. Tested on AR8131 on Acer 4540. Signed-off-by: Sieng-Piaw Liew Link: https://lore.kernel.org/r/20230912010711.12036-1-liew.s.piaw@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/atheros/atl1c/atl1c.h | 3 - .../net/ethernet/atheros/atl1c/atl1c_main.c | 67 +++++-------------- 2 files changed, 16 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h index 43d821fe7a54..63ba64dbb731 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c.h +++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h @@ -504,15 +504,12 @@ struct atl1c_rrd_ring { u16 next_to_use; u16 next_to_clean; struct napi_struct napi; - struct page *rx_page; - unsigned int rx_page_offset; }; /* board specific private data structure */ struct atl1c_adapter { struct net_device *netdev; struct pci_dev *pdev; - unsigned int rx_frag_size; struct atl1c_hw hw; struct atl1c_hw_stats hw_stats; struct mii_if_info mii; /* MII interface info */ diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 940c5d1ff9cf..74b78164cf74 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -483,15 +483,10 @@ static int atl1c_set_mac_addr(struct net_device *netdev, void *p) static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter, struct net_device *dev) { - unsigned int head_size; int mtu = dev->mtu; adapter->rx_buffer_len = mtu > AT_RX_BUF_SIZE ? roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE; - - head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD + NET_IP_ALIGN) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - adapter->rx_frag_size = roundup_pow_of_two(head_size); } static netdev_features_t atl1c_fix_features(struct net_device *netdev, @@ -964,7 +959,6 @@ static void atl1c_init_ring_ptrs(struct atl1c_adapter *adapter) static void atl1c_free_ring_resources(struct atl1c_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; - int i; dma_free_coherent(&pdev->dev, adapter->ring_header.size, adapter->ring_header.desc, adapter->ring_header.dma); @@ -977,12 +971,6 @@ static void atl1c_free_ring_resources(struct atl1c_adapter *adapter) kfree(adapter->tpd_ring[0].buffer_info); adapter->tpd_ring[0].buffer_info = NULL; } - for (i = 0; i < adapter->rx_queue_count; ++i) { - if (adapter->rrd_ring[i].rx_page) { - put_page(adapter->rrd_ring[i].rx_page); - adapter->rrd_ring[i].rx_page = NULL; - } - } } /** @@ -1754,48 +1742,11 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter, skb_checksum_none_assert(skb); } -static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter, - u32 queue, bool napi_mode) -{ - struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue]; - struct sk_buff *skb; - struct page *page; - - if (adapter->rx_frag_size > PAGE_SIZE) { - if (likely(napi_mode)) - return napi_alloc_skb(&rrd_ring->napi, - adapter->rx_buffer_len); - else - return netdev_alloc_skb_ip_align(adapter->netdev, - adapter->rx_buffer_len); - } - - page = rrd_ring->rx_page; - if (!page) { - page = alloc_page(GFP_ATOMIC); - if (unlikely(!page)) - return NULL; - rrd_ring->rx_page = page; - rrd_ring->rx_page_offset = 0; - } - - skb = build_skb(page_address(page) + rrd_ring->rx_page_offset, - adapter->rx_frag_size); - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - rrd_ring->rx_page_offset += adapter->rx_frag_size; - if (rrd_ring->rx_page_offset >= PAGE_SIZE) - rrd_ring->rx_page = NULL; - else - get_page(page); - } - return skb; -} - static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue, bool napi_mode) { struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring[queue]; + struct atl1c_rrd_ring *rrd_ring = &adapter->rrd_ring[queue]; struct pci_dev *pdev = adapter->pdev; struct atl1c_buffer *buffer_info, *next_info; struct sk_buff *skb; @@ -1814,13 +1765,27 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, u32 queue, while (next_info->flags & ATL1C_BUFFER_FREE) { rfd_desc = ATL1C_RFD_DESC(rfd_ring, rfd_next_to_use); - skb = atl1c_alloc_skb(adapter, queue, napi_mode); + /* When DMA RX address is set to something like + * 0x....fc0, it will be very likely to cause DMA + * RFD overflow issue. + * + * To work around it, we apply rx skb with 64 bytes + * longer space, and offset the address whenever + * 0x....fc0 is detected. + */ + if (likely(napi_mode)) + skb = napi_alloc_skb(&rrd_ring->napi, adapter->rx_buffer_len + 64); + else + skb = netdev_alloc_skb(adapter->netdev, adapter->rx_buffer_len + 64); if (unlikely(!skb)) { if (netif_msg_rx_err(adapter)) dev_warn(&pdev->dev, "alloc rx buffer failed\n"); break; } + if (((unsigned long)skb->data & 0xfff) == 0xfc0) + skb_reserve(skb, 64); + /* * Make buffer alignment 2 beyond a 16 byte boundary * this will result in a 16 byte aligned IP header after From 821a7e4143af115b840ec199eb179537e18af922 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 13 Sep 2023 01:32:08 +0200 Subject: [PATCH 0020/1397] bpf: Detect IP == ksym.end as part of BPF program [ Upstream commit 66d9111f3517f85ef2af0337ece02683ce0faf21 ] Now that bpf_throw kfunc is the first such call instruction that has noreturn semantics within the verifier, this also kicks in dead code elimination in unprecedented ways. For one, any instruction following a bpf_throw call will never be marked as seen. Moreover, if a callchain ends up throwing, any instructions after the call instruction to the eventually throwing subprog in callers will also never be marked as seen. The tempting way to fix this would be to emit extra 'int3' instructions which bump the jited_len of a program, and ensure that during runtime when a program throws, we can discover its boundaries even if the call instruction to bpf_throw (or to subprogs that always throw) is emitted as the final instruction in the program. An example of such a program would be this: do_something(): ... r0 = 0 exit foo(): r1 = 0 call bpf_throw r0 = 0 exit bar(cond): if r1 != 0 goto pc+2 call do_something exit call foo r0 = 0 // Never seen by verifier exit // main(ctx): r1 = ... call bar r0 = 0 exit Here, if we do end up throwing, the stacktrace would be the following: bpf_throw foo bar main In bar, the final instruction emitted will be the call to foo, as such, the return address will be the subsequent instruction (which the JIT emits as int3 on x86). This will end up lying outside the jited_len of the program, thus, when unwinding, we will fail to discover the return address as belonging to any program and end up in a panic due to the unreliable stack unwinding of BPF programs that we never expect. To remedy this case, make bpf_prog_ksym_find treat IP == ksym.end as part of the BPF program, so that is_bpf_text_address returns true when such a case occurs, and we are able to unwind reliably when the final instruction ends up being a call instruction. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230912233214.1518551-12-memxor@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 4e3ce0542e31..64fcd81ad3da 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -623,7 +623,11 @@ static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n) if (val < ksym->start) return -1; - if (val >= ksym->end) + /* Ensure that we detect return addresses as part of the program, when + * the final instruction is a call for a program part of the stack + * trace. Therefore, do val > ksym->end instead of val >= ksym->end. + */ + if (val > ksym->end) return 1; return 0; From 8954a159d137b3135a97e09934f220ad1576da17 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 29 Aug 2023 12:38:12 +0300 Subject: [PATCH 0021/1397] wifi: ath9k: fix clang-specific fortify warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 95f97fe0ac974467ab4da215985a32b2fdf48af0 ] When compiling with clang 16.0.6 and CONFIG_FORTIFY_SOURCE=y, I've noticed the following (somewhat confusing due to absence of an actual source code location): In file included from drivers/net/wireless/ath/ath9k/debug.c:17: In file included from ./include/linux/slab.h:16: In file included from ./include/linux/gfp.h:7: In file included from ./include/linux/mmzone.h:8: In file included from ./include/linux/spinlock.h:56: In file included from ./include/linux/preempt.h:79: In file included from ./arch/x86/include/asm/preempt.h:9: In file included from ./include/linux/thread_info.h:60: In file included from ./arch/x86/include/asm/thread_info.h:53: In file included from ./arch/x86/include/asm/cpufeature.h:5: In file included from ./arch/x86/include/asm/processor.h:23: In file included from ./arch/x86/include/asm/msr.h:11: In file included from ./arch/x86/include/asm/cpumask.h:5: In file included from ./include/linux/cpumask.h:12: In file included from ./include/linux/bitmap.h:11: In file included from ./include/linux/string.h:254: ./include/linux/fortify-string.h:592:4: warning: call to '__read_overflow2_field' declared with 'warning' attribute: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning] __read_overflow2_field(q_size_field, size); In file included from drivers/net/wireless/ath/ath9k/htc_drv_debug.c:17: In file included from drivers/net/wireless/ath/ath9k/htc.h:20: In file included from ./include/linux/module.h:13: In file included from ./include/linux/stat.h:19: In file included from ./include/linux/time.h:60: In file included from ./include/linux/time32.h:13: In file included from ./include/linux/timex.h:67: In file included from ./arch/x86/include/asm/timex.h:5: In file included from ./arch/x86/include/asm/processor.h:23: In file included from ./arch/x86/include/asm/msr.h:11: In file included from ./arch/x86/include/asm/cpumask.h:5: In file included from ./include/linux/cpumask.h:12: In file included from ./include/linux/bitmap.h:11: In file included from ./include/linux/string.h:254: ./include/linux/fortify-string.h:592:4: warning: call to '__read_overflow2_field' declared with 'warning' attribute: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning] __read_overflow2_field(q_size_field, size); The compiler actually complains on 'ath9k_get_et_strings()' and 'ath9k_htc_get_et_strings()' due to the same reason: fortification logic inteprets call to 'memcpy()' as an attempt to copy the whole array from it's first member and so issues an overread warning. These warnings may be silenced by passing an address of the whole array and not the first member to 'memcpy()'. Signed-off-by: Dmitry Antipov Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230829093856.234584-1-dmantipov@yandex.ru Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/debug.c | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_debug.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c index 9bc57c5a89bf..a0376a6787b8 100644 --- a/drivers/net/wireless/ath/ath9k/debug.c +++ b/drivers/net/wireless/ath/ath9k/debug.c @@ -1293,7 +1293,7 @@ void ath9k_get_et_strings(struct ieee80211_hw *hw, u32 sset, u8 *data) { if (sset == ETH_SS_STATS) - memcpy(data, *ath9k_gstrings_stats, + memcpy(data, ath9k_gstrings_stats, sizeof(ath9k_gstrings_stats)); } diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c index c549ff3abcdc..278ddc713fdc 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c @@ -423,7 +423,7 @@ void ath9k_htc_get_et_strings(struct ieee80211_hw *hw, u32 sset, u8 *data) { if (sset == ETH_SS_STATS) - memcpy(data, *ath9k_htc_gstrings_stats, + memcpy(data, ath9k_htc_gstrings_stats, sizeof(ath9k_htc_gstrings_stats)); } From c9e44111da221246efb2e623ae1be40a5cf6542c Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 1 Sep 2023 09:56:02 +0800 Subject: [PATCH 0022/1397] wifi: ath12k: fix possible out-of-bound read in ath12k_htt_pull_ppdu_stats() [ Upstream commit 1bc44a505a229bb1dd4957e11aa594edeea3690e ] len is extracted from HTT message and could be an unexpected value in case errors happen, so add validation before using to avoid possible out-of-bound read in the following message iteration and parsing. The same issue also applies to ppdu_info->ppdu_stats.common.num_users, so validate it before using too. These are found during code review. Compile test only. Signed-off-by: Baochen Qiang Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230901015602.45112-1-quic_bqiang@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp_rx.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 690a0107f0d6..e1c84fc97460 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1555,6 +1555,13 @@ static int ath12k_htt_pull_ppdu_stats(struct ath12k_base *ab, msg = (struct ath12k_htt_ppdu_stats_msg *)skb->data; len = le32_get_bits(msg->info, HTT_T2H_PPDU_STATS_INFO_PAYLOAD_SIZE); + if (len > (skb->len - struct_size(msg, data, 0))) { + ath12k_warn(ab, + "HTT PPDU STATS event has unexpected payload size %u, should be smaller than %u\n", + len, skb->len); + return -EINVAL; + } + pdev_id = le32_get_bits(msg->info, HTT_T2H_PPDU_STATS_INFO_PDEV_ID); ppdu_id = le32_to_cpu(msg->ppdu_id); @@ -1583,6 +1590,16 @@ static int ath12k_htt_pull_ppdu_stats(struct ath12k_base *ab, goto exit; } + if (ppdu_info->ppdu_stats.common.num_users >= HTT_PPDU_STATS_MAX_USERS) { + spin_unlock_bh(&ar->data_lock); + ath12k_warn(ab, + "HTT PPDU STATS event has unexpected num_users %u, should be smaller than %u\n", + ppdu_info->ppdu_stats.common.num_users, + HTT_PPDU_STATS_MAX_USERS); + ret = -EINVAL; + goto exit; + } + /* back up data rate tlv for all peers */ if (ppdu_info->frame_type == HTT_STATS_PPDU_FTYPE_DATA && (ppdu_info->tlv_bitmap & (1 << HTT_PPDU_STATS_TAG_USR_COMMON)) && From e310aff779eb71f27ae9716ce73a041e86cc4368 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 29 Aug 2023 12:36:02 +0300 Subject: [PATCH 0023/1397] wifi: ath10k: fix clang-specific fortify warning [ Upstream commit cb4c132ebfeac5962f7258ffc831caa0c4dada1a ] When compiling with clang 16.0.6 and CONFIG_FORTIFY_SOURCE=y, I've noticed the following (somewhat confusing due to absence of an actual source code location): In file included from drivers/net/wireless/ath/ath10k/debug.c:8: In file included from ./include/linux/module.h:13: In file included from ./include/linux/stat.h:19: In file included from ./include/linux/time.h:60: In file included from ./include/linux/time32.h:13: In file included from ./include/linux/timex.h:67: In file included from ./arch/x86/include/asm/timex.h:5: In file included from ./arch/x86/include/asm/processor.h:23: In file included from ./arch/x86/include/asm/msr.h:11: In file included from ./arch/x86/include/asm/cpumask.h:5: In file included from ./include/linux/cpumask.h:12: In file included from ./include/linux/bitmap.h:11: In file included from ./include/linux/string.h:254: ./include/linux/fortify-string.h:592:4: warning: call to '__read_overflow2_field' declared with 'warning' attribute: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning] __read_overflow2_field(q_size_field, size); The compiler actually complains on 'ath10k_debug_get_et_strings()' where fortification logic inteprets call to 'memcpy()' as an attempt to copy the whole 'ath10k_gstrings_stats' array from it's first member and so issues an overread warning. This warning may be silenced by passing an address of the whole array and not the first member to 'memcpy()'. Signed-off-by: Dmitry Antipov Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230829093652.234537-1-dmantipov@yandex.ru Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index f9518e1c9903..fe89bc61e531 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -1140,7 +1140,7 @@ void ath10k_debug_get_et_strings(struct ieee80211_hw *hw, u32 sset, u8 *data) { if (sset == ETH_SS_STATS) - memcpy(data, *ath10k_gstrings_stats, + memcpy(data, ath10k_gstrings_stats, sizeof(ath10k_gstrings_stats)); } From 4dd0547e8b45faf6f95373be5436b66cde326c0e Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Wed, 20 Sep 2023 16:43:42 +0300 Subject: [PATCH 0024/1397] wifi: ath12k: fix possible out-of-bound write in ath12k_wmi_ext_hal_reg_caps() [ Upstream commit b302dce3d9edea5b93d1902a541684a967f3c63c ] reg_cap.phy_id is extracted from WMI event and could be an unexpected value in case some errors happen. As a result out-of-bound write may occur to soc->hal_reg_cap. Fix it by validating reg_cap.phy_id before using it. This is found during code review. Compile tested only. Signed-off-by: Baochen Qiang Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230830020716.5420-1-quic_bqiang@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/wmi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index ef0f3cf35cfd..bddf4571d50c 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -3876,6 +3876,12 @@ static int ath12k_wmi_ext_hal_reg_caps(struct ath12k_base *soc, ath12k_warn(soc, "failed to extract reg cap %d\n", i); return ret; } + + if (reg_cap.phy_id >= MAX_RADIOS) { + ath12k_warn(soc, "unexpected phy id %u\n", reg_cap.phy_id); + return -EINVAL; + } + soc->hal_reg_cap[reg_cap.phy_id] = reg_cap; } return 0; From b7765b0a034553018f0d815e27f3e9d4178a31a5 Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Thu, 21 Sep 2023 02:03:36 +0800 Subject: [PATCH 0025/1397] ACPI: APEI: Fix AER info corruption when error status data has multiple sections [ Upstream commit e2abc47a5a1a9f641e7cacdca643fdd40729bf6e ] ghes_handle_aer() passes AER data to the PCI core for logging and recovery by calling aer_recover_queue() with a pointer to struct aer_capability_regs. The problem was that aer_recover_queue() queues the pointer directly without copying the aer_capability_regs data. The pointer was to the ghes->estatus buffer, which could be reused before aer_recover_work_func() reads the data. To avoid this problem, allocate a new aer_capability_regs structure from the ghes_estatus_pool, copy the AER data from the ghes->estatus buffer into it, pass a pointer to the new struct to aer_recover_queue(), and free it after aer_recover_work_func() has processed it. Reported-by: Bjorn Helgaas Acked-by: Bjorn Helgaas Signed-off-by: Shiju Jose [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/apei/ghes.c | 23 ++++++++++++++++++++++- drivers/pci/pcie/aer.c | 10 ++++++++++ include/acpi/ghes.h | 4 ++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ef59d6ea16da..63ad0541db38 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -209,6 +209,20 @@ int ghes_estatus_pool_init(unsigned int num_ghes) return -ENOMEM; } +/** + * ghes_estatus_pool_region_free - free previously allocated memory + * from the ghes_estatus_pool. + * @addr: address of memory to free. + * @size: size of memory to free. + * + * Returns none. + */ +void ghes_estatus_pool_region_free(unsigned long addr, u32 size) +{ + gen_pool_free(ghes_estatus_pool, addr, size); +} +EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free); + static int map_gen_v2(struct ghes *ghes) { return apei_map_generic_address(&ghes->generic_v2->read_ack_register); @@ -564,6 +578,7 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { unsigned int devfn; int aer_severity; + u8 *aer_info; devfn = PCI_DEVFN(pcie_err->device_id.device, pcie_err->device_id.function); @@ -577,11 +592,17 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) if (gdata->flags & CPER_SEC_RESET) aer_severity = AER_FATAL; + aer_info = (void *)gen_pool_alloc(ghes_estatus_pool, + sizeof(struct aer_capability_regs)); + if (!aer_info) + return; + memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs)); + aer_recover_queue(pcie_err->device_id.segment, pcie_err->device_id.bus, devfn, aer_severity, (struct aer_capability_regs *) - pcie_err->aer_info); + aer_info); } #endif } diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 9c8fd69ae5ad..40d84cb0c601 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "../pci.h" @@ -997,6 +998,15 @@ static void aer_recover_work_func(struct work_struct *work) continue; } cper_print_aer(pdev, entry.severity, entry.regs); + /* + * Memory for aer_capability_regs(entry.regs) is being allocated from the + * ghes_estatus_pool to protect it from overwriting when multiple sections + * are present in the error status. Thus free the same after processing + * the data. + */ + ghes_estatus_pool_region_free((unsigned long)entry.regs, + sizeof(struct aer_capability_regs)); + if (entry.severity == AER_NONFATAL) pcie_do_recovery(pdev, pci_channel_io_normal, aer_root_reset); diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 3c8bba9f1114..be1dd4c1a917 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -73,8 +73,12 @@ int ghes_register_vendor_record_notifier(struct notifier_block *nb); void ghes_unregister_vendor_record_notifier(struct notifier_block *nb); struct list_head *ghes_get_devices(void); + +void ghes_estatus_pool_region_free(unsigned long addr, u32 size); #else static inline struct list_head *ghes_get_devices(void) { return NULL; } + +static inline void ghes_estatus_pool_region_free(unsigned long addr, u32 size) { return; } #endif int ghes_estatus_pool_init(unsigned int num_ghes); From 994f77f35ebb657124fba402e5e05e77eff7db28 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 19 Sep 2023 14:47:20 +0200 Subject: [PATCH 0026/1397] net: sfp: add quirk for Fiberstone GPON-ONU-34-20BI [ Upstream commit d387e34fec407f881fdf165b5d7ec128ebff362f ] Fiberstone GPON-ONU-34-20B can operate at 2500base-X, but report 1.2GBd NRZ in their EEPROM. The module also require the ignore tx fault fixup similar to Huawei MA5671A as it gets disabled on error messages with serial redirection enabled. Signed-off-by: Christian Marangi Link: https://lore.kernel.org/r/20230919124720.8210-1-ansuelsmth@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/sfp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 4ecfac227865..a50038a45250 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -452,6 +452,11 @@ static const struct sfp_quirk sfp_quirks[] = { // Rollball protocol to talk to the PHY. SFP_QUIRK_F("FS", "SFP-10G-T", sfp_fixup_fs_10gt), + // Fiberstore GPON-ONU-34-20BI can operate at 2500base-X, but report 1.2GBd + // NRZ in their EEPROM + SFP_QUIRK("FS", "GPON-ONU-34-20BI", sfp_quirk_2500basex, + sfp_fixup_ignore_tx_fault), + SFP_QUIRK_F("HALNy", "HL-GSFP", sfp_fixup_halny_gsfp), // HG MXPD-483II-F 2.5G supports 2500Base-X, but incorrectly reports From 8943a6d1790471fe6571fdb4caa79daddb4dda5b Mon Sep 17 00:00:00 2001 From: Ingo Rohloff Date: Sat, 26 Aug 2023 22:02:41 +0200 Subject: [PATCH 0027/1397] wifi: mt76: mt7921e: Support MT7992 IP in Xiaomi Redmibook 15 Pro (2023) [ Upstream commit fce9c967820a72f600abbf061d7077861685a14d ] In the Xiaomi Redmibook 15 Pro (2023) laptop I have got, a wifi chip is used, which according to its PCI Vendor ID is from "ITTIM Technology". This chip works flawlessly with the mt7921e module. The driver doesn't bind to this PCI device, because the Vendor ID from "ITTIM Technology" is not recognized. This patch adds the PCI Vendor ID from "ITTIM Technology" to the list of PCI Vendor IDs and lets the mt7921e driver bind to the mentioned wifi chip. Signed-off-by: Ingo Rohloff Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 2 ++ include/linux/pci_ids.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 3dda84a93717..f04e7095e181 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -17,6 +17,8 @@ static const struct pci_device_id mt7921_pci_device_table[] = { .driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM }, { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7922), .driver_data = (kernel_ulong_t)MT7922_FIRMWARE_WM }, + { PCI_DEVICE(PCI_VENDOR_ID_ITTIM, 0x7922), + .driver_data = (kernel_ulong_t)MT7922_FIRMWARE_WM }, { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0608), .driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM }, { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0616), diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 3a8e24e9a93f..fe4a3589bb3f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -180,6 +180,8 @@ #define PCI_DEVICE_ID_BERKOM_A4T 0xffa4 #define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO 0xffa8 +#define PCI_VENDOR_ID_ITTIM 0x0b48 + #define PCI_VENDOR_ID_COMPAQ 0x0e11 #define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508 #define PCI_DEVICE_ID_COMPAQ_TACHYON 0xa0fc From b521d90864c073fef2a19df097fc1eb60752b78e Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 29 Aug 2023 12:43:49 +0300 Subject: [PATCH 0028/1397] wifi: mt76: fix clang-specific fortify warnings [ Upstream commit 03f0e11da7fb26db4f27e6b83a223512db9f7ca5 ] When compiling with clang 16.0.6 and CONFIG_FORTIFY_SOURCE=y, I've noticed the following (somewhat confusing due to absence of an actual source code location): In file included from drivers/net/wireless/mediatek/mt76/mt792x_core.c:4: In file included from ./include/linux/module.h:13: In file included from ./include/linux/stat.h:19: In file included from ./include/linux/time.h:60: In file included from ./include/linux/time32.h:13: In file included from ./include/linux/timex.h:67: In file included from ./arch/x86/include/asm/timex.h:5: In file included from ./arch/x86/include/asm/processor.h:23: In file included from ./arch/x86/include/asm/msr.h:11: In file included from ./arch/x86/include/asm/cpumask.h:5: In file included from ./include/linux/cpumask.h:12: In file included from ./include/linux/bitmap.h:11: In file included from ./include/linux/string.h:254: ./include/linux/fortify-string.h:592:4: warning: call to '__read_overflow2_field' declared with 'warning' attribute: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning] __read_overflow2_field(q_size_field, size); In file included from drivers/net/wireless/mediatek/mt76/mt7915/main.c:4: In file included from ./include/linux/etherdevice.h:20: In file included from ./include/linux/if_ether.h:19: In file included from ./include/linux/skbuff.h:15: In file included from ./include/linux/time.h:60: In file included from ./include/linux/time32.h:13: In file included from ./include/linux/timex.h:67: In file included from ./arch/x86/include/asm/timex.h:5: In file included from ./arch/x86/include/asm/processor.h:23: In file included from ./arch/x86/include/asm/msr.h:11: In file included from ./arch/x86/include/asm/cpumask.h:5: In file included from ./include/linux/cpumask.h:12: In file included from ./include/linux/bitmap.h:11: In file included from ./include/linux/string.h:254: ./include/linux/fortify-string.h:592:4: warning: call to '__read_overflow2_field' declared with 'warning' attribute: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning] __read_overflow2_field(q_size_field, size); In file included from drivers/net/wireless/mediatek/mt76/mt7996/main.c:6: In file included from drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h:9: In file included from ./include/linux/interrupt.h:8: In file included from ./include/linux/cpumask.h:12: In file included from ./include/linux/bitmap.h:11: In file included from ./include/linux/string.h:254: ./include/linux/fortify-string.h:592:4: warning: call to '__read_overflow2_field' declared with 'warning' attribute: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning] __read_overflow2_field(q_size_field, size); The compiler actually complains on 'mt7915_get_et_strings()', 'mt792x_get_et_strings()' and 'mt7996_get_et_strings()' due to the same reason: fortification logic inteprets call to 'memcpy()' as an attempt to copy the whole array from its first member and so issues an overread warning. These warnings may be silenced by passing an address of the whole array and not the first member to 'memcpy()'. Signed-off-by: Dmitry Antipov Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7915/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt792x_core.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c index d06c25dda325..d85105a43d70 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -1388,7 +1388,7 @@ void mt7915_get_et_strings(struct ieee80211_hw *hw, if (sset != ETH_SS_STATS) return; - memcpy(data, *mt7915_gstrings_stats, sizeof(mt7915_gstrings_stats)); + memcpy(data, mt7915_gstrings_stats, sizeof(mt7915_gstrings_stats)); data += sizeof(mt7915_gstrings_stats); page_pool_ethtool_stats_get_strings(data); } diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index ec98450a938f..f111c47fdca5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -358,7 +358,7 @@ void mt792x_get_et_strings(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (sset != ETH_SS_STATS) return; - memcpy(data, *mt792x_gstrings_stats, sizeof(mt792x_gstrings_stats)); + memcpy(data, mt792x_gstrings_stats, sizeof(mt792x_gstrings_stats)); data += sizeof(mt792x_gstrings_stats); page_pool_ethtool_stats_get_strings(data); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 6e0f0c100db8..620880e560e0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -1198,7 +1198,7 @@ void mt7996_get_et_strings(struct ieee80211_hw *hw, u32 sset, u8 *data) { if (sset == ETH_SS_STATS) - memcpy(data, *mt7996_gstrings_stats, + memcpy(data, mt7996_gstrings_stats, sizeof(mt7996_gstrings_stats)); } From 224f68c507b1eb6af1421af4a23680df6849c710 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:17 +0000 Subject: [PATCH 0029/1397] net: annotate data-races around sk->sk_tx_queue_mapping [ Upstream commit 0bb4d124d34044179b42a769a0c76f389ae973b6 ] This field can be read or written without socket lock being held. Add annotations to avoid load-store tearing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/sock.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 92f7ea62a915..97f7fbcbf61e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2006,21 +2006,33 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) /* sk_tx_queue_mapping accept only upto a 16-bit value */ if (WARN_ON_ONCE((unsigned short)tx_queue >= USHRT_MAX)) return; - sk->sk_tx_queue_mapping = tx_queue; + /* Paired with READ_ONCE() in sk_tx_queue_get() and + * other WRITE_ONCE() because socket lock might be not held. + */ + WRITE_ONCE(sk->sk_tx_queue_mapping, tx_queue); } #define NO_QUEUE_MAPPING USHRT_MAX static inline void sk_tx_queue_clear(struct sock *sk) { - sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING; + /* Paired with READ_ONCE() in sk_tx_queue_get() and + * other WRITE_ONCE() because socket lock might be not held. + */ + WRITE_ONCE(sk->sk_tx_queue_mapping, NO_QUEUE_MAPPING); } static inline int sk_tx_queue_get(const struct sock *sk) { - if (sk && sk->sk_tx_queue_mapping != NO_QUEUE_MAPPING) - return sk->sk_tx_queue_mapping; + if (sk) { + /* Paired with WRITE_ONCE() in sk_tx_queue_clear() + * and sk_tx_queue_set(). + */ + int val = READ_ONCE(sk->sk_tx_queue_mapping); + if (val != NO_QUEUE_MAPPING) + return val; + } return -1; } From 87324a50b4e833289bbe22339eccbc42e4d0578a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:18 +0000 Subject: [PATCH 0030/1397] net: annotate data-races around sk->sk_dst_pending_confirm [ Upstream commit eb44ad4e635132754bfbcb18103f1dcb7058aedd ] This field can be read or written without socket lock being held. Add annotations to avoid load-store tearing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/sock.h | 6 +++--- net/core/sock.c | 2 +- net/ipv4/tcp_output.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 97f7fbcbf61e..7753354d59c0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2181,7 +2181,7 @@ static inline void __dst_negative_advice(struct sock *sk) if (ndst != dst) { rcu_assign_pointer(sk->sk_dst_cache, ndst); sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); } } } @@ -2198,7 +2198,7 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = rcu_dereference_protected(sk->sk_dst_cache, lockdep_sock_is_held(sk)); rcu_assign_pointer(sk->sk_dst_cache, dst); @@ -2211,7 +2211,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old_dst; sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); dst_release(old_dst); } diff --git a/net/core/sock.c b/net/core/sock.c index 16584e2dd648..bfaf47b3f3c7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -600,7 +600,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, dst, cookie) == NULL) { sk_tx_queue_clear(sk); - sk->sk_dst_pending_confirm = 0; + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); RCU_INIT_POINTER(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f0723460753c..9ccfdc825004 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1331,7 +1331,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; refcount_add(skb->truesize, &sk->sk_wmem_alloc); - skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); + skb_set_dst_pending_confirm(skb, READ_ONCE(sk->sk_dst_pending_confirm)); /* Build TCP header and checksum it. */ th = (struct tcphdr *)skb->data; From 8175a9f662f7a2742a2c37735e76a39c036df00b Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Sat, 30 Sep 2023 07:54:47 +0300 Subject: [PATCH 0031/1397] wifi: ath12k: mhi: fix potential memory leak in ath12k_mhi_register() [ Upstream commit 47c27aa7ded4b8ead19b3487cc42a6185b762903 ] mhi_alloc_controller() allocates a memory space for mhi_ctrl. When some errors occur, mhi_ctrl should be freed by mhi_free_controller() and set ab_pci->mhi_ctrl = NULL. We can fix it by calling mhi_free_controller() when the failure happens and set ab_pci->mhi_ctrl = NULL in all of the places where we call mhi_free_controller(). Signed-off-by: Ma Ke Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230922021036.3604157-1-make_ruc2021@163.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/mhi.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mhi.c b/drivers/net/wireless/ath/ath12k/mhi.c index 42f1140baa4f..f83d3e09ae36 100644 --- a/drivers/net/wireless/ath/ath12k/mhi.c +++ b/drivers/net/wireless/ath/ath12k/mhi.c @@ -370,8 +370,7 @@ int ath12k_mhi_register(struct ath12k_pci *ab_pci) ret = ath12k_mhi_get_msi(ab_pci); if (ret) { ath12k_err(ab, "failed to get msi for mhi\n"); - mhi_free_controller(mhi_ctrl); - return ret; + goto free_controller; } mhi_ctrl->iova_start = 0; @@ -388,11 +387,15 @@ int ath12k_mhi_register(struct ath12k_pci *ab_pci) ret = mhi_register_controller(mhi_ctrl, ab->hw_params->mhi_config); if (ret) { ath12k_err(ab, "failed to register to mhi bus, err = %d\n", ret); - mhi_free_controller(mhi_ctrl); - return ret; + goto free_controller; } return 0; + +free_controller: + mhi_free_controller(mhi_ctrl); + ab_pci->mhi_ctrl = NULL; + return ret; } void ath12k_mhi_unregister(struct ath12k_pci *ab_pci) From a48cb9c6adfa8c1f309755a54b458c6cf4390c29 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Sat, 30 Sep 2023 07:54:48 +0300 Subject: [PATCH 0032/1397] wifi: ath10k: Don't touch the CE interrupt registers after power up [ Upstream commit 170c75d43a77dc937c58f07ecf847ba1b42ab74e ] As talked about in commit d66d24ac300c ("ath10k: Keep track of which interrupts fired, don't poll them"), if we access the copy engine register at a bad time then ath10k can go boom. However, it's not necessarily easy to know when it's safe to access them. The ChromeOS test labs saw a crash that looked like this at shutdown/reboot time (on a chromeos-5.15 kernel, but likely the problem could also reproduce upstream): Internal error: synchronous external abort: 96000010 [#1] PREEMPT SMP ... CPU: 4 PID: 6168 Comm: reboot Not tainted 5.15.111-lockdep-19350-g1d624fe6758f #1 010b9b233ab055c27c6dc88efb0be2f4e9e86f51 Hardware name: Google Kingoftown (DT) ... pc : ath10k_snoc_read32+0x50/0x74 [ath10k_snoc] lr : ath10k_snoc_read32+0x24/0x74 [ath10k_snoc] ... Call trace: ath10k_snoc_read32+0x50/0x74 [ath10k_snoc ...] ath10k_ce_disable_interrupt+0x190/0x65c [ath10k_core ...] ath10k_ce_disable_interrupts+0x8c/0x120 [ath10k_core ...] ath10k_snoc_hif_stop+0x78/0x660 [ath10k_snoc ...] ath10k_core_stop+0x13c/0x1ec [ath10k_core ...] ath10k_halt+0x398/0x5b0 [ath10k_core ...] ath10k_stop+0xfc/0x1a8 [ath10k_core ...] drv_stop+0x148/0x6b4 [mac80211 ...] ieee80211_stop_device+0x70/0x80 [mac80211 ...] ieee80211_do_stop+0x10d8/0x15b0 [mac80211 ...] ieee80211_stop+0x144/0x1a0 [mac80211 ...] __dev_close_many+0x1e8/0x2c0 dev_close_many+0x198/0x33c dev_close+0x140/0x210 cfg80211_shutdown_all_interfaces+0xc8/0x1e0 [cfg80211 ...] ieee80211_remove_interfaces+0x118/0x5c4 [mac80211 ...] ieee80211_unregister_hw+0x64/0x1f4 [mac80211 ...] ath10k_mac_unregister+0x4c/0xf0 [ath10k_core ...] ath10k_core_unregister+0x80/0xb0 [ath10k_core ...] ath10k_snoc_free_resources+0xb8/0x1ec [ath10k_snoc ...] ath10k_snoc_shutdown+0x98/0xd0 [ath10k_snoc ...] platform_shutdown+0x7c/0xa0 device_shutdown+0x3e0/0x58c kernel_restart_prepare+0x68/0xa0 kernel_restart+0x28/0x7c Though there's no known way to reproduce the problem, it makes sense that it would be the same issue where we're trying to access copy engine registers when it's not allowed. Let's fix this by changing how we "disable" the interrupts. Instead of tweaking the copy engine registers we'll just use disable_irq() and enable_irq(). Then we'll configure the interrupts once at power up time. Tested-on: WCN3990 hw1.0 SNOC WLAN.HL.3.2.2.c10-00754-QCAHLSWMTPL-1 Signed-off-by: Douglas Anderson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230630151842.1.If764ede23c4e09a43a842771c2ddf99608f25f8e@changeid Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/snoc.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 26214c00cd0d..2c39bad7ebfb 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -828,12 +828,20 @@ static void ath10k_snoc_hif_get_default_pipe(struct ath10k *ar, static inline void ath10k_snoc_irq_disable(struct ath10k *ar) { - ath10k_ce_disable_interrupts(ar); + struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar); + int id; + + for (id = 0; id < CE_COUNT_MAX; id++) + disable_irq(ar_snoc->ce_irqs[id].irq_line); } static inline void ath10k_snoc_irq_enable(struct ath10k *ar) { - ath10k_ce_enable_interrupts(ar); + struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar); + int id; + + for (id = 0; id < CE_COUNT_MAX; id++) + enable_irq(ar_snoc->ce_irqs[id].irq_line); } static void ath10k_snoc_rx_pipe_cleanup(struct ath10k_snoc_pipe *snoc_pipe) @@ -1090,6 +1098,8 @@ static int ath10k_snoc_hif_power_up(struct ath10k *ar, goto err_free_rri; } + ath10k_ce_enable_interrupts(ar); + return 0; err_free_rri: @@ -1253,8 +1263,8 @@ static int ath10k_snoc_request_irq(struct ath10k *ar) for (id = 0; id < CE_COUNT_MAX; id++) { ret = request_irq(ar_snoc->ce_irqs[id].irq_line, - ath10k_snoc_per_engine_handler, 0, - ce_name[id], ar); + ath10k_snoc_per_engine_handler, + IRQF_NO_AUTOEN, ce_name[id], ar); if (ret) { ath10k_err(ar, "failed to register IRQ handler for CE %d: %d\n", From 0570c9fd1609da5d981a181892fa74bbd1ca860a Mon Sep 17 00:00:00 2001 From: Raju Lakkaraju Date: Mon, 25 Sep 2023 13:30:59 +0530 Subject: [PATCH 0033/1397] net: sfp: add quirk for FS's 2.5G copper SFP [ Upstream commit e27aca3760c08b7b05aea71068bd609aa93e7b35 ] Add a quirk for a copper SFP that identifies itself as "FS" "SFP-2.5G-T". This module's PHY is inaccessible, and can only run at 2500base-X with the host without negotiation. Add a quirk to enable the 2500base-X interface mode with 2500base-T support and disable auto negotiation. Signed-off-by: Raju Lakkaraju Link: https://lore.kernel.org/r/20230925080059.266240-1-Raju.Lakkaraju@microchip.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/sfp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index a50038a45250..3679a43f4eb0 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -468,6 +468,9 @@ static const struct sfp_quirk sfp_quirks[] = { SFP_QUIRK("HUAWEI", "MA5671A", sfp_quirk_2500basex, sfp_fixup_ignore_tx_fault), + // FS 2.5G Base-T + SFP_QUIRK_M("FS", "SFP-2.5G-T", sfp_quirk_oem_2_5g), + // Lantech 8330-262D-E can operate at 2500base-X, but incorrectly report // 2500MBd NRZ in their EEPROM SFP_QUIRK_M("Lantech", "8330-262D-E", sfp_quirk_2500basex), From d55a40a6fbeda6e0b96acce4729a88a98cb74106 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 10 Oct 2023 22:15:14 +0300 Subject: [PATCH 0034/1397] vsock: read from socket's error queue [ Upstream commit 49dbe25adac42d3e06f65d1420946bec65896222 ] This adds handling of MSG_ERRQUEUE input flag in receive call. This flag is used to read socket's error queue instead of data queue. Possible scenario of error queue usage is receiving completions for transmission with MSG_ZEROCOPY flag. This patch also adds new defines: 'SOL_VSOCK' and 'VSOCK_RECVERR'. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/socket.h | 1 + include/uapi/linux/vm_sockets.h | 17 +++++++++++++++++ net/vmw_vsock/af_vsock.c | 6 ++++++ 3 files changed, 24 insertions(+) diff --git a/include/linux/socket.h b/include/linux/socket.h index 39b74d83c7c4..cfcb7e2c3813 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -383,6 +383,7 @@ struct ucred { #define SOL_MPTCP 284 #define SOL_MCTP 285 #define SOL_SMC 286 +#define SOL_VSOCK 287 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index c60ca33eac59..ed07181d4eff 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -191,4 +191,21 @@ struct sockaddr_vm { #define IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) +/* MSG_ZEROCOPY notifications are encoded in the standard error format, + * sock_extended_err. See Documentation/networking/msg_zerocopy.rst in + * kernel source tree for more details. + */ + +/* 'cmsg_level' field value of 'struct cmsghdr' for notification parsing + * when MSG_ZEROCOPY flag is used on transmissions. + */ + +#define SOL_VSOCK 287 + +/* 'cmsg_type' field value of 'struct cmsghdr' for notification parsing + * when MSG_ZEROCOPY flag is used on transmissions. + */ + +#define VSOCK_RECVERR 1 + #endif /* _UAPI_VM_SOCKETS_H */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 020cf17ab7e4..ccd8cefeea7b 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +111,7 @@ #include #include #include +#include static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); static void vsock_sk_destruct(struct sock *sk); @@ -2134,6 +2136,10 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int err; sk = sock->sk; + + if (unlikely(flags & MSG_ERRQUEUE)) + return sock_recv_errqueue(sk, msg, len, SOL_VSOCK, VSOCK_RECVERR); + vsk = vsock_sk(sk); err = 0; From aa4dd55adeb6db7939aa2ad218c39a91a8913737 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 11 Oct 2023 15:37:28 -0700 Subject: [PATCH 0035/1397] bpf: Ensure proper register state printing for cond jumps [ Upstream commit 1a8a315f008a58f54fecb012b928aa6a494435b3 ] Verifier emits relevant register state involved in any given instruction next to it after `;` to the right, if possible. Or, worst case, on the separate line repeating instruction index. E.g., a nice and simple case would be: 2: (d5) if r0 s<= 0x0 goto pc+1 ; R0_w=0 But if there is some intervening extra output (e.g., precision backtracking log) involved, we are supposed to see the state after the precision backtrack log: 4: (75) if r0 s>= 0x0 goto pc+1 mark_precise: frame0: last_idx 4 first_idx 0 subseq_idx -1 mark_precise: frame0: regs=r0 stack= before 2: (d5) if r0 s<= 0x0 goto pc+1 mark_precise: frame0: regs=r0 stack= before 1: (b7) r0 = 0 6: R0_w=0 First off, note that in `6: R0_w=0` instruction index corresponds to the next instruction, not to the conditional jump instruction itself, which is wrong and we'll get to that. But besides that, the above is a happy case that does work today. Yet, if it so happens that precision backtracking had to traverse some of the parent states, this `6: R0_w=0` state output would be missing. This is due to a quirk of print_verifier_state() routine, which performs mark_verifier_state_clean(env) at the end. This marks all registers as "non-scratched", which means that subsequent logic to print *relevant* registers (that is, "scratched ones") fails and doesn't see anything relevant to print and skips the output altogether. print_verifier_state() is used both to print instruction context, but also to print an **entire** verifier state indiscriminately, e.g., during precision backtracking (and in a few other situations, like during entering or exiting subprogram). Which means if we have to print entire parent state before getting to printing instruction context state, instruction context is marked as clean and is omitted. Long story short, this is definitely not intentional. So we fix this behavior in this patch by teaching print_verifier_state() to clear scratch state only if it was used to print instruction state, not the parent/callback state. This is determined by print_all option, so if it's not set, we don't clear scratch state. This fixes missing instruction state for these cases. As for the mismatched instruction index, we fix that by making sure we call print_insn_state() early inside check_cond_jmp_op() before we adjusted insn_idx based on jump branch taken logic. And with that we get desired correct information: 9: (16) if w4 == 0x1 goto pc+9 mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1 mark_precise: frame0: parent state regs=r4 stack=: R2_w=1944 R4_rw=P1 R10=fp0 mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9 mark_precise: frame0: regs=r4 stack= before 8: (66) if w4 s> 0x3 goto pc+5 mark_precise: frame0: regs=r4 stack= before 7: (b7) r4 = 1 9: R4=1 Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20231011223728.3188086-6-andrii@kernel.org Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 82c9e5c47031..6542685d6772 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1515,7 +1515,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (state->in_async_callback_fn) verbose(env, " async_cb"); verbose(env, "\n"); - mark_verifier_state_clean(env); + if (!print_all) + mark_verifier_state_clean(env); } static inline u32 vlog_alignment(u32 pos) @@ -14139,6 +14140,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, !sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx)) return -EFAULT; + if (env->log.level & BPF_LOG_LEVEL) + print_insn_state(env, this_branch->frame[this_branch->curframe]); *insn_idx += insn->off; return 0; } else if (pred == 0) { @@ -14151,6 +14154,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, *insn_idx + insn->off + 1, *insn_idx)) return -EFAULT; + if (env->log.level & BPF_LOG_LEVEL) + print_insn_state(env, this_branch->frame[this_branch->curframe]); return 0; } From d09d246f6abdb151d19f8ff73121f9b13c518770 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Tue, 17 Oct 2023 12:16:44 +0300 Subject: [PATCH 0036/1397] wifi: iwlwifi: mvm: fix size check for fw_link_id [ Upstream commit e25bd1853cc8308158d97e5b3696ea3689fa0840 ] Check that fw_link_id does not exceed the size of link_id_to_link_conf array. There's no any codepath that can cause that, but it's still safer to verify in case fw_link_id gets corrupted. Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20231017115047.3385bd11f423.I2d30fdb464f951c648217553c47901857a0046c7@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/link.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/link.c b/drivers/net/wireless/intel/iwlwifi/mvm/link.c index 6e1ad65527d1..4ab55a1fcbf0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/link.c @@ -60,7 +60,7 @@ int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) { link_info->fw_link_id = iwl_mvm_get_free_fw_link_id(mvm, mvmvif); - if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) + if (link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf)) return -EINVAL; rcu_assign_pointer(mvm->link_id_to_link_conf[link_info->fw_link_id], @@ -243,7 +243,7 @@ int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, int ret; if (WARN_ON(!link_info || - link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID)) + link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf))) return -EINVAL; RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id], From 13b1ebad4c175e6a9b0748acbf133c21a15d282a Mon Sep 17 00:00:00 2001 From: youwan Wang Date: Wed, 11 Oct 2023 13:14:47 +0800 Subject: [PATCH 0037/1397] Bluetooth: btusb: Add date->evt_skb is NULL check [ Upstream commit 624820f7c8826dd010e8b1963303c145f99816e9 ] fix crash because of null pointers [ 6104.969662] BUG: kernel NULL pointer dereference, address: 00000000000000c8 [ 6104.969667] #PF: supervisor read access in kernel mode [ 6104.969668] #PF: error_code(0x0000) - not-present page [ 6104.969670] PGD 0 P4D 0 [ 6104.969673] Oops: 0000 [#1] SMP NOPTI [ 6104.969684] RIP: 0010:btusb_mtk_hci_wmt_sync+0x144/0x220 [btusb] [ 6104.969688] RSP: 0018:ffffb8d681533d48 EFLAGS: 00010246 [ 6104.969689] RAX: 0000000000000000 RBX: ffff8ad560bb2000 RCX: 0000000000000006 [ 6104.969691] RDX: 0000000000000000 RSI: ffffb8d681533d08 RDI: 0000000000000000 [ 6104.969692] RBP: ffffb8d681533d70 R08: 0000000000000001 R09: 0000000000000001 [ 6104.969694] R10: 0000000000000001 R11: 00000000fa83b2da R12: ffff8ad461d1d7c0 [ 6104.969695] R13: 0000000000000000 R14: ffff8ad459618c18 R15: ffffb8d681533d90 [ 6104.969697] FS: 00007f5a1cab9d40(0000) GS:ffff8ad578200000(0000) knlGS:00000 [ 6104.969699] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6104.969700] CR2: 00000000000000c8 CR3: 000000018620c001 CR4: 0000000000760ef0 [ 6104.969701] PKRU: 55555554 [ 6104.969702] Call Trace: [ 6104.969708] btusb_mtk_shutdown+0x44/0x80 [btusb] [ 6104.969732] hci_dev_do_close+0x470/0x5c0 [bluetooth] [ 6104.969748] hci_rfkill_set_block+0x56/0xa0 [bluetooth] [ 6104.969753] rfkill_set_block+0x92/0x160 [ 6104.969755] rfkill_fop_write+0x136/0x1e0 [ 6104.969759] __vfs_write+0x18/0x40 [ 6104.969761] vfs_write+0xdf/0x1c0 [ 6104.969763] ksys_write+0xb1/0xe0 [ 6104.969765] __x64_sys_write+0x1a/0x20 [ 6104.969769] do_syscall_64+0x51/0x180 [ 6104.969771] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 6104.969773] RIP: 0033:0x7f5a21f18fef [ 6104.9] RSP: 002b:00007ffeefe39010 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 [ 6104.969780] RAX: ffffffffffffffda RBX: 000055c10a7560a0 RCX: 00007f5a21f18fef [ 6104.969781] RDX: 0000000000000008 RSI: 00007ffeefe39060 RDI: 0000000000000012 [ 6104.969782] RBP: 00007ffeefe39060 R08: 0000000000000000 R09: 0000000000000017 [ 6104.969784] R10: 00007ffeefe38d97 R11: 0000000000000293 R12: 0000000000000002 [ 6104.969785] R13: 00007ffeefe39220 R14: 00007ffeefe391a0 R15: 000055c10a72acf0 Signed-off-by: youwan Wang Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btusb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 499f4809fcdf..63ebd5677d26 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2818,6 +2818,9 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, goto err_free_wc; } + if (data->evt_skb == NULL) + goto err_free_wc; + /* Parse and handle the return WMT event */ wmt_evt = (struct btmtk_hci_wmt_evt *)data->evt_skb->data; if (wmt_evt->whdr.op != hdr->op) { From 56a4fdde95ed98d864611155f6728983e199e198 Mon Sep 17 00:00:00 2001 From: ZhengHan Wang Date: Wed, 18 Oct 2023 12:30:55 +0200 Subject: [PATCH 0038/1397] Bluetooth: Fix double free in hci_conn_cleanup [ Upstream commit a85fb91e3d728bdfc80833167e8162cce8bc7004 ] syzbot reports a slab use-after-free in hci_conn_hash_flush [1]. After releasing an object using hci_conn_del_sysfs in the hci_conn_cleanup function, releasing the same object again using the hci_dev_put and hci_conn_put functions causes a double free. Here's a simplified flow: hci_conn_del_sysfs: hci_dev_put put_device kobject_put kref_put kobject_release kobject_cleanup kfree_const kfree(name) hci_dev_put: ... kfree(name) hci_conn_put: put_device ... kfree(name) This patch drop the hci_dev_put and hci_conn_put function call in hci_conn_cleanup function, because the object is freed in hci_conn_del_sysfs function. This patch also fixes the refcounting in hci_conn_add_sysfs() and hci_conn_del_sysfs() to take into account device_add() failures. This fixes CVE-2023-28464. Link: https://syzkaller.appspot.com/bug?id=1bb51491ca5df96a5f724899d1dbb87afda61419 [1] Signed-off-by: ZhengHan Wang Co-developed-by: Luiz Augusto von Dentz Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_conn.c | 6 ++---- net/bluetooth/hci_sysfs.c | 23 ++++++++++++----------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7450b550cff6..f3139c4c20fc 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -172,13 +172,11 @@ static void hci_conn_cleanup(struct hci_conn *conn) hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); } - hci_conn_del_sysfs(conn); - debugfs_remove_recursive(conn->debugfs); - hci_dev_put(hdev); + hci_conn_del_sysfs(conn); - hci_conn_put(conn); + hci_dev_put(hdev); } static void hci_acl_create_connection(struct hci_conn *conn) diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 15b33579007c..367e32fe30eb 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -35,7 +35,7 @@ void hci_conn_init_sysfs(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); conn->dev.type = &bt_link; conn->dev.class = &bt_class; @@ -48,27 +48,30 @@ void hci_conn_add_sysfs(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - BT_DBG("conn %p", conn); + bt_dev_dbg(hdev, "conn %p", conn); if (device_is_registered(&conn->dev)) return; dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); - if (device_add(&conn->dev) < 0) { + if (device_add(&conn->dev) < 0) bt_dev_err(hdev, "failed to register connection device"); - return; - } - - hci_dev_hold(hdev); } void hci_conn_del_sysfs(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - if (!device_is_registered(&conn->dev)) + bt_dev_dbg(hdev, "conn %p", conn); + + if (!device_is_registered(&conn->dev)) { + /* If device_add() has *not* succeeded, use *only* put_device() + * to drop the reference count. + */ + put_device(&conn->dev); return; + } while (1) { struct device *dev; @@ -80,9 +83,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn) put_device(dev); } - device_del(&conn->dev); - - hci_dev_put(hdev); + device_unregister(&conn->dev); } static void bt_host_release(struct device *dev) From 2deacc5c6bc528b68bf84524258066d9f8cddd0d Mon Sep 17 00:00:00 2001 From: Jonathan Denose Date: Tue, 24 Oct 2023 09:13:36 -0500 Subject: [PATCH 0039/1397] ACPI: EC: Add quirk for HP 250 G7 Notebook PC [ Upstream commit 891ddc03e2f4395e24795596e032f57d5ab37fe7 ] Add GPE quirk entry for HP 250 G7 Notebook PC. This change allows the lid switch to be identified as the lid switch and not a keyboard button. With the lid switch properly identified, the device triggers suspend correctly on lid close. Signed-off-by: Jonathan Denose Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/ec.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index c95d0edb0be9..a59c11df7375 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1924,6 +1924,16 @@ static const struct dmi_system_id ec_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion Gaming Laptop 15-dk1xxx"), }, }, + { + /* + * HP 250 G7 Notebook PC + */ + .callback = ec_honor_dsdt_gpe, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP 250 G7 Notebook PC"), + }, + }, { /* * Samsung hardware From 6bb50965e3fc52d54d25d3ad7081530d584c30da Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Mon, 23 Oct 2023 20:38:56 +0200 Subject: [PATCH 0040/1397] tsnep: Fix tsnep_request_irq() format-overflow warning [ Upstream commit 00e984cb986b31e9313745e51daceaa1e1eb7351 ] Compiler warns about a possible format-overflow in tsnep_request_irq(): drivers/net/ethernet/engleder/tsnep_main.c:884:55: warning: 'sprintf' may write a terminating nul past the end of the destination [-Wformat-overflow=] sprintf(queue->name, "%s-rx-%d", name, ^ drivers/net/ethernet/engleder/tsnep_main.c:881:55: warning: 'sprintf' may write a terminating nul past the end of the destination [-Wformat-overflow=] sprintf(queue->name, "%s-tx-%d", name, ^ drivers/net/ethernet/engleder/tsnep_main.c:878:49: warning: '-txrx-' directive writing 6 bytes into a region of size between 5 and 25 [-Wformat-overflow=] sprintf(queue->name, "%s-txrx-%d", name, ^~~~~~ Actually overflow cannot happen. Name is limited to IFNAMSIZ, because netdev_name() is called during ndo_open(). queue_index is single char, because less than 10 queues are supported. Fix warning with snprintf(). Additionally increase buffer to 32 bytes, because those 7 additional bytes were unused anyway. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310182028.vmDthIUa-lkp@intel.com/ Signed-off-by: Gerhard Engleder Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20231023183856.58373-1-gerhard@engleder-embedded.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/engleder/tsnep.h | 2 +- drivers/net/ethernet/engleder/tsnep_main.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h index 6e14c918e3fb..f188fba021a6 100644 --- a/drivers/net/ethernet/engleder/tsnep.h +++ b/drivers/net/ethernet/engleder/tsnep.h @@ -143,7 +143,7 @@ struct tsnep_rx { struct tsnep_queue { struct tsnep_adapter *adapter; - char name[IFNAMSIZ + 9]; + char name[IFNAMSIZ + 16]; struct tsnep_tx *tx; struct tsnep_rx *rx; diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 8b992dc9bb52..38da2d6c250e 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -1779,14 +1779,14 @@ static int tsnep_request_irq(struct tsnep_queue *queue, bool first) dev = queue->adapter; } else { if (queue->tx && queue->rx) - sprintf(queue->name, "%s-txrx-%d", name, - queue->rx->queue_index); + snprintf(queue->name, sizeof(queue->name), "%s-txrx-%d", + name, queue->rx->queue_index); else if (queue->tx) - sprintf(queue->name, "%s-tx-%d", name, - queue->tx->queue_index); + snprintf(queue->name, sizeof(queue->name), "%s-tx-%d", + name, queue->tx->queue_index); else - sprintf(queue->name, "%s-rx-%d", name, - queue->rx->queue_index); + snprintf(queue->name, sizeof(queue->name), "%s-rx-%d", + name, queue->rx->queue_index); handler = tsnep_irq_txrx; dev = queue; } From 30d8ff067f87775aefce1ff7c037d186587822d7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 9 Sep 2023 16:18:10 +0200 Subject: [PATCH 0041/1397] gpiolib: acpi: Add a ignore interrupt quirk for Peaq C1010 [ Upstream commit 6cc64f6173751d212c9833bde39e856b4f585a3e ] On the Peaq C1010 2-in-1 INT33FC:00 pin 3 is connected to a "dolby" button. At the ACPI level an _AEI event-handler is connected which sets an ACPI variable to 1 on both edges. This variable can be polled + cleared to 0 using WMI. Since the variable is set on both edges the WMI interface is pretty useless even when polling. So instead of writing a custom WMI driver for this the x86-android-tablets code instantiates a gpio-keys platform device for the "dolby" button. Add an ignore_interrupt quirk for INT33FC:00 pin 3 on the Peaq C1010, so that it is not seen as busy when the gpio-keys driver requests it. Note this replaces a hack in x86-android-tablets where it would call acpi_gpiochip_free_interrupts() on the INT33FC:00 GPIO controller. acpi_gpiochip_free_interrupts() is considered private (internal) gpiolib API so x86-android-tablets should stop using it. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg Acked-by: Linus Walleij Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230909141816.58358-3-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib-acpi.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 51e41676de0b..5d04720107ef 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1655,6 +1655,26 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_wake = "SYNA1202:00@16", }, }, + { + /* + * On the Peaq C1010 2-in-1 INT33FC:00 pin 3 is connected to + * a "dolby" button. At the ACPI level an _AEI event-handler + * is connected which sets an ACPI variable to 1 on both + * edges. This variable can be polled + cleared to 0 using + * WMI. But since the variable is set on both edges the WMI + * interface is pretty useless even when polling. + * So instead the x86-android-tablets code instantiates + * a gpio-keys platform device for it. + * Ignore the _AEI handler for the pin, so that it is not busy. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "PEAQ"), + DMI_MATCH(DMI_PRODUCT_NAME, "PEAQ PMM C1010 MD99187"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_interrupt = "INT33FC:00@3", + }, + }, {} /* Terminating entry */ }; From 20bcab576168792c4127105b8f54d2e9484448e7 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Tue, 3 Oct 2023 08:05:04 +0000 Subject: [PATCH 0042/1397] platform/chrome: kunit: initialize lock for fake ec_dev [ Upstream commit e410b4ade83d06a046f6e32b5085997502ba0559 ] cros_ec_cmd_xfer() uses ec_dev->lock. Initialize it. Otherwise, dmesg shows the following: > DEBUG_LOCKS_WARN_ON(lock->magic != lock) > ... > Call Trace: > ? __mutex_lock > ? __warn > ? __mutex_lock > ... > ? cros_ec_cmd_xfer Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20231003080504.4011337-1-tzungbi@kernel.org Signed-off-by: Tzung-Bi Shih Signed-off-by: Sasha Levin --- drivers/platform/chrome/cros_ec_proto_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/chrome/cros_ec_proto_test.c b/drivers/platform/chrome/cros_ec_proto_test.c index 5b9748e0463b..63e38671e95a 100644 --- a/drivers/platform/chrome/cros_ec_proto_test.c +++ b/drivers/platform/chrome/cros_ec_proto_test.c @@ -2668,6 +2668,7 @@ static int cros_ec_proto_test_init(struct kunit *test) ec_dev->dev->release = cros_ec_proto_test_release; ec_dev->cmd_xfer = cros_kunit_ec_xfer_mock; ec_dev->pkt_xfer = cros_kunit_ec_xfer_mock; + mutex_init(&ec_dev->lock); priv->msg = (struct cros_ec_command *)priv->_msg; From 76e2b215ea0941ec79a8bf9abb74b847b8fe0c14 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Tue, 17 Oct 2023 13:02:16 +0200 Subject: [PATCH 0043/1397] of: address: Fix address translation when address-size is greater than 2 [ Upstream commit 42604f8eb7ba04b589375049cc76282dad4677d2 ] With the recent addition of of_pci_prop_ranges() in commit 407d1a51921e ("PCI: Create device tree node for bridge"), the ranges property can have a 3 cells child address, a 3 cells parent address and a 2 cells child size. A range item property for a PCI device is filled as follow: 0 0 <-- Child --> <-- Parent (PCI definition) --> <- BAR size (64bit) --> This allow to translate BAR addresses from the DT. For instance: pci@0,0 { #address-cells = <0x03>; #size-cells = <0x02>; device_type = "pci"; compatible = "pci11ab,100", "pciclass,060400", "pciclass,0604"; ranges = <0x82000000 0x00 0xe8000000 0x82000000 0x00 0xe8000000 0x00 0x4400000>; ... dev@0,0 { #address-cells = <0x03>; #size-cells = <0x02>; compatible = "pci1055,9660", "pciclass,020000", "pciclass,0200"; /* Translations for BAR0 to BAR5 */ ranges = <0x00 0x00 0x00 0x82010000 0x00 0xe8000000 0x00 0x2000000 0x01 0x00 0x00 0x82010000 0x00 0xea000000 0x00 0x1000000 0x02 0x00 0x00 0x82010000 0x00 0xeb000000 0x00 0x800000 0x03 0x00 0x00 0x82010000 0x00 0xeb800000 0x00 0x800000 0x04 0x00 0x00 0x82010000 0x00 0xec000000 0x00 0x20000 0x05 0x00 0x00 0x82010000 0x00 0xec020000 0x00 0x2000>; ... pci-ep-bus@0 { #address-cells = <0x01>; #size-cells = <0x01>; compatible = "simple-bus"; /* Translate 0xe2000000 to BAR0 and 0xe0000000 to BAR1 */ ranges = <0xe2000000 0x00 0x00 0x00 0x2000000 0xe0000000 0x01 0x00 0x00 0x1000000>; ... }; }; }; During the translation process, the "default-flags" map() function is used to select the matching item in the ranges table and determine the address offset from this matching item. This map() function simply calls of_read_number() and when address-size is greater than 2, the map() function skips the extra high address part (ie part over 64bit). This lead to a wrong matching item and a wrong offset computation. Also during the translation itself, the extra high part related to the parent address is not present in the translated address. Fix the "default-flags" map() and translate() in order to take into account the child extra high address part in map() and the parent extra high address part in translate() and so having a correct address translation for ranges patterns such as the one given in the example above. Signed-off-by: Herve Codina Link: https://lore.kernel.org/r/20231017110221.189299-2-herve.codina@bootlin.com Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/address.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index e692809ff822..3219c5177750 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -100,6 +100,32 @@ static unsigned int of_bus_default_get_flags(const __be32 *addr) return IORESOURCE_MEM; } +static u64 of_bus_default_flags_map(__be32 *addr, const __be32 *range, int na, + int ns, int pna) +{ + u64 cp, s, da; + + /* Check that flags match */ + if (*addr != *range) + return OF_BAD_ADDR; + + /* Read address values, skipping high cell */ + cp = of_read_number(range + 1, na - 1); + s = of_read_number(range + na + pna, ns); + da = of_read_number(addr + 1, na - 1); + + pr_debug("default flags map, cp=%llx, s=%llx, da=%llx\n", cp, s, da); + + if (da < cp || da >= (cp + s)) + return OF_BAD_ADDR; + return da - cp; +} + +static int of_bus_default_flags_translate(__be32 *addr, u64 offset, int na) +{ + /* Keep "flags" part (high cell) in translated address */ + return of_bus_default_translate(addr + 1, offset, na - 1); +} #ifdef CONFIG_PCI static unsigned int of_bus_pci_get_flags(const __be32 *addr) @@ -374,8 +400,8 @@ static struct of_bus of_busses[] = { .addresses = "reg", .match = of_bus_default_flags_match, .count_cells = of_bus_default_count_cells, - .map = of_bus_default_map, - .translate = of_bus_default_translate, + .map = of_bus_default_flags_map, + .translate = of_bus_default_flags_translate, .has_flags = true, .get_flags = of_bus_default_flags_get_flags, }, From 9d43c83cd8621322442b2da084243f442f21a273 Mon Sep 17 00:00:00 2001 From: Olli Asikainen Date: Tue, 24 Oct 2023 22:09:21 +0300 Subject: [PATCH 0044/1397] platform/x86: thinkpad_acpi: Add battery quirk for Thinkpad X120e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 916646758aea81a143ce89103910f715ed923346 ] Thinkpad X120e also needs this battery quirk. Signed-off-by: Olli Asikainen Link: https://lore.kernel.org/r/20231024190922.2742-1-olli.asikainen@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/thinkpad_acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 41584427dc32..a46fc417cb20 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -9816,6 +9816,7 @@ static const struct tpacpi_quirk battery_quirk_table[] __initconst = { * Individual addressing is broken on models that expose the * primary battery as BAT1. */ + TPACPI_Q_LNV('8', 'F', true), /* Thinkpad X120e */ TPACPI_Q_LNV('J', '7', true), /* B5400 */ TPACPI_Q_LNV('J', 'I', true), /* Thinkpad 11e */ TPACPI_Q_LNV3('R', '0', 'B', true), /* Thinkpad 11e gen 3 */ From bc2808a28139230726806e059bcba1447366b542 Mon Sep 17 00:00:00 2001 From: Sui Jingfeng Date: Fri, 28 Jul 2023 02:58:55 +0800 Subject: [PATCH 0045/1397] drm/gma500: Fix call trace when psb_gem_mm_init() fails [ Upstream commit da596080b2b400c50fe9f8f237bcaf09fed06af8 ] Because the gma_irq_install() is call after psb_gem_mm_init() function, when psb_gem_mm_init() fails, the interrupt line haven't been allocated. Yet the gma_irq_uninstall() is called in the psb_driver_unload() function without checking if checking the irq is registered or not. The calltrace is appended as following: [ 20.539253] ioremap memtype_reserve failed -16 [ 20.543895] gma500 0000:00:02.0: Failure to map stolen base. [ 20.565049] ------------[ cut here ]------------ [ 20.565066] Trying to free already-free IRQ 16 [ 20.565087] WARNING: CPU: 1 PID: 381 at kernel/irq/manage.c:1893 free_irq+0x209/0x370 [ 20.565316] CPU: 1 PID: 381 Comm: systemd-udevd Tainted: G C 6.5.0-rc1+ #368 [ 20.565329] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./IMB-140D Plus, BIOS P1.10 11/18/2013 [ 20.565338] RIP: 0010:free_irq+0x209/0x370 [ 20.565357] Code: 41 5d 41 5e 41 5f 5d 31 d2 89 d1 89 d6 89 d7 41 89 d1 c3 cc cc cc cc 8b 75 d0 48 c7 c7 e0 77 12 9f 4c 89 4d c8 e8 57 fe f4 ff <0f> 0b 48 8b 75 c8 4c 89 f7 e8 29 f3 f1 00 49 8b 47 40 48 8b 40 78 [ 20.565369] RSP: 0018:ffffae3b40733808 EFLAGS: 00010046 [ 20.565382] RAX: 0000000000000000 RBX: ffff9f8082bfe000 RCX: 0000000000000000 [ 20.565390] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 20.565397] RBP: ffffae3b40733840 R08: 0000000000000000 R09: 0000000000000000 [ 20.565405] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9f80871c3100 [ 20.565413] R13: ffff9f80835d3360 R14: ffff9f80835d32a4 R15: ffff9f80835d3200 [ 20.565424] FS: 00007f13d36458c0(0000) GS:ffff9f8138880000(0000) knlGS:0000000000000000 [ 20.565434] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 20.565441] CR2: 00007f0d046f3f20 CR3: 0000000006c8c000 CR4: 00000000000006e0 [ 20.565450] Call Trace: [ 20.565458] [ 20.565470] ? show_regs+0x72/0x90 [ 20.565488] ? free_irq+0x209/0x370 [ 20.565504] ? __warn+0x8d/0x160 [ 20.565520] ? free_irq+0x209/0x370 [ 20.565536] ? report_bug+0x1bb/0x1d0 [ 20.565555] ? handle_bug+0x46/0x90 [ 20.565572] ? exc_invalid_op+0x19/0x80 [ 20.565587] ? asm_exc_invalid_op+0x1b/0x20 [ 20.565607] ? free_irq+0x209/0x370 [ 20.565625] ? free_irq+0x209/0x370 [ 20.565644] gma_irq_uninstall+0x15b/0x1e0 [gma500_gfx] [ 20.565728] psb_driver_unload+0x27/0x190 [gma500_gfx] [ 20.565800] psb_pci_probe+0x5d2/0x790 [gma500_gfx] [ 20.565873] local_pci_probe+0x48/0xb0 [ 20.565892] pci_device_probe+0xc8/0x280 [ 20.565912] really_probe+0x1d2/0x440 [ 20.565929] __driver_probe_device+0x8a/0x190 [ 20.565944] driver_probe_device+0x23/0xd0 [ 20.565957] __driver_attach+0x10f/0x220 [ 20.565971] ? __pfx___driver_attach+0x10/0x10 [ 20.565984] bus_for_each_dev+0x7a/0xe0 [ 20.566002] driver_attach+0x1e/0x30 [ 20.566014] bus_add_driver+0x127/0x240 [ 20.566029] driver_register+0x64/0x140 [ 20.566043] ? __pfx_psb_init+0x10/0x10 [gma500_gfx] [ 20.566111] __pci_register_driver+0x68/0x80 [ 20.566128] psb_init+0x2c/0xff0 [gma500_gfx] [ 20.566194] do_one_initcall+0x46/0x330 [ 20.566214] ? kmalloc_trace+0x2a/0xb0 [ 20.566233] do_init_module+0x6a/0x270 [ 20.566250] load_module+0x207f/0x23a0 [ 20.566278] init_module_from_file+0x9c/0xf0 [ 20.566293] ? init_module_from_file+0x9c/0xf0 [ 20.566315] idempotent_init_module+0x184/0x240 [ 20.566335] __x64_sys_finit_module+0x64/0xd0 [ 20.566352] do_syscall_64+0x59/0x90 [ 20.566366] ? ksys_mmap_pgoff+0x123/0x270 [ 20.566378] ? __secure_computing+0x9b/0x110 [ 20.566392] ? exit_to_user_mode_prepare+0x39/0x190 [ 20.566406] ? syscall_exit_to_user_mode+0x2a/0x50 [ 20.566420] ? do_syscall_64+0x69/0x90 [ 20.566433] ? do_syscall_64+0x69/0x90 [ 20.566445] ? do_syscall_64+0x69/0x90 [ 20.566458] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 20.566472] RIP: 0033:0x7f13d351ea3d [ 20.566485] Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c3 a3 0f 00 f7 d8 64 89 01 48 [ 20.566496] RSP: 002b:00007ffe566c1fd8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 20.566510] RAX: ffffffffffffffda RBX: 000055e66806eec0 RCX: 00007f13d351ea3d [ 20.566519] RDX: 0000000000000000 RSI: 00007f13d36d9441 RDI: 0000000000000010 [ 20.566527] RBP: 0000000000020000 R08: 0000000000000000 R09: 0000000000000002 [ 20.566535] R10: 0000000000000010 R11: 0000000000000246 R12: 00007f13d36d9441 [ 20.566543] R13: 000055e6681108c0 R14: 000055e66805ba70 R15: 000055e66819a9c0 [ 20.566559] [ 20.566566] ---[ end trace 0000000000000000 ]--- Signed-off-by: Sui Jingfeng Signed-off-by: Patrik Jakobsson Link: https://patchwork.freedesktop.org/patch/msgid/20230727185855.713318-1-suijingfeng@loongson.cn Signed-off-by: Sasha Levin --- drivers/gpu/drm/gma500/psb_drv.h | 1 + drivers/gpu/drm/gma500/psb_irq.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h index f7f709df99b4..70d9adafa233 100644 --- a/drivers/gpu/drm/gma500/psb_drv.h +++ b/drivers/gpu/drm/gma500/psb_drv.h @@ -424,6 +424,7 @@ struct drm_psb_private { uint32_t pipestat[PSB_NUM_PIPE]; spinlock_t irqmask_lock; + bool irq_enabled; /* Power */ bool pm_initialized; diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c index 343c51250207..7bbb79b0497d 100644 --- a/drivers/gpu/drm/gma500/psb_irq.c +++ b/drivers/gpu/drm/gma500/psb_irq.c @@ -327,6 +327,8 @@ int gma_irq_install(struct drm_device *dev) gma_irq_postinstall(dev); + dev_priv->irq_enabled = true; + return 0; } @@ -337,6 +339,9 @@ void gma_irq_uninstall(struct drm_device *dev) unsigned long irqflags; unsigned int i; + if (!dev_priv->irq_enabled) + return; + spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags); if (dev_priv->ops->hotplug_enable) From de1ce1081b7881044c46012ca27c1f2d5bb27f2d Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Thu, 10 Aug 2023 12:10:57 -0400 Subject: [PATCH 0046/1397] drm/amdkfd: ratelimited SQ interrupt messages [ Upstream commit 37fb87910724f21a1f27a75743d4f9accdee77fb ] No functional change. Use ratelimited version of pr_ to avoid overflowing of dmesg buffer Signed-off-by: Harish Kasiviswanathan Reviewed-by: Philip Yang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 6 +++--- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index c7991e07b6be..a7697ec8188e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -268,7 +268,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING); switch (encoding) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: - pr_debug( + pr_debug_ratelimited( "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n", REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_AUTO_CTXID1, SE_ID), @@ -284,7 +284,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, THREAD_TRACE_UTC_ERROR)); break; case SQ_INTERRUPT_WORD_ENCODING_INST: - pr_debug("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", + pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SE_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, @@ -310,7 +310,7 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, case SQ_INTERRUPT_WORD_ENCODING_ERROR: sq_intr_err_type = REG_GET_FIELD(context_id0, KFD_CTXID0, ERR_TYPE); - pr_warn("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n", + pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n", REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SE_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index f933bd231fb9..2a65792fd116 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -150,7 +150,7 @@ enum SQ_INTERRUPT_ERROR_TYPE { static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1) { - pr_debug( + pr_debug_ratelimited( "sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT), @@ -165,7 +165,7 @@ static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1) static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1) { - pr_debug( + pr_debug_ratelimited( "sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID), @@ -177,7 +177,7 @@ static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1) static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1) { - pr_warn( + pr_warn_ratelimited( "sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 830396b1c3b1..27cdaea40501 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -333,7 +333,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING); switch (encoding) { case SQ_INTERRUPT_WORD_ENCODING_AUTO: - pr_debug( + pr_debug_ratelimited( "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE), @@ -347,7 +347,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR)); break; case SQ_INTERRUPT_WORD_ENCODING_INST: - pr_debug("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n", + pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), @@ -366,7 +366,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, break; case SQ_INTERRUPT_WORD_ENCODING_ERROR: sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE); - pr_warn("sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n", + pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n", REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), From aa359db88213c25b7a8743979726e577bcceb3bc Mon Sep 17 00:00:00 2001 From: "baozhu.liu" Date: Fri, 4 Aug 2023 10:05:53 +0800 Subject: [PATCH 0047/1397] drm/komeda: drop all currently held locks if deadlock happens [ Upstream commit 19ecbe8325a2a7ffda5ff4790955b84eaccba49f ] If komeda_pipeline_unbound_components() returns -EDEADLK, it means that a deadlock happened in the locking context. Currently, komeda is not dealing with the deadlock properly,producing the following output when CONFIG_DEBUG_WW_MUTEX_SLOWPATH is enabled: ------------[ cut here ]------------ [ 26.103984] WARNING: CPU: 2 PID: 345 at drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c:1248 komeda_release_unclaimed_resources+0x13c/0x170 [ 26.117453] Modules linked in: [ 26.120511] CPU: 2 PID: 345 Comm: composer@2.1-se Kdump: loaded Tainted: G W 5.10.110-SE-SDK1.8-dirty #16 [ 26.131374] Hardware name: Siengine Se1000 Evaluation board (DT) [ 26.137379] pstate: 20400009 (nzCv daif +PAN -UAO -TCO BTYPE=--) [ 26.143385] pc : komeda_release_unclaimed_resources+0x13c/0x170 [ 26.149301] lr : komeda_release_unclaimed_resources+0xbc/0x170 [ 26.155130] sp : ffff800017b8b8d0 [ 26.158442] pmr_save: 000000e0 [ 26.161493] x29: ffff800017b8b8d0 x28: ffff000cf2f96200 [ 26.166805] x27: ffff000c8f5a8800 x26: 0000000000000000 [ 26.172116] x25: 0000000000000038 x24: ffff8000116a0140 [ 26.177428] x23: 0000000000000038 x22: ffff000cf2f96200 [ 26.182739] x21: ffff000cfc300300 x20: ffff000c8ab77080 [ 26.188051] x19: 0000000000000003 x18: 0000000000000000 [ 26.193362] x17: 0000000000000000 x16: 0000000000000000 [ 26.198672] x15: b400e638f738ba38 x14: 0000000000000000 [ 26.203983] x13: 0000000106400a00 x12: 0000000000000000 [ 26.209294] x11: 0000000000000000 x10: 0000000000000000 [ 26.214604] x9 : ffff800012f80000 x8 : ffff000ca3308000 [ 26.219915] x7 : 0000000ff3000000 x6 : ffff80001084034c [ 26.225226] x5 : ffff800017b8bc40 x4 : 000000000000000f [ 26.230536] x3 : ffff000ca3308000 x2 : 0000000000000000 [ 26.235847] x1 : 0000000000000000 x0 : ffffffffffffffdd [ 26.241158] Call trace: [ 26.243604] komeda_release_unclaimed_resources+0x13c/0x170 [ 26.249175] komeda_crtc_atomic_check+0x68/0xf0 [ 26.253706] drm_atomic_helper_check_planes+0x138/0x1f4 [ 26.258929] komeda_kms_check+0x284/0x36c [ 26.262939] drm_atomic_check_only+0x40c/0x714 [ 26.267381] drm_atomic_nonblocking_commit+0x1c/0x60 [ 26.272344] drm_mode_atomic_ioctl+0xa3c/0xb8c [ 26.276787] drm_ioctl_kernel+0xc4/0x120 [ 26.280708] drm_ioctl+0x268/0x534 [ 26.284109] __arm64_sys_ioctl+0xa8/0xf0 [ 26.288030] el0_svc_common.constprop.0+0x80/0x240 [ 26.292817] do_el0_svc+0x24/0x90 [ 26.296132] el0_svc+0x20/0x30 [ 26.299185] el0_sync_handler+0xe8/0xf0 [ 26.303018] el0_sync+0x1a4/0x1c0 [ 26.306330] irq event stamp: 0 [ 26.309384] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [ 26.315650] hardirqs last disabled at (0): [] copy_process+0x5d0/0x183c [ 26.323825] softirqs last enabled at (0): [] copy_process+0x5d0/0x183c [ 26.331997] softirqs last disabled at (0): [<0000000000000000>] 0x0 [ 26.338261] ---[ end trace 20ae984fa860184a ]--- [ 26.343021] ------------[ cut here ]------------ [ 26.347646] WARNING: CPU: 3 PID: 345 at drivers/gpu/drm/drm_modeset_lock.c:228 drm_modeset_drop_locks+0x84/0x90 [ 26.357727] Modules linked in: [ 26.360783] CPU: 3 PID: 345 Comm: composer@2.1-se Kdump: loaded Tainted: G W 5.10.110-SE-SDK1.8-dirty #16 [ 26.371645] Hardware name: Siengine Se1000 Evaluation board (DT) [ 26.377647] pstate: 20400009 (nzCv daif +PAN -UAO -TCO BTYPE=--) [ 26.383649] pc : drm_modeset_drop_locks+0x84/0x90 [ 26.388351] lr : drm_mode_atomic_ioctl+0x860/0xb8c [ 26.393137] sp : ffff800017b8bb10 [ 26.396447] pmr_save: 000000e0 [ 26.399497] x29: ffff800017b8bb10 x28: 0000000000000001 [ 26.404807] x27: 0000000000000038 x26: 0000000000000002 [ 26.410115] x25: ffff000cecbefa00 x24: ffff000cf2f96200 [ 26.415423] x23: 0000000000000001 x22: 0000000000000018 [ 26.420731] x21: 0000000000000001 x20: ffff800017b8bc10 [ 26.426039] x19: 0000000000000000 x18: 0000000000000000 [ 26.431347] x17: 0000000002e8bf2c x16: 0000000002e94c6b [ 26.436655] x15: 0000000002ea48b9 x14: ffff8000121f0300 [ 26.441963] x13: 0000000002ee2ca8 x12: ffff80001129cae0 [ 26.447272] x11: ffff800012435000 x10: ffff000ed46b5e88 [ 26.452580] x9 : ffff000c9935e600 x8 : 0000000000000000 [ 26.457888] x7 : 000000008020001e x6 : 000000008020001f [ 26.463196] x5 : ffff80001085fbe0 x4 : fffffe0033a59f20 [ 26.468504] x3 : 000000008020001e x2 : 0000000000000000 [ 26.473813] x1 : 0000000000000000 x0 : ffff000c8f596090 [ 26.479122] Call trace: [ 26.481566] drm_modeset_drop_locks+0x84/0x90 [ 26.485918] drm_mode_atomic_ioctl+0x860/0xb8c [ 26.490359] drm_ioctl_kernel+0xc4/0x120 [ 26.494278] drm_ioctl+0x268/0x534 [ 26.497677] __arm64_sys_ioctl+0xa8/0xf0 [ 26.501598] el0_svc_common.constprop.0+0x80/0x240 [ 26.506384] do_el0_svc+0x24/0x90 [ 26.509697] el0_svc+0x20/0x30 [ 26.512748] el0_sync_handler+0xe8/0xf0 [ 26.516580] el0_sync+0x1a4/0x1c0 [ 26.519891] irq event stamp: 0 [ 26.522943] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [ 26.529207] hardirqs last disabled at (0): [] copy_process+0x5d0/0x183c [ 26.537379] softirqs last enabled at (0): [] copy_process+0x5d0/0x183c [ 26.545550] softirqs last disabled at (0): [<0000000000000000>] 0x0 [ 26.551812] ---[ end trace 20ae984fa860184b ]--- According to the call trace information,it can be located to be WARN_ON(IS_ERR(c_st)) in the komeda_pipeline_unbound_components function; Then follow the function. komeda_pipeline_unbound_components -> komeda_component_get_state_and_set_user -> komeda_pipeline_get_state_and_set_crtc -> komeda_pipeline_get_state ->drm_atomic_get_private_obj_state -> drm_atomic_get_private_obj_state -> drm_modeset_lock komeda_pipeline_unbound_components -> komeda_component_get_state_and_set_user -> komeda_component_get_state -> drm_atomic_get_private_obj_state -> drm_modeset_lock ret = drm_modeset_lock(&obj->lock, state->acquire_ctx); if (ret) return ERR_PTR(ret); Here it return -EDEADLK. deal with the deadlock as suggested by [1], using the function drm_modeset_backoff(). [1] https://docs.kernel.org/gpu/drm-kms.html?highlight=kms#kms-locking Therefore, handling this problem can be solved by adding return -EDEADLK back to the drm_modeset_backoff processing flow in the drm_mode_atomic_ioctl function. Signed-off-by: baozhu.liu Signed-off-by: menghui.huang Reviewed-by: Liviu Dudau Signed-off-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20230804013117.6870-1-menghui.huang@siengine.com Signed-off-by: Sasha Levin --- .../gpu/drm/arm/display/komeda/komeda_pipeline_state.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c index 4618687a8f4d..f3e744172673 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c @@ -1223,7 +1223,7 @@ int komeda_build_display_data_flow(struct komeda_crtc *kcrtc, return 0; } -static void +static int komeda_pipeline_unbound_components(struct komeda_pipeline *pipe, struct komeda_pipeline_state *new) { @@ -1243,8 +1243,12 @@ komeda_pipeline_unbound_components(struct komeda_pipeline *pipe, c = komeda_pipeline_get_component(pipe, id); c_st = komeda_component_get_state_and_set_user(c, drm_st, NULL, new->crtc); + if (PTR_ERR(c_st) == -EDEADLK) + return -EDEADLK; WARN_ON(IS_ERR(c_st)); } + + return 0; } /* release unclaimed pipeline resource */ @@ -1266,9 +1270,8 @@ int komeda_release_unclaimed_resources(struct komeda_pipeline *pipe, if (WARN_ON(IS_ERR_OR_NULL(st))) return -EINVAL; - komeda_pipeline_unbound_components(pipe, st); + return komeda_pipeline_unbound_components(pipe, st); - return 0; } /* Since standalone disabled components must be disabled separately and in the From 67af2e23e2a77769bb53cd65e576cc231bdff83b Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 8 Aug 2023 13:21:58 -0400 Subject: [PATCH 0048/1397] drm/amd/display: Blank phantom OTG before enabling [ Upstream commit e87a6c5b7780b5f423797351eb586ed96cc6d151 ] [Description] Before enabling the phantom OTG for an update we must enable DPG to avoid underflow. Reviewed-by: Samson Tam Acked-by: Stylon Wang Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc.c | 50 +------------------ .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 10 +++- .../drm/amd/display/dc/dcn32/dcn32_hwseq.c | 46 +++++++++++++++++ .../drm/amd/display/dc/dcn32/dcn32_hwseq.h | 5 ++ .../gpu/drm/amd/display/dc/dcn32/dcn32_init.c | 1 + .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 5 ++ 6 files changed, 68 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index d08e60dff46d..3b9d6fa50d17 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1069,53 +1069,6 @@ static void apply_ctx_interdependent_lock(struct dc *dc, } } -static void phantom_pipe_blank( - struct dc *dc, - struct timing_generator *tg, - int width, - int height) -{ - struct dce_hwseq *hws = dc->hwseq; - enum dc_color_space color_space; - struct tg_color black_color = {0}; - struct output_pixel_processor *opp = NULL; - uint32_t num_opps, opp_id_src0, opp_id_src1; - uint32_t otg_active_width, otg_active_height; - uint32_t i; - - /* program opp dpg blank color */ - color_space = COLOR_SPACE_SRGB; - color_space_to_black_color(dc, color_space, &black_color); - - otg_active_width = width; - otg_active_height = height; - - /* get the OPTC source */ - tg->funcs->get_optc_source(tg, &num_opps, &opp_id_src0, &opp_id_src1); - ASSERT(opp_id_src0 < dc->res_pool->res_cap->num_opp); - - for (i = 0; i < dc->res_pool->res_cap->num_opp; i++) { - if (dc->res_pool->opps[i] != NULL && dc->res_pool->opps[i]->inst == opp_id_src0) { - opp = dc->res_pool->opps[i]; - break; - } - } - - if (opp && opp->funcs->opp_set_disp_pattern_generator) - opp->funcs->opp_set_disp_pattern_generator( - opp, - CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR, - CONTROLLER_DP_COLOR_SPACE_UDEFINED, - COLOR_DEPTH_UNDEFINED, - &black_color, - otg_active_width, - otg_active_height, - 0); - - if (tg->funcs->is_tg_enabled(tg)) - hws->funcs.wait_for_blank_complete(opp); -} - static void dc_update_viusal_confirm_color(struct dc *dc, struct dc_state *context, struct pipe_ctx *pipe_ctx) { if (dc->ctx->dce_version >= DCN_VERSION_1_0) { @@ -1206,7 +1159,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) main_pipe_width = old_stream->mall_stream_config.paired_stream->dst.width; main_pipe_height = old_stream->mall_stream_config.paired_stream->dst.height; - phantom_pipe_blank(dc, tg, main_pipe_width, main_pipe_height); + if (dc->hwss.blank_phantom) + dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height); tg->funcs->enable_crtc(tg); } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index aeadc587433f..a2e1ca3b93e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1830,8 +1830,16 @@ void dcn20_program_front_end_for_ctx( dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; - if (tg->funcs->enable_crtc) + if (tg->funcs->enable_crtc) { + if (dc->hwss.blank_phantom) { + int main_pipe_width, main_pipe_height; + + main_pipe_width = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.width; + main_pipe_height = dc->current_state->res_ctx.pipe_ctx[i].stream->mall_stream_config.paired_stream->dst.height; + dc->hwss.blank_phantom(dc, tg, main_pipe_width, main_pipe_height); + } tg->funcs->enable_crtc(tg); + } } } /* OTG blank before disabling all front ends */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 680e7fa8d18a..cae5e1e68c86 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -1573,3 +1573,49 @@ void dcn32_init_blank( if (opp) hws->funcs.wait_for_blank_complete(opp); } + +void dcn32_blank_phantom(struct dc *dc, + struct timing_generator *tg, + int width, + int height) +{ + struct dce_hwseq *hws = dc->hwseq; + enum dc_color_space color_space; + struct tg_color black_color = {0}; + struct output_pixel_processor *opp = NULL; + uint32_t num_opps, opp_id_src0, opp_id_src1; + uint32_t otg_active_width, otg_active_height; + uint32_t i; + + /* program opp dpg blank color */ + color_space = COLOR_SPACE_SRGB; + color_space_to_black_color(dc, color_space, &black_color); + + otg_active_width = width; + otg_active_height = height; + + /* get the OPTC source */ + tg->funcs->get_optc_source(tg, &num_opps, &opp_id_src0, &opp_id_src1); + ASSERT(opp_id_src0 < dc->res_pool->res_cap->num_opp); + + for (i = 0; i < dc->res_pool->res_cap->num_opp; i++) { + if (dc->res_pool->opps[i] != NULL && dc->res_pool->opps[i]->inst == opp_id_src0) { + opp = dc->res_pool->opps[i]; + break; + } + } + + if (opp && opp->funcs->opp_set_disp_pattern_generator) + opp->funcs->opp_set_disp_pattern_generator( + opp, + CONTROLLER_DP_TEST_PATTERN_SOLID_COLOR, + CONTROLLER_DP_COLOR_SPACE_UDEFINED, + COLOR_DEPTH_UNDEFINED, + &black_color, + otg_active_width, + otg_active_height, + 0); + + if (tg->funcs->is_tg_enabled(tg)) + hws->funcs.wait_for_blank_complete(opp); +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h index 2d2628f31bed..616d5219119e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h @@ -115,4 +115,9 @@ void dcn32_init_blank( struct dc *dc, struct timing_generator *tg); +void dcn32_blank_phantom(struct dc *dc, + struct timing_generator *tg, + int width, + int height); + #endif /* __DC_HWSS_DCN32_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c index c7417147dff1..eb4227926006 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c @@ -115,6 +115,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .update_phantom_vp_position = dcn32_update_phantom_vp_position, .update_dsc_pg = dcn32_update_dsc_pg, .apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom, + .blank_phantom = dcn32_blank_phantom, }; static const struct hwseq_private_funcs dcn32_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 02ff99f7bec2..7a702e216e53 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -388,6 +388,11 @@ struct hw_sequencer_funcs { void (*z10_restore)(const struct dc *dc); void (*z10_save_init)(struct dc *dc); + void (*blank_phantom)(struct dc *dc, + struct timing_generator *tg, + int width, + int height); + void (*update_visual_confirm_color)(struct dc *dc, struct pipe_ctx *pipe_ctx, int mpcc_id); From f95d571bb53b7cd119b272320ca263c06652e0c3 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 23 Aug 2023 10:18:36 -0400 Subject: [PATCH 0049/1397] drm/amd/display: Don't lock phantom pipe on disabling [ Upstream commit cbb4c9bc55427774ca4d819933e1b5fa38a6fb44 ] [Description] - When disabling a phantom pipe, we first enable the phantom OTG so the double buffer update can successfully take place - However, want to avoid locking the phantom otherwise setting DPG_EN=1 for the phantom pipe is blocked (without this we could hit underflow due to phantom HUBP being blanked by default) Reviewed-by: Samson Tam Acked-by: Stylon Wang Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 9834b75f1837..79befa17bb03 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -111,7 +111,8 @@ void dcn10_lock_all_pipes(struct dc *dc, if (pipe_ctx->top_pipe || !pipe_ctx->stream || (!pipe_ctx->plane_state && !old_pipe_ctx->plane_state) || - !tg->funcs->is_tg_enabled(tg)) + !tg->funcs->is_tg_enabled(tg) || + pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) continue; if (lock) From ea971fd05f39b2029c16a4d18def097442a97c27 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Thu, 24 Aug 2023 17:08:48 -0400 Subject: [PATCH 0050/1397] drm/amd/display: add seamless pipe topology transition check [ Upstream commit 15c6798ae26d5c7a7776f4f7d0c1fa8c462688a2 ] [why] We have a few cases where we need to perform update topology update in dc update interface. However some of the updates are not seamless This could cause user noticible glitches. To enforce seamless transition we are adding a checking condition and error logging so the corruption as result of non seamless transition can be easily spotted. Reviewed-by: Dillon Varone Acked-by: Stylon Wang Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc.c | 8 +++ .../drm/amd/display/dc/dcn32/dcn32_hwseq.c | 52 +++++++++++++++++++ .../drm/amd/display/dc/dcn32/dcn32_hwseq.h | 4 ++ .../gpu/drm/amd/display/dc/dcn32/dcn32_init.c | 1 + .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 3 ++ 5 files changed, 68 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 3b9d6fa50d17..14c3c1907b95 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4328,6 +4328,14 @@ bool dc_update_planes_and_stream(struct dc *dc, update_type, context); } else { + if (!stream_update && + dc->hwss.is_pipe_topology_transition_seamless && + !dc->hwss.is_pipe_topology_transition_seamless( + dc, dc->current_state, context)) { + + DC_LOG_ERROR("performing non-seamless pipe topology transition with surface only update!\n"); + BREAK_TO_DEBUGGER(); + } commit_planes_for_stream( dc, srf_updates, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index cae5e1e68c86..018376146d97 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -1619,3 +1619,55 @@ void dcn32_blank_phantom(struct dc *dc, if (tg->funcs->is_tg_enabled(tg)) hws->funcs.wait_for_blank_complete(opp); } + +bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc, + const struct dc_state *cur_ctx, + const struct dc_state *new_ctx) +{ + int i; + const struct pipe_ctx *cur_pipe, *new_pipe; + bool is_seamless = true; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + cur_pipe = &cur_ctx->res_ctx.pipe_ctx[i]; + new_pipe = &new_ctx->res_ctx.pipe_ctx[i]; + + if (resource_is_pipe_type(cur_pipe, FREE_PIPE) || + resource_is_pipe_type(new_pipe, FREE_PIPE)) + /* adding or removing free pipes is always seamless */ + continue; + else if (resource_is_pipe_type(cur_pipe, OTG_MASTER)) { + if (resource_is_pipe_type(new_pipe, OTG_MASTER)) + if (cur_pipe->stream->stream_id == new_pipe->stream->stream_id) + /* OTG master with the same stream is seamless */ + continue; + } else if (resource_is_pipe_type(cur_pipe, OPP_HEAD)) { + if (resource_is_pipe_type(new_pipe, OPP_HEAD)) { + if (cur_pipe->stream_res.tg == new_pipe->stream_res.tg) + /* + * OPP heads sharing the same timing + * generator is seamless + */ + continue; + } + } else if (resource_is_pipe_type(cur_pipe, DPP_PIPE)) { + if (resource_is_pipe_type(new_pipe, DPP_PIPE)) { + if (cur_pipe->stream_res.opp == new_pipe->stream_res.opp) + /* + * DPP pipes sharing the same OPP head is + * seamless + */ + continue; + } + } + + /* + * This pipe's transition doesn't fall under any seamless + * conditions + */ + is_seamless = false; + break; + } + + return is_seamless; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h index 616d5219119e..9992e40acd21 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h @@ -120,4 +120,8 @@ void dcn32_blank_phantom(struct dc *dc, int width, int height); +bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc, + const struct dc_state *cur_ctx, + const struct dc_state *new_ctx); + #endif /* __DC_HWSS_DCN32_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c index eb4227926006..1edadff39a5e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c @@ -116,6 +116,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .update_dsc_pg = dcn32_update_dsc_pg, .apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom, .blank_phantom = dcn32_blank_phantom, + .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, }; static const struct hwseq_private_funcs dcn32_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 7a702e216e53..66e680902c95 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -401,6 +401,9 @@ struct hw_sequencer_funcs { struct dc_state *context, struct pipe_ctx *phantom_pipe); void (*apply_update_flags_for_phantom)(struct pipe_ctx *phantom_pipe); + bool (*is_pipe_topology_transition_seamless)(struct dc *dc, + const struct dc_state *cur_ctx, + const struct dc_state *new_ctx); void (*commit_subvp_config)(struct dc *dc, struct dc_state *context); void (*enable_phantom_streams)(struct dc *dc, struct dc_state *context); From d78c4efaeb2d9ad53545e28460adda0685d80ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 21 Sep 2023 00:19:33 +0300 Subject: [PATCH 0051/1397] drm/edid: Fixup h/vsync_end instead of h/vtotal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2682768bde745b10ae126a322cdcaf532cf88851 ] There are some weird EDIDs floating around that have the sync pulse extending beyond the end of the blanking period. On the currently problemtic machine (HP Omni 120) EDID reports the following mode: "1600x900": 60 108000 1600 1780 1860 1800 900 910 913 1000 0x40 0x5 which is then "corrected" to have htotal=1861 by the current drm_edid.c code. The fixup code was originally added in commit 7064fef56369 ("drm: work around EDIDs with bad htotal/vtotal values"). Googling around we end up in https://bugs.launchpad.net/ubuntu/hardy/+source/xserver-xorg-video-intel/+bug/297245 where we find an EDID for a Dell Studio 15, which reports: (II) VESA(0): clock: 65.0 MHz Image Size: 331 x 207 mm (II) VESA(0): h_active: 1280 h_sync: 1328 h_sync_end 1360 h_blank_end 1337 h_border: 0 (II) VESA(0): v_active: 800 v_sync: 803 v_sync_end 809 v_blanking: 810 v_border: 0 Note that if we use the hblank size (as opposed of the hsync_end) from the DTD to determine htotal we get exactly 60Hz refresh rate in both cases, whereas using hsync_end to determine htotal we get a slightly lower refresh rates. This makes me believe the using the hblank size is what was intended even in those cases. Also note that in case of the HP Onmi 120 the VBIOS boots with these: crtc timings: 108000 1600 1780 1860 1800 900 910 913 1000, type: 0x40 flags: 0x5 ie. it just blindly stuffs the bogus hsync_end and htotal from the DTD into the transcoder timing registers, and the display works. I believe the (at least more modern) hardware will automagically terminate the hsync pulse when the timing generator reaches htotal, which again points that we should use the hblank size to determine htotal. Unfortunatley the old bug reports for the Dell machines are extremely lacking in useful details so we have no idea what kind of timings the VBIOS programmed into the hardware :( Let's just flip this quirk around and reduce the length of the sync pulse instead of extending the blanking period. This at least seems to be the correct thing to do on more modern hardware. And if any issues crop up on older hardware we need to debug them properly. v2: Add debug message breadcrumbs (Jani) Reviewed-by: Jani Nikula Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8895 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230920211934.14920-1-ville.syrjala@linux.intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_edid.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 4b71040ae5be..b3e1b288fc0c 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -3499,11 +3499,19 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto mode->vsync_end = mode->vsync_start + vsync_pulse_width; mode->vtotal = mode->vdisplay + vblank; - /* Some EDIDs have bogus h/vtotal values */ - if (mode->hsync_end > mode->htotal) - mode->htotal = mode->hsync_end + 1; - if (mode->vsync_end > mode->vtotal) - mode->vtotal = mode->vsync_end + 1; + /* Some EDIDs have bogus h/vsync_end values */ + if (mode->hsync_end > mode->htotal) { + drm_dbg_kms(dev, "[CONNECTOR:%d:%s] reducing hsync_end %d->%d\n", + connector->base.id, connector->name, + mode->hsync_end, mode->htotal); + mode->hsync_end = mode->htotal; + } + if (mode->vsync_end > mode->vtotal) { + drm_dbg_kms(dev, "[CONNECTOR:%d:%s] reducing vsync_end %d->%d\n", + connector->base.id, connector->name, + mode->vsync_end, mode->vtotal); + mode->vsync_end = mode->vtotal; + } drm_mode_do_interlace_quirk(mode, pt); From 20c2d2c323bcdcd3c1e9fbf67a0f5e77cb094a14 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 25 Aug 2023 11:09:52 +0800 Subject: [PATCH 0052/1397] md: don't rely on 'mddev->pers' to be set in mddev_suspend() [ Upstream commit b721e7885eb242aa2459ee66bb42ceef1bcf0f0c ] 'active_io' used to be initialized while the array is running, and 'mddev->pers' is set while the array is running as well. Hence caller must hold 'reconfig_mutex' and guarantee 'mddev->pers' is set before calling mddev_suspend(). Now that 'active_io' is initialized when mddev is allocated, such restriction doesn't exist anymore. In the meantime, follow up patches will refactor mddev_suspend(), hence add checking for 'mddev->pers' to prevent null-ptr-deref. Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230825030956.1527023-4-yukuai1@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index a104a025084d..9247e55c7eaf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -449,7 +449,7 @@ void mddev_suspend(struct mddev *mddev) set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags); percpu_ref_kill(&mddev->active_io); - if (mddev->pers->prepare_suspend) + if (mddev->pers && mddev->pers->prepare_suspend) mddev->pers->prepare_suspend(mddev); wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io)); From 0a810a3fc67ed4122c0d087af93db57049140636 Mon Sep 17 00:00:00 2001 From: "David (Ming Qiang) Wu" Date: Thu, 14 Sep 2023 16:34:08 -0400 Subject: [PATCH 0053/1397] drm/amdgpu: not to save bo in the case of RAS err_event_athub [ Upstream commit fa1f1cc09d588a90c8ce3f507c47df257461d148 ] err_event_athub will corrupt VCPU buffer and not good to be restored in amdgpu_vcn_resume() and in this case the VCPU buffer needs to be cleared for VCN firmware to work properly. Acked-by: Leo Liu Signed-off-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 36b55d2bd51a..03b4bcfca196 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -292,8 +292,15 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) void *ptr; int i, idx; + bool in_ras_intr = amdgpu_ras_intr_triggered(); + cancel_delayed_work_sync(&adev->vcn.idle_work); + /* err_event_athub will corrupt VCPU buffer, so we need to + * restore fw data and clear buffer in amdgpu_vcn_resume() */ + if (in_ras_intr) + return 0; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; From fc0210720127cc6302e6d6f3de48f49c3fcf5659 Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Wed, 27 Sep 2023 11:20:28 -0500 Subject: [PATCH 0054/1397] drm/amdkfd: Fix a race condition of vram buffer unref in svm code [ Upstream commit 709c348261618da7ed89d6c303e2ceb9e453ba74 ] prange->svm_bo unref can happen in both mmu callback and a callback after migrate to system ram. Both are async call in different tasks. Sync svm_bo unref operation to avoid random "use-after-free". Signed-off-by: Xiaogang Chen Reviewed-by: Philip Yang Reviewed-by: Jesse Zhang Tested-by: Jesse Zhang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2a42fbddcb7a..2591bdfcc228 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -628,8 +628,15 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, void svm_range_vram_node_free(struct svm_range *prange) { - svm_range_bo_unref(prange->svm_bo); - prange->ttm_res = NULL; + /* serialize prange->svm_bo unref */ + mutex_lock(&prange->lock); + /* prange->svm_bo has not been unref */ + if (prange->ttm_res) { + prange->ttm_res = NULL; + mutex_unlock(&prange->lock); + svm_range_bo_unref(prange->svm_bo); + } else + mutex_unlock(&prange->lock); } struct kfd_node * From b39f1303b9596039a90da419b7057e607d54ac46 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 27 Sep 2023 18:02:29 +0800 Subject: [PATCH 0055/1397] drm/amdgpu: update retry times for psp vmbx wait [ Upstream commit fc598890715669ff794b253fdf387cd02b9396f8 ] Increase the retry loops and replace the constant number with macro. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 469eed084976..52d80f286b3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -59,6 +59,9 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); /* Read USB-PD from LFB */ #define GFX_CMD_USB_PD_USE_LFB 0x480 +/* Retry times for vmbx ready wait */ +#define PSP_VMBX_POLLING_LIMIT 20000 + /* VBIOS gfl defines */ #define MBOX_READY_MASK 0x80000000 #define MBOX_STATUS_MASK 0x0000FFFF @@ -138,7 +141,7 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) struct amdgpu_device *adev = psp->adev; int retry_loop, ret; - for (retry_loop = 0; retry_loop < 70; retry_loop++) { + for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_33 set to 1 */ ret = psp_wait_for( From 82c4cf2c0596ffd5fe59f713906691df325ca916 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 29 Sep 2023 22:12:18 -0500 Subject: [PATCH 0056/1397] drm/amd: Update `update_pcie_parameters` functions to use uint8_t arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7752ccf85b929a22e658ec145283e8f31232f4bb ] The matching values for `pcie_gen_cap` and `pcie_width_cap` when fetched from powerplay tables are 1 byte, so narrow the arguments to match to ensure min() and max() comparisons without casts. Signed-off-by: Mario Limonciello Acked-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 8 ++++---- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index f005a90c35af..b47fd42414f4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1232,7 +1232,7 @@ static int smu_smc_hw_setup(struct smu_context *smu) { struct smu_feature *feature = &smu->smu_feature; struct amdgpu_device *adev = smu->adev; - uint32_t pcie_gen = 0, pcie_width = 0; + uint8_t pcie_gen = 0, pcie_width = 0; uint64_t features_supported; int ret = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 5a52098bcf16..72ed83632896 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -844,7 +844,7 @@ struct pptable_funcs { * &pcie_gen_cap: Maximum allowed PCIe generation. * &pcie_width_cap: Maximum allowed PCIe width. */ - int (*update_pcie_parameters)(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap); + int (*update_pcie_parameters)(struct smu_context *smu, uint8_t pcie_gen_cap, uint8_t pcie_width_cap); /** * @i2c_init: Initialize i2c. diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 355c156d871a..cc02f979e9e9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -296,8 +296,8 @@ int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu, uint32_t pptable_id); int smu_v13_0_update_pcie_parameters(struct smu_context *smu, - uint32_t pcie_gen_cap, - uint32_t pcie_width_cap); + uint8_t pcie_gen_cap, + uint8_t pcie_width_cap); #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 18487ae10bcf..c564f6e191f8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2376,8 +2376,8 @@ static int navi10_get_power_limit(struct smu_context *smu, } static int navi10_update_pcie_parameters(struct smu_context *smu, - uint32_t pcie_gen_cap, - uint32_t pcie_width_cap) + uint8_t pcie_gen_cap, + uint8_t pcie_width_cap) { struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; PPTable_t *pptable = smu->smu_table.driver_pptable; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index da2860da6018..0cc5d9769d38 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2085,14 +2085,14 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context #define MAX(a, b) ((a) > (b) ? (a) : (b)) static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu, - uint32_t pcie_gen_cap, - uint32_t pcie_width_cap) + uint8_t pcie_gen_cap, + uint8_t pcie_width_cap) { struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table; uint8_t *table_member1, *table_member2; - uint32_t min_gen_speed, max_gen_speed; - uint32_t min_lane_width, max_lane_width; + uint8_t min_gen_speed, max_gen_speed; + uint8_t min_lane_width, max_lane_width; uint32_t smu_pcie_arg; int ret, i; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 0232adb95df3..a280c1ed007f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2420,8 +2420,8 @@ int smu_v13_0_mode1_reset(struct smu_context *smu) } int smu_v13_0_update_pcie_parameters(struct smu_context *smu, - uint32_t pcie_gen_cap, - uint32_t pcie_width_cap) + uint8_t pcie_gen_cap, + uint8_t pcie_width_cap) { struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_13_0_pcie_table *pcie_table = From dec0a70e528256084c5c6cd1384e309531b631a6 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Thu, 21 Sep 2023 14:43:21 -0400 Subject: [PATCH 0057/1397] drm/amd/display: use full update for clip size increase of large plane source [ Upstream commit 05b78277ef0efc1deebc8a22384fffec29a3676e ] [why] Clip size increase will increase viewport, which could cause us to switch to MPC combine. If we skip full update, we are not able to change to MPC combine in fast update. This will cause corruption showing on the video plane. [how] treat clip size increase of a surface larger than 5k as a full update. Reviewed-by: Jun Lei Acked-by: Aurabindo Pillai Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc.c | 12 ++++++++++-- drivers/gpu/drm/amd/display/dc/dc.h | 5 +++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 14c3c1907b95..38abbd0c9d99 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -990,7 +990,8 @@ static bool dc_construct(struct dc *dc, /* set i2c speed if not done by the respective dcnxxx__resource.c */ if (dc->caps.i2c_speed_in_khz_hdcp == 0) dc->caps.i2c_speed_in_khz_hdcp = dc->caps.i2c_speed_in_khz; - + if (dc->caps.max_optimizable_video_width == 0) + dc->caps.max_optimizable_video_width = 5120; dc->clk_mgr = dc_clk_mgr_create(dc->ctx, dc->res_pool->pp_smu, dc->res_pool->dccg); if (!dc->clk_mgr) goto fail; @@ -2442,6 +2443,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa } static enum surface_update_type get_scaling_info_update_type( + const struct dc *dc, const struct dc_surface_update *u) { union surface_update_flags *update_flags = &u->surface->update_flags; @@ -2474,6 +2476,12 @@ static enum surface_update_type get_scaling_info_update_type( update_flags->bits.clock_change = 1; } + if (u->scaling_info->src_rect.width > dc->caps.max_optimizable_video_width && + (u->scaling_info->clip_rect.width > u->surface->clip_rect.width || + u->scaling_info->clip_rect.height > u->surface->clip_rect.height)) + /* Changing clip size of a large surface may result in MPC slice count change */ + update_flags->bits.bandwidth_change = 1; + if (u->scaling_info->src_rect.x != u->surface->src_rect.x || u->scaling_info->src_rect.y != u->surface->src_rect.y || u->scaling_info->clip_rect.x != u->surface->clip_rect.x @@ -2511,7 +2519,7 @@ static enum surface_update_type det_surface_update(const struct dc *dc, type = get_plane_info_update_type(u); elevate_update_type(&overall_type, type); - type = get_scaling_info_update_type(u); + type = get_scaling_info_update_type(dc, u); elevate_update_type(&overall_type, type); if (u->flip_addr) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 31e3183497a7..c05e91b257ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -231,6 +231,11 @@ struct dc_caps { uint32_t dmdata_alloc_size; unsigned int max_cursor_size; unsigned int max_video_width; + /* + * max video plane width that can be safely assumed to be always + * supported by single DPP pipe. + */ + unsigned int max_optimizable_video_width; unsigned int min_horizontal_blanking_period; int linear_pitch_alignment; bool dcc_const_color; From b105ed9c92df9f940de61492e97191a5f71edb4e Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 20 Sep 2023 14:36:09 +0200 Subject: [PATCH 0058/1397] string.h: add array-wrappers for (v)memdup_user() [ Upstream commit 313ebe47d75558511aa1237b6e35c663b5c0ec6f ] Currently, user array duplications are sometimes done without an overflow check. Sometimes the checks are done manually; sometimes the array size is calculated with array_size() and sometimes by calculating n * size directly in code. Introduce wrappers for arrays for memdup_user() and vmemdup_user() to provide a standardized and safe way for duplicating user arrays. This is both for new code as well as replacing usage of (v)memdup_user() in existing code that uses, e.g., n * size to calculate array sizes. Suggested-by: David Airlie Signed-off-by: Philipp Stanner Reviewed-by: Andy Shevchenko Reviewed-by: Kees Cook Reviewed-by: Zack Rusin Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230920123612.16914-3-pstanner@redhat.com Signed-off-by: Sasha Levin --- include/linux/string.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/include/linux/string.h b/include/linux/string.h index 9e3cb6923b0e..5077776e995e 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -5,7 +5,9 @@ #include /* for inline */ #include /* for size_t */ #include /* for NULL */ +#include /* for ERR_PTR() */ #include /* for E2BIG */ +#include /* for check_mul_overflow() */ #include #include @@ -14,6 +16,44 @@ extern void *memdup_user(const void __user *, size_t); extern void *vmemdup_user(const void __user *, size_t); extern void *memdup_user_nul(const void __user *, size_t); +/** + * memdup_array_user - duplicate array from user space + * @src: source address in user space + * @n: number of array members to copy + * @size: size of one array member + * + * Return: an ERR_PTR() on failure. Result is physically + * contiguous, to be freed by kfree(). + */ +static inline void *memdup_array_user(const void __user *src, size_t n, size_t size) +{ + size_t nbytes; + + if (check_mul_overflow(n, size, &nbytes)) + return ERR_PTR(-EOVERFLOW); + + return memdup_user(src, nbytes); +} + +/** + * vmemdup_array_user - duplicate array from user space + * @src: source address in user space + * @n: number of array members to copy + * @size: size of one array member + * + * Return: an ERR_PTR() on failure. Result may be not + * physically contiguous. Use kvfree() to free. + */ +static inline void *vmemdup_array_user(const void __user *src, size_t n, size_t size) +{ + size_t nbytes; + + if (check_mul_overflow(n, size, &nbytes)) + return ERR_PTR(-EOVERFLOW); + + return vmemdup_user(src, nbytes); +} + /* * Include machine specific inline routines */ From 4fc857cc5cb9b7ce6940898857d773564973a584 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 20 Sep 2023 14:36:10 +0200 Subject: [PATCH 0059/1397] kernel: kexec: copy user-array safely [ Upstream commit 569c8d82f95eb5993c84fb61a649a9c4ddd208b3 ] Currently, there is no overflow-check with memdup_user(). Use the new function memdup_array_user() instead of memdup_user() for duplicating the user-space array safely. Suggested-by: David Airlie Signed-off-by: Philipp Stanner Acked-by: Baoquan He Reviewed-by: Kees Cook Reviewed-by: Zack Rusin Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230920123612.16914-4-pstanner@redhat.com Signed-off-by: Sasha Levin --- kernel/kexec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kexec.c b/kernel/kexec.c index 107f355eac10..8f35a5a42af8 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -247,7 +247,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)) return -EINVAL; - ksegments = memdup_user(segments, nr_segments * sizeof(ksegments[0])); + ksegments = memdup_array_user(segments, nr_segments, sizeof(ksegments[0])); if (IS_ERR(ksegments)) return PTR_ERR(ksegments); From 0f403ebad98e6151aaa9c96c9aae5549aa4d87cd Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 20 Sep 2023 14:36:11 +0200 Subject: [PATCH 0060/1397] kernel: watch_queue: copy user-array safely [ Upstream commit ca0776571d3163bd03b3e8c9e3da936abfaecbf6 ] Currently, there is no overflow-check with memdup_user(). Use the new function memdup_array_user() instead of memdup_user() for duplicating the user-space array safely. Suggested-by: David Airlie Signed-off-by: Philipp Stanner Reviewed-by: Kees Cook Reviewed-by: Zack Rusin Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230920123612.16914-5-pstanner@redhat.com Signed-off-by: Sasha Levin --- kernel/watch_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index d0b6b390ee42..778b4056700f 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -331,7 +331,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe, filter.__reserved != 0) return -EINVAL; - tf = memdup_user(_filter->filters, filter.nr_filters * sizeof(*tf)); + tf = memdup_array_user(_filter->filters, filter.nr_filters, sizeof(*tf)); if (IS_ERR(tf)) return PTR_ERR(tf); From ea42bc330723644a0bd01d7124a601ab60b27747 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 20 Sep 2023 14:36:12 +0200 Subject: [PATCH 0061/1397] drm_lease.c: copy user-array safely [ Upstream commit f37d63e219c39199a59b8b8a211412ff27192830 ] Currently, there is no overflow-check with memdup_user(). Use the new function memdup_array_user() instead of memdup_user() for duplicating the user-space array safely. Suggested-by: David Airlie Signed-off-by: Philipp Stanner Reviewed-by: Kees Cook Reviewed-by: Zack Rusin Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230920123612.16914-6-pstanner@redhat.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_lease.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index 150fe1555068..94375c6a5425 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -510,8 +510,8 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, /* Handle leased objects, if any */ idr_init(&leases); if (object_count != 0) { - object_ids = memdup_user(u64_to_user_ptr(cl->object_ids), - array_size(object_count, sizeof(__u32))); + object_ids = memdup_array_user(u64_to_user_ptr(cl->object_ids), + object_count, sizeof(__u32)); if (IS_ERR(object_ids)) { ret = PTR_ERR(object_ids); idr_destroy(&leases); From 21e29f1437b7c36c76efa908589578eaf0f50900 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 20 Sep 2023 14:36:13 +0200 Subject: [PATCH 0062/1397] drm: vmwgfx_surface.c: copy user-array safely [ Upstream commit 06ab64a0d836ac430c5f94669710a78aa43942cb ] Currently, there is no overflow-check with memdup_user(). Use the new function memdup_array_user() instead of memdup_user() for duplicating the user-space array safely. Suggested-by: David Airlie Signed-off-by: Philipp Stanner Reviewed-by: Kees Cook Reviewed-by: Zack Rusin Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230920123612.16914-7-pstanner@redhat.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 3829be282ff0..17463aeeef28 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -774,9 +774,9 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, sizeof(metadata->mip_levels)); metadata->num_sizes = num_sizes; metadata->sizes = - memdup_user((struct drm_vmw_size __user *)(unsigned long) + memdup_array_user((struct drm_vmw_size __user *)(unsigned long) req->size_addr, - sizeof(*metadata->sizes) * metadata->num_sizes); + metadata->num_sizes, sizeof(*metadata->sizes)); if (IS_ERR(metadata->sizes)) { ret = PTR_ERR(metadata->sizes); goto out_no_sizes; From ea1e7d6fdc5e4452e78f73bd59fdad24e0b452b1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 1 Sep 2023 17:20:34 +0300 Subject: [PATCH 0063/1397] drm/msm/dp: skip validity check for DP CTS EDID checksum [ Upstream commit a251c9d8e30833b260101edb9383b176ee2b7cb1 ] The DP CTS test for EDID last block checksum expects the checksum for the last block, invalid or not. Skip the validity check. For the most part (*), the EDIDs returned by drm_get_edid() will be valid anyway, and there's the CTS workaround to get the checksum for completely invalid EDIDs. See commit 7948fe12d47a ("drm/msm/dp: return correct edid checksum after corrupted edid checksum read"). This lets us remove one user of drm_edid_block_valid() with hopes the function can be removed altogether in the future. (*) drm_get_edid() ignores checksum errors on CTA extensions. Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Kuogee Hsieh Cc: Marijn Suijten Cc: Rob Clark Cc: Sean Paul Cc: Stephen Boyd Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Signed-off-by: Jani Nikula Reviewed-by: Stephen Boyd Reviewed-by: Abhinav Kumar Reviewed-by: Kuogee Hsieh Patchwork: https://patchwork.freedesktop.org/patch/555361/ Link: https://lore.kernel.org/r/20230901142034.580802-1-jani.nikula@intel.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_panel.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 42d52510ffd4..86a8e06c7a60 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -289,26 +289,9 @@ int dp_panel_get_modes(struct dp_panel *dp_panel, static u8 dp_panel_get_edid_checksum(struct edid *edid) { - struct edid *last_block; - u8 *raw_edid; - bool is_edid_corrupt = false; + edid += edid->extensions; - if (!edid) { - DRM_ERROR("invalid edid input\n"); - return 0; - } - - raw_edid = (u8 *)edid; - raw_edid += (edid->extensions * EDID_LENGTH); - last_block = (struct edid *)raw_edid; - - /* block type extension */ - drm_edid_block_valid(raw_edid, 1, false, &is_edid_corrupt); - if (!is_edid_corrupt) - return last_block->checksum; - - DRM_ERROR("Invalid block, no checksum\n"); - return 0; + return edid->checksum; } void dp_panel_handle_sink_request(struct dp_panel *dp_panel) From 92a775e7c9707aed28782bafe636bf87675f5a97 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 4 Oct 2023 15:22:52 -0500 Subject: [PATCH 0064/1397] drm/amd: Fix UBSAN array-index-out-of-bounds for SMU7 [ Upstream commit 760efbca74a405dc439a013a5efaa9fadc95a8c3 ] For pptable structs that use flexible array sizes, use flexible arrays. Suggested-by: Felix Held Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2874 Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/include/pptable.h | 4 ++-- drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/include/pptable.h b/drivers/gpu/drm/amd/include/pptable.h index 0b6a057e0a4c..5aac8d545bdc 100644 --- a/drivers/gpu/drm/amd/include/pptable.h +++ b/drivers/gpu/drm/amd/include/pptable.h @@ -78,7 +78,7 @@ typedef struct _ATOM_PPLIB_THERMALCONTROLLER typedef struct _ATOM_PPLIB_STATE { UCHAR ucNonClockStateIndex; - UCHAR ucClockStateIndices[1]; // variable-sized + UCHAR ucClockStateIndices[]; // variable-sized } ATOM_PPLIB_STATE; @@ -473,7 +473,7 @@ typedef struct _ATOM_PPLIB_STATE_V2 /** * Driver will read the first ucNumDPMLevels in this array */ - UCHAR clockInfoIndex[1]; + UCHAR clockInfoIndex[]; } ATOM_PPLIB_STATE_V2; typedef struct _StateArray{ diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h index 7a31cfa5e7fb..57bca1e81d3a 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h @@ -179,7 +179,7 @@ typedef struct _ATOM_Tonga_MCLK_Dependency_Record { typedef struct _ATOM_Tonga_MCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Tonga_MCLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Tonga_MCLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Tonga_MCLK_Dependency_Table; typedef struct _ATOM_Tonga_SCLK_Dependency_Record { @@ -194,7 +194,7 @@ typedef struct _ATOM_Tonga_SCLK_Dependency_Record { typedef struct _ATOM_Tonga_SCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Tonga_SCLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Tonga_SCLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Tonga_SCLK_Dependency_Table; typedef struct _ATOM_Polaris_SCLK_Dependency_Record { From b3b8b7c040cf069da7afe11c5bd73b870b8f3d18 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 4 Oct 2023 15:46:44 -0500 Subject: [PATCH 0065/1397] drm/amd: Fix UBSAN array-index-out-of-bounds for Polaris and Tonga [ Upstream commit 0f0e59075b5c22f1e871fbd508d6e4f495048356 ] For pptable structs that use flexible array sizes, use flexible arrays. Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2036742 Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h index 57bca1e81d3a..9fcad69a9f34 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h @@ -164,7 +164,7 @@ typedef struct _ATOM_Tonga_State { typedef struct _ATOM_Tonga_State_Array { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Tonga_State entries[1]; /* Dynamically allocate entries. */ + ATOM_Tonga_State entries[]; /* Dynamically allocate entries. */ } ATOM_Tonga_State_Array; typedef struct _ATOM_Tonga_MCLK_Dependency_Record { @@ -210,7 +210,7 @@ typedef struct _ATOM_Polaris_SCLK_Dependency_Record { typedef struct _ATOM_Polaris_SCLK_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Polaris_SCLK_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Polaris_SCLK_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Polaris_SCLK_Dependency_Table; typedef struct _ATOM_Tonga_PCIE_Record { @@ -222,7 +222,7 @@ typedef struct _ATOM_Tonga_PCIE_Record { typedef struct _ATOM_Tonga_PCIE_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Tonga_PCIE_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Tonga_PCIE_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Tonga_PCIE_Table; typedef struct _ATOM_Polaris10_PCIE_Record { @@ -235,7 +235,7 @@ typedef struct _ATOM_Polaris10_PCIE_Record { typedef struct _ATOM_Polaris10_PCIE_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Polaris10_PCIE_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Polaris10_PCIE_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Polaris10_PCIE_Table; @@ -252,7 +252,7 @@ typedef struct _ATOM_Tonga_MM_Dependency_Record { typedef struct _ATOM_Tonga_MM_Dependency_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Tonga_MM_Dependency_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Tonga_MM_Dependency_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Tonga_MM_Dependency_Table; typedef struct _ATOM_Tonga_Voltage_Lookup_Record { @@ -265,7 +265,7 @@ typedef struct _ATOM_Tonga_Voltage_Lookup_Record { typedef struct _ATOM_Tonga_Voltage_Lookup_Table { UCHAR ucRevId; UCHAR ucNumEntries; /* Number of entries. */ - ATOM_Tonga_Voltage_Lookup_Record entries[1]; /* Dynamically allocate entries. */ + ATOM_Tonga_Voltage_Lookup_Record entries[]; /* Dynamically allocate entries. */ } ATOM_Tonga_Voltage_Lookup_Table; typedef struct _ATOM_Tonga_Fan_Table { From da46e63482fdc5e35c008865c22ac64027f6f0c2 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Wed, 27 Sep 2023 16:22:29 +0800 Subject: [PATCH 0066/1397] drm/amdgpu: Fix potential null pointer derefernce [ Upstream commit 80285ae1ec8717b597b20de38866c29d84d321a1 ] The amdgpu_ras_get_context may return NULL if device not support ras feature, so add check before using. Signed-off-by: Stanley.Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2b8356699f23..69f72bca229c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5183,7 +5183,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * Flush RAM to disk so that after reboot * the user can read log and see why the system rebooted. */ - if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) { + if (need_emergency_restart && amdgpu_ras_get_context(adev) && + amdgpu_ras_get_context(adev)->reboot) { DRM_WARN("Emergency reboot."); ksys_sync_helper(); From 8a9dd36fcb4f3906982b82593393578db4479992 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Sat, 7 Oct 2023 11:31:05 +0800 Subject: [PATCH 0067/1397] drm/panel: fix a possible null pointer dereference [ Upstream commit 924e5814d1f84e6fa5cb19c6eceb69f066225229 ] In versatile_panel_get_modes(), the return value of drm_mode_duplicate() is assigned to mode, which will lead to a NULL pointer dereference on failure of drm_mode_duplicate(). Add a check to avoid npd. Signed-off-by: Ma Ke Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231007033105.3997998-1-make_ruc2021@163.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231007033105.3997998-1-make_ruc2021@163.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-arm-versatile.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-arm-versatile.c b/drivers/gpu/drm/panel/panel-arm-versatile.c index abb0788843c6..503ecea72c5e 100644 --- a/drivers/gpu/drm/panel/panel-arm-versatile.c +++ b/drivers/gpu/drm/panel/panel-arm-versatile.c @@ -267,6 +267,8 @@ static int versatile_panel_get_modes(struct drm_panel *panel, connector->display_info.bus_flags = vpanel->panel_type->bus_flags; mode = drm_mode_duplicate(connector->dev, &vpanel->panel_type->mode); + if (!mode) + return -ENOMEM; drm_mode_set_name(mode); mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; From eaede6900c0961b072669d6bd97fe8f90ed1900f Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Mon, 9 Oct 2023 17:04:46 +0800 Subject: [PATCH 0068/1397] drm/panel/panel-tpo-tpg110: fix a possible null pointer dereference [ Upstream commit f22def5970c423ea7f87d5247bd0ef91416b0658 ] In tpg110_get_modes(), the return value of drm_mode_duplicate() is assigned to mode, which will lead to a NULL pointer dereference on failure of drm_mode_duplicate(). Add a check to avoid npd. Signed-off-by: Ma Ke Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231009090446.4043798-1-make_ruc2021@163.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231009090446.4043798-1-make_ruc2021@163.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-tpo-tpg110.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-tpo-tpg110.c b/drivers/gpu/drm/panel/panel-tpo-tpg110.c index 845304435e23..f6a212e542cb 100644 --- a/drivers/gpu/drm/panel/panel-tpo-tpg110.c +++ b/drivers/gpu/drm/panel/panel-tpo-tpg110.c @@ -379,6 +379,8 @@ static int tpg110_get_modes(struct drm_panel *panel, connector->display_info.bus_flags = tpg->panel_mode->bus_flags; mode = drm_mode_duplicate(connector->dev, &tpg->panel_mode->mode); + if (!mode) + return -ENOMEM; drm_mode_set_name(mode); mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; From c0fe7aacc32e482d78b5e1eb30e3b6270c47298b Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Wed, 11 Oct 2023 09:21:43 +0800 Subject: [PATCH 0069/1397] drm/radeon: fix a possible null pointer dereference [ Upstream commit 2c1fe3c480f9e1deefd50d4b18be4a046011ee1f ] In radeon_tv_get_modes(), the return value of drm_cvt_mode() is assigned to mode, which will lead to a NULL pointer dereference on failure of drm_cvt_mode(). Add a check to avoid null point dereference. Signed-off-by: Ma Ke Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/radeon_connectors.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index d2f02c3dfce2..b84b58926106 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1119,6 +1119,8 @@ static int radeon_tv_get_modes(struct drm_connector *connector) else { /* only 800x600 is supported right now on pre-avivo chips */ tv_mode = drm_cvt_mode(dev, 800, 600, 60, false, false, false); + if (!tv_mode) + return 0; tv_mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; drm_mode_probed_add(connector, tv_mode); } From 70f831f21155c692bb336c434936fd6f24f3f81a Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Fri, 13 Oct 2023 09:53:43 +0800 Subject: [PATCH 0070/1397] drm/amdgpu/vkms: fix a possible null pointer dereference [ Upstream commit cd90511557fdfb394bb4ac4c3b539b007383914c ] In amdgpu_vkms_conn_get_modes(), the return value of drm_cvt_mode() is assigned to mode, which will lead to a NULL pointer dereference on failure of drm_cvt_mode(). Add a check to avoid null pointer dereference. Signed-off-by: Ma Ke Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 7148a216ae2f..db6fc0cb18eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -239,6 +239,8 @@ static int amdgpu_vkms_conn_get_modes(struct drm_connector *connector) for (i = 0; i < ARRAY_SIZE(common_modes); i++) { mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + if (!mode) + continue; drm_mode_probed_add(connector, mode); } From 1afe397315a8cbdfde070bf815c67dd5c4e6ee77 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sat, 11 Feb 2023 18:17:48 +0100 Subject: [PATCH 0071/1397] drm/panel: st7703: Pick different reset sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d12d635bb03c7cb4830acb641eb176ee9ff2aa89 ] Switching to a different reset sequence, enabling IOVCC before enabling VCC. There also needs to be a delay after enabling the supplies and before deasserting the reset. The datasheet specifies 1ms after the supplies reach the required voltage. Use 10-20ms to also give the power supplies some time to reach the required voltage, too. This fixes intermittent panel initialization failures and screen corruption during resume from sleep on panel xingbangda,xbd599 (e.g. used in PinePhone). Signed-off-by: Ondrej Jirman Signed-off-by: Frank Oltmanns Reported-by: Samuel Holland Reviewed-by: Guido Günther Tested-by: Guido Günther Signed-off-by: Guido Günther Link: https://patchwork.freedesktop.org/patch/msgid/20230211171748.36692-2-frank@oltmanns.dev Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-sitronix-st7703.c | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7703.c b/drivers/gpu/drm/panel/panel-sitronix-st7703.c index 6a3945639535..7bb723d445ad 100644 --- a/drivers/gpu/drm/panel/panel-sitronix-st7703.c +++ b/drivers/gpu/drm/panel/panel-sitronix-st7703.c @@ -506,29 +506,30 @@ static int st7703_prepare(struct drm_panel *panel) return 0; dev_dbg(ctx->dev, "Resetting the panel\n"); - ret = regulator_enable(ctx->vcc); + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + + ret = regulator_enable(ctx->iovcc); if (ret < 0) { - dev_err(ctx->dev, "Failed to enable vcc supply: %d\n", ret); + dev_err(ctx->dev, "Failed to enable iovcc supply: %d\n", ret); return ret; } - ret = regulator_enable(ctx->iovcc); + + ret = regulator_enable(ctx->vcc); if (ret < 0) { - dev_err(ctx->dev, "Failed to enable iovcc supply: %d\n", ret); - goto disable_vcc; + dev_err(ctx->dev, "Failed to enable vcc supply: %d\n", ret); + regulator_disable(ctx->iovcc); + return ret; } - gpiod_set_value_cansleep(ctx->reset_gpio, 1); - usleep_range(20, 40); + /* Give power supplies time to stabilize before deasserting reset. */ + usleep_range(10000, 20000); + gpiod_set_value_cansleep(ctx->reset_gpio, 0); - msleep(20); + usleep_range(15000, 20000); ctx->prepared = true; return 0; - -disable_vcc: - regulator_disable(ctx->vcc); - return ret; } static const u32 mantix_bus_formats[] = { From 56649c43d40ce0147465a2d5756d300e87f9ee1c Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 20 Oct 2023 09:43:51 +0800 Subject: [PATCH 0072/1397] drm/amdkfd: Fix shift out-of-bounds issue [ Upstream commit 282c1d793076c2edac6c3db51b7e8ed2b41d60a5 ] [ 567.613292] shift exponent 255 is too large for 64-bit type 'long unsigned int' [ 567.614498] CPU: 5 PID: 238 Comm: kworker/5:1 Tainted: G OE 6.2.0-34-generic #34~22.04.1-Ubuntu [ 567.614502] Hardware name: AMD Splinter/Splinter-RPL, BIOS WS43927N_871 09/25/2023 [ 567.614504] Workqueue: events send_exception_work_handler [amdgpu] [ 567.614748] Call Trace: [ 567.614750] [ 567.614753] dump_stack_lvl+0x48/0x70 [ 567.614761] dump_stack+0x10/0x20 [ 567.614763] __ubsan_handle_shift_out_of_bounds+0x156/0x310 [ 567.614769] ? srso_alias_return_thunk+0x5/0x7f [ 567.614773] ? update_sd_lb_stats.constprop.0+0xf2/0x3c0 [ 567.614780] svm_range_split_by_granularity.cold+0x2b/0x34 [amdgpu] [ 567.615047] ? srso_alias_return_thunk+0x5/0x7f [ 567.615052] svm_migrate_to_ram+0x185/0x4d0 [amdgpu] [ 567.615286] do_swap_page+0x7b6/0xa30 [ 567.615291] ? srso_alias_return_thunk+0x5/0x7f [ 567.615294] ? __free_pages+0x119/0x130 [ 567.615299] handle_pte_fault+0x227/0x280 [ 567.615303] __handle_mm_fault+0x3c0/0x720 [ 567.615311] handle_mm_fault+0x119/0x330 [ 567.615314] ? lock_mm_and_find_vma+0x44/0x250 [ 567.615318] do_user_addr_fault+0x1a9/0x640 [ 567.615323] exc_page_fault+0x81/0x1b0 [ 567.615328] asm_exc_page_fault+0x27/0x30 [ 567.615332] RIP: 0010:__get_user_8+0x1c/0x30 Signed-off-by: Jesse Zhang Suggested-by: Philip Yang Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2591bdfcc228..63ce30ea6891 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -767,7 +767,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange, prange->flags &= ~attrs[i].value; break; case KFD_IOCTL_SVM_ATTR_GRANULARITY: - prange->granularity = attrs[i].value; + prange->granularity = min_t(uint32_t, attrs[i].value, 0x3F); break; default: WARN_ONCE(1, "svm_range_check_attrs wasn't called?"); From ba3c0796d292de84f2932cc5bbb0f771fc720996 Mon Sep 17 00:00:00 2001 From: Qu Huang Date: Mon, 23 Oct 2023 12:56:37 +0000 Subject: [PATCH 0073/1397] drm/amdgpu: Fix a null pointer access when the smc_rreg pointer is NULL [ Upstream commit 5104fdf50d326db2c1a994f8b35dcd46e63ae4ad ] In certain types of chips, such as VEGA20, reading the amdgpu_regs_smc file could result in an abnormal null pointer access when the smc_rreg pointer is NULL. Below are the steps to reproduce this issue and the corresponding exception log: 1. Navigate to the directory: /sys/kernel/debug/dri/0 2. Execute command: cat amdgpu_regs_smc 3. Exception Log:: [4005007.702554] BUG: kernel NULL pointer dereference, address: 0000000000000000 [4005007.702562] #PF: supervisor instruction fetch in kernel mode [4005007.702567] #PF: error_code(0x0010) - not-present page [4005007.702570] PGD 0 P4D 0 [4005007.702576] Oops: 0010 [#1] SMP NOPTI [4005007.702581] CPU: 4 PID: 62563 Comm: cat Tainted: G OE 5.15.0-43-generic #46-Ubunt u [4005007.702590] RIP: 0010:0x0 [4005007.702598] Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6. [4005007.702600] RSP: 0018:ffffa82b46d27da0 EFLAGS: 00010206 [4005007.702605] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffa82b46d27e68 [4005007.702609] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff9940656e0000 [4005007.702612] RBP: ffffa82b46d27dd8 R08: 0000000000000000 R09: ffff994060c07980 [4005007.702615] R10: 0000000000020000 R11: 0000000000000000 R12: 00007f5e06753000 [4005007.702618] R13: ffff9940656e0000 R14: ffffa82b46d27e68 R15: 00007f5e06753000 [4005007.702622] FS: 00007f5e0755b740(0000) GS:ffff99479d300000(0000) knlGS:0000000000000000 [4005007.702626] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [4005007.702629] CR2: ffffffffffffffd6 CR3: 00000003253fc000 CR4: 00000000003506e0 [4005007.702633] Call Trace: [4005007.702636] [4005007.702640] amdgpu_debugfs_regs_smc_read+0xb0/0x120 [amdgpu] [4005007.703002] full_proxy_read+0x5c/0x80 [4005007.703011] vfs_read+0x9f/0x1a0 [4005007.703019] ksys_read+0x67/0xe0 [4005007.703023] __x64_sys_read+0x19/0x20 [4005007.703028] do_syscall_64+0x5c/0xc0 [4005007.703034] ? do_user_addr_fault+0x1e3/0x670 [4005007.703040] ? exit_to_user_mode_prepare+0x37/0xb0 [4005007.703047] ? irqentry_exit_to_user_mode+0x9/0x20 [4005007.703052] ? irqentry_exit+0x19/0x30 [4005007.703057] ? exc_page_fault+0x89/0x160 [4005007.703062] ? asm_exc_page_fault+0x8/0x30 [4005007.703068] entry_SYSCALL_64_after_hwframe+0x44/0xae [4005007.703075] RIP: 0033:0x7f5e07672992 [4005007.703079] Code: c0 e9 b2 fe ff ff 50 48 8d 3d fa b2 0c 00 e8 c5 1d 02 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 e c 28 48 89 54 24 [4005007.703083] RSP: 002b:00007ffe03097898 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [4005007.703088] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f5e07672992 [4005007.703091] RDX: 0000000000020000 RSI: 00007f5e06753000 RDI: 0000000000000003 [4005007.703094] RBP: 00007f5e06753000 R08: 00007f5e06752010 R09: 00007f5e06752010 [4005007.703096] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000022000 [4005007.703099] R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 [4005007.703105] [4005007.703107] Modules linked in: nf_tables libcrc32c nfnetlink algif_hash af_alg binfmt_misc nls_ iso8859_1 ipmi_ssif ast intel_rapl_msr intel_rapl_common drm_vram_helper drm_ttm_helper amd64_edac t tm edac_mce_amd kvm_amd ccp mac_hid k10temp kvm acpi_ipmi ipmi_si rapl sch_fq_codel ipmi_devintf ipm i_msghandler msr parport_pc ppdev lp parport mtd pstore_blk efi_pstore ramoops pstore_zone reed_solo mon ip_tables x_tables autofs4 ib_uverbs ib_core amdgpu(OE) amddrm_ttm_helper(OE) amdttm(OE) iommu_v 2 amd_sched(OE) amdkcl(OE) drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops cec rc_core drm igb ahci xhci_pci libahci i2c_piix4 i2c_algo_bit xhci_pci_renesas dca [4005007.703184] CR2: 0000000000000000 [4005007.703188] ---[ end trace ac65a538d240da39 ]--- [4005007.800865] RIP: 0010:0x0 [4005007.800871] Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6. [4005007.800874] RSP: 0018:ffffa82b46d27da0 EFLAGS: 00010206 [4005007.800878] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffa82b46d27e68 [4005007.800881] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff9940656e0000 [4005007.800883] RBP: ffffa82b46d27dd8 R08: 0000000000000000 R09: ffff994060c07980 [4005007.800886] R10: 0000000000020000 R11: 0000000000000000 R12: 00007f5e06753000 [4005007.800888] R13: ffff9940656e0000 R14: ffffa82b46d27e68 R15: 00007f5e06753000 [4005007.800891] FS: 00007f5e0755b740(0000) GS:ffff99479d300000(0000) knlGS:0000000000000000 [4005007.800895] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [4005007.800898] CR2: ffffffffffffffd6 CR3: 00000003253fc000 CR4: 00000000003506e0 Signed-off-by: Qu Huang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a4faea4fa0b5..05405da51e7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -748,6 +748,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, ssize_t result = 0; int r; + if (!adev->smc_rreg) + return -EPERM; + if (size & 0x3 || *pos & 0x3) return -EINVAL; @@ -804,6 +807,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * ssize_t result = 0; int r; + if (!adev->smc_wreg) + return -EPERM; + if (size & 0x3 || *pos & 0x3) return -EINVAL; From 745682e634ca92b3704991ae7b5abb0798e1571e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 23 Oct 2023 15:42:00 -0500 Subject: [PATCH 0074/1397] drm/amd: Disable PP_PCIE_DPM_MASK when dynamic speed switching not supported [ Upstream commit fbf1035b033a51eee48d5f42e781b02fff272ca0 ] Rather than individual ASICs checking for the quirk, set the quirk at the driver level. Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c | 4 +--- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 69f72bca229c..b9fd755419fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2103,6 +2103,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK; + if (!amdgpu_device_pcie_dynamic_switching_supported()) + adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK; total = true; for (i = 0; i < adev->num_ip_blocks; i++) { diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 5a2371484a58..11372fcc59c8 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -1823,9 +1823,7 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr) data->mclk_dpm_key_disabled = hwmgr->feature_mask & PP_MCLK_DPM_MASK ? false : true; data->sclk_dpm_key_disabled = hwmgr->feature_mask & PP_SCLK_DPM_MASK ? false : true; - data->pcie_dpm_key_disabled = - !amdgpu_device_pcie_dynamic_switching_supported() || - !(hwmgr->feature_mask & PP_PCIE_DPM_MASK); + data->pcie_dpm_key_disabled = !(hwmgr->feature_mask & PP_PCIE_DPM_MASK); /* need to set voltage control types before EVV patching */ data->voltage_control = SMU7_VOLTAGE_CONTROL_NONE; data->vddci_control = SMU7_VOLTAGE_CONTROL_NONE; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 0cc5d9769d38..a7f4f82d23b4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2108,7 +2108,7 @@ static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu, min_lane_width = min_lane_width > max_lane_width ? max_lane_width : min_lane_width; - if (!amdgpu_device_pcie_dynamic_switching_supported()) { + if (!(smu->adev->pm.pp_feature & PP_PCIE_DPM_MASK)) { pcie_table->pcie_gen[0] = max_gen_speed; pcie_table->pcie_lane[0] = max_lane_width; } else { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index a280c1ed007f..4aeb84572e5b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2430,7 +2430,7 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, uint32_t smu_pcie_arg; int ret, i; - if (!amdgpu_device_pcie_dynamic_switching_supported()) { + if (!(smu->adev->pm.pp_feature & PP_PCIE_DPM_MASK)) { if (pcie_table->pcie_gen[num_of_levels - 1] < pcie_gen_cap) pcie_gen_cap = pcie_table->pcie_gen[num_of_levels - 1]; From b58ace0e8c981ab884d5014369aea9f3d611d5e4 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Thu, 5 Oct 2023 01:31:12 -0400 Subject: [PATCH 0075/1397] drm/amd/display: fix num_ways overflow error [ Upstream commit 79f3f1b66753b3a3a269d73676bf50987921f267 ] [Why] Helper function calculates num_ways using 32-bit. But is returned as 8-bit. If num_ways exceeds 8-bit, then it reports back the incorrect num_ways and erroneously uses MALL when it should not [How] Make returned value 32-bit and convert after it checks against caps.cache_num_ways, which is under 8-bit Reviewed-by: Alvin Lee Acked-by: Roman Li Signed-off-by: Samson Tam Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 018376146d97..be59e1c02f8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -214,7 +214,7 @@ static bool dcn32_check_no_memory_request_for_cab(struct dc *dc) static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *ctx) { int i; - uint8_t num_ways = 0; + uint32_t num_ways = 0; uint32_t mall_ss_size_bytes = 0; mall_ss_size_bytes = ctx->bw_ctx.bw.dcn.mall_ss_size_bytes; @@ -244,7 +244,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) { union dmub_rb_cmd cmd; - uint8_t ways, i; + uint8_t i; + uint32_t ways; int j; bool mall_ss_unsupported = false; struct dc_plane_state *plane = NULL; @@ -304,7 +305,7 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS; cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB; cmd.cab.header.payload_bytes = sizeof(cmd.cab) - sizeof(cmd.cab.header); - cmd.cab.cab_alloc_ways = ways; + cmd.cab.cab_alloc_ways = (uint8_t)ways; dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT); From 09f617219fe9ccd8d7b65dc3e879b5889f663b5a Mon Sep 17 00:00:00 2001 From: "Lin.Cao" Date: Wed, 25 Oct 2023 11:32:41 +0800 Subject: [PATCH 0076/1397] drm/amd: check num of link levels when update pcie param [ Upstream commit 406e8845356d18bdf3d3a23b347faf67706472ec ] In SR-IOV environment, the value of pcie_table->num_of_link_levels will be 0, and num_of_levels - 1 will cause array index out of bounds Signed-off-by: Lin.Cao Acked-by: Jingwen Chen Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 4aeb84572e5b..5355f621388b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2430,6 +2430,9 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, uint32_t smu_pcie_arg; int ret, i; + if (!num_of_levels) + return 0; + if (!(smu->adev->pm.pp_feature & PP_PCIE_DPM_MASK)) { if (pcie_table->pcie_gen[num_of_levels - 1] < pcie_gen_cap) pcie_gen_cap = pcie_table->pcie_gen[num_of_levels - 1]; From bce7b184f2698a2bdf73156d0256bf8cef36a4ed Mon Sep 17 00:00:00 2001 From: Lu Hongfei Date: Mon, 12 Jun 2023 21:34:52 +0800 Subject: [PATCH 0077/1397] soc: qcom: pmic: Fix resource leaks in a device_for_each_child_node() loop [ Upstream commit 5692aeea5bcb9331e956628c3bc8fc9afcc9765d ] The device_for_each_child_node loop should call fwnode_handle_put() before return in the error cases, to avoid resource leaks. Let's fix this bug in pmic_glink_altmode_probe(). Signed-off-by: Lu Hongfei Link: https://lore.kernel.org/r/20230612133452.47315-1-luhongfei@vivo.com [bjorn: Rebased patch, moved fw_handle_put() from jump target into the loop] Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/soc/qcom/pmic_glink_altmode.c | 30 ++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c index 974c14d1e0bf..6f8b2f7ae3cc 100644 --- a/drivers/soc/qcom/pmic_glink_altmode.c +++ b/drivers/soc/qcom/pmic_glink_altmode.c @@ -444,6 +444,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, ret = fwnode_property_read_u32(fwnode, "reg", &port); if (ret < 0) { dev_err(dev, "missing reg property of %pOFn\n", fwnode); + fwnode_handle_put(fwnode); return ret; } @@ -454,6 +455,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, if (altmode->ports[port].altmode) { dev_err(dev, "multiple connector definition for port %u\n", port); + fwnode_handle_put(fwnode); return -EINVAL; } @@ -468,45 +470,59 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, alt_port->bridge.type = DRM_MODE_CONNECTOR_DisplayPort; ret = devm_drm_bridge_add(dev, &alt_port->bridge); - if (ret) + if (ret) { + fwnode_handle_put(fwnode); return ret; + } alt_port->dp_alt.svid = USB_TYPEC_DP_SID; alt_port->dp_alt.mode = USB_TYPEC_DP_MODE; alt_port->dp_alt.active = 1; alt_port->typec_mux = fwnode_typec_mux_get(fwnode); - if (IS_ERR(alt_port->typec_mux)) + if (IS_ERR(alt_port->typec_mux)) { + fwnode_handle_put(fwnode); return dev_err_probe(dev, PTR_ERR(alt_port->typec_mux), "failed to acquire mode-switch for port: %d\n", port); + } ret = devm_add_action_or_reset(dev, pmic_glink_altmode_put_mux, alt_port->typec_mux); - if (ret) + if (ret) { + fwnode_handle_put(fwnode); return ret; + } alt_port->typec_retimer = fwnode_typec_retimer_get(fwnode); - if (IS_ERR(alt_port->typec_retimer)) + if (IS_ERR(alt_port->typec_retimer)) { + fwnode_handle_put(fwnode); return dev_err_probe(dev, PTR_ERR(alt_port->typec_retimer), "failed to acquire retimer-switch for port: %d\n", port); + } ret = devm_add_action_or_reset(dev, pmic_glink_altmode_put_retimer, alt_port->typec_retimer); - if (ret) + if (ret) { + fwnode_handle_put(fwnode); return ret; + } alt_port->typec_switch = fwnode_typec_switch_get(fwnode); - if (IS_ERR(alt_port->typec_switch)) + if (IS_ERR(alt_port->typec_switch)) { + fwnode_handle_put(fwnode); return dev_err_probe(dev, PTR_ERR(alt_port->typec_switch), "failed to acquire orientation-switch for port: %d\n", port); + } ret = devm_add_action_or_reset(dev, pmic_glink_altmode_put_switch, alt_port->typec_switch); - if (ret) + if (ret) { + fwnode_handle_put(fwnode); return ret; + } } altmode->client = devm_pmic_glink_register_client(dev, From e93c90f25fcee99881f902c9a98544feef2d5beb Mon Sep 17 00:00:00 2001 From: John Clark Date: Wed, 6 Sep 2023 01:23:05 +0000 Subject: [PATCH 0078/1397] arm64: dts: rockchip: Add NanoPC T6 PCIe e-key support [ Upstream commit ac76b786cc370b000c76f3115a5d2ee76ff05c08 ] before ~~~~ 0000:00:00.0 PCI bridge: Rockchip Electronics Co., Ltd RK3588 (rev 01) 0002:20:00.0 PCI bridge: Rockchip Electronics Co., Ltd RK3588 (rev 01) 0002:21:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8125 2.5GbE Controller (rev 05) 0004:40:00.0 PCI bridge: Rockchip Electronics Co., Ltd RK3588 (rev 01) 0004:41:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8125 2.5GbE Controller (rev 05) after ~~~ 0000:00:00.0 PCI bridge: Rockchip Electronics Co., Ltd RK3588 (rev 01) 0002:20:00.0 PCI bridge: Rockchip Electronics Co., Ltd RK3588 (rev 01) 0002:21:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8125 2.5GbE Controller (rev 05) 0003:30:00.0 PCI bridge: Rockchip Electronics Co., Ltd RK3588 (rev 01) 0003:31:00.0 Network controller: Realtek Semiconductor Co., Ltd. RTL8822CE 802.11ac PCIe Wireless Network Adapter 0004:40:00.0 PCI bridge: Rockchip Electronics Co., Ltd RK3588 (rev 01) 0004:41:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8125 2.5GbE Controller (rev 05) Signed-off-by: John Clark Link: https://lore.kernel.org/r/20230906012305.7113-1-inindev@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- .../boot/dts/rockchip/rk3588-nanopc-t6.dts | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts b/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts index 0bd80e515754..97af4f912828 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts @@ -137,6 +137,18 @@ vin-supply = <&vcc5v0_sys>; }; + vcc3v3_pcie2x1l0: vcc3v3-pcie2x1l0-regulator { + compatible = "regulator-fixed"; + enable-active-high; + gpio = <&gpio4 RK_PC2 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&pcie_m2_1_pwren>; + regulator-name = "vcc3v3_pcie2x1l0"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + vin-supply = <&vcc5v0_sys>; + }; + vcc3v3_pcie30: vcc3v3-pcie30-regulator { compatible = "regulator-fixed"; enable-active-high; @@ -421,6 +433,14 @@ status = "okay"; }; +&pcie2x1l1 { + reset-gpios = <&gpio4 RK_PA2 GPIO_ACTIVE_HIGH>; + vpcie3v3-supply = <&vcc3v3_pcie2x1l0>; + pinctrl-names = "default"; + pinctrl-0 = <&pcie2_1_rst>; + status = "okay"; +}; + &pcie2x1l2 { reset-gpios = <&gpio4 RK_PA4 GPIO_ACTIVE_HIGH>; vpcie3v3-supply = <&vcc_3v3_pcie20>; @@ -467,6 +487,10 @@ rockchip,pins = <4 RK_PB3 RK_FUNC_GPIO &pcfg_pull_none>; }; + pcie2_1_rst: pcie2-1-rst { + rockchip,pins = <4 RK_PA2 RK_FUNC_GPIO &pcfg_pull_none>; + }; + pcie2_2_rst: pcie2-2-rst { rockchip,pins = <4 RK_PA4 RK_FUNC_GPIO &pcfg_pull_none>; }; @@ -474,6 +498,10 @@ pcie_m2_0_pwren: pcie-m20-pwren { rockchip,pins = <2 RK_PC5 RK_FUNC_GPIO &pcfg_pull_none>; }; + + pcie_m2_1_pwren: pcie-m21-pwren { + rockchip,pins = <4 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>; + }; }; usb { From 6b9b8904474dbc4774477e8caa2dac07c87ed994 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Mon, 25 Sep 2023 18:10:15 +0300 Subject: [PATCH 0079/1397] arm64: dts: ls208xa: use a pseudo-bus to constrain usb dma size [ Upstream commit b39d5016456871a88f5cd141914a5043591b46f3 ] Wrap the usb controllers in an intermediate simple-bus and use it to constrain the dma address size of these usb controllers to the 40b that they generate toward the interconnect. This is required because the SoC uses 48b address sizes and this mismatch would lead to smmu context faults [1] because the usb generates 40b addresses while the smmu page tables are populated with 48b wide addresses. [1] xhci-hcd xhci-hcd.0.auto: xHCI Host Controller xhci-hcd xhci-hcd.0.auto: new USB bus registered, assigned bus number 1 xhci-hcd xhci-hcd.0.auto: hcc params 0x0220f66d hci version 0x100 quirks 0x0000000002000010 xhci-hcd xhci-hcd.0.auto: irq 108, io mem 0x03100000 xhci-hcd xhci-hcd.0.auto: xHCI Host Controller xhci-hcd xhci-hcd.0.auto: new USB bus registered, assigned bus number 2 xhci-hcd xhci-hcd.0.auto: Host supports USB 3.0 SuperSpeed arm-smmu 5000000.iommu: Unhandled context fault: fsr=0x402, iova=0xffffffb000, fsynr=0x0, cbfrsynra=0xc01, cb=3 Signed-off-by: Laurentiu Tudor Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../arm64/boot/dts/freescale/fsl-ls208xa.dtsi | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index d2f5345d0560..717288bbdb8b 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -1186,26 +1186,34 @@ dma-coherent; }; - usb0: usb@3100000 { - status = "disabled"; - compatible = "snps,dwc3"; - reg = <0x0 0x3100000 0x0 0x10000>; - interrupts = <0 80 0x4>; /* Level high type */ - dr_mode = "host"; - snps,quirk-frame-length-adjustment = <0x20>; - snps,dis_rxdet_inp3_quirk; - snps,incr-burst-type-adjustment = <1>, <4>, <8>, <16>; - }; + bus: bus { + #address-cells = <2>; + #size-cells = <2>; + compatible = "simple-bus"; + ranges; + dma-ranges = <0x0 0x0 0x0 0x0 0x100 0x00000000>; + + usb0: usb@3100000 { + compatible = "snps,dwc3"; + reg = <0x0 0x3100000 0x0 0x10000>; + interrupts = <0 80 0x4>; /* Level high type */ + dr_mode = "host"; + snps,quirk-frame-length-adjustment = <0x20>; + snps,dis_rxdet_inp3_quirk; + snps,incr-burst-type-adjustment = <1>, <4>, <8>, <16>; + status = "disabled"; + }; - usb1: usb@3110000 { - status = "disabled"; - compatible = "snps,dwc3"; - reg = <0x0 0x3110000 0x0 0x10000>; - interrupts = <0 81 0x4>; /* Level high type */ - dr_mode = "host"; - snps,quirk-frame-length-adjustment = <0x20>; - snps,dis_rxdet_inp3_quirk; - snps,incr-burst-type-adjustment = <1>, <4>, <8>, <16>; + usb1: usb@3110000 { + compatible = "snps,dwc3"; + reg = <0x0 0x3110000 0x0 0x10000>; + interrupts = <0 81 0x4>; /* Level high type */ + dr_mode = "host"; + snps,quirk-frame-length-adjustment = <0x20>; + snps,dis_rxdet_inp3_quirk; + snps,incr-burst-type-adjustment = <1>, <4>, <8>, <16>; + status = "disabled"; + }; }; ccn@4000000 { From 87215fbda37d5eceff83fa0e0003cd677cfda2fb Mon Sep 17 00:00:00 2001 From: zhujun2 Date: Tue, 17 Oct 2023 18:59:21 -0700 Subject: [PATCH 0080/1397] selftests/efivarfs: create-read: fix a resource leak [ Upstream commit 3f6f8a8c5e11a9b384a36df4f40f0c9a653b6975 ] The opened file should be closed in main(), otherwise resource leak will occur that this problem was discovered by code reading Signed-off-by: zhujun2 Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/efivarfs/create-read.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/efivarfs/create-read.c b/tools/testing/selftests/efivarfs/create-read.c index 9674a19396a3..7bc7af4eb2c1 100644 --- a/tools/testing/selftests/efivarfs/create-read.c +++ b/tools/testing/selftests/efivarfs/create-read.c @@ -32,8 +32,10 @@ int main(int argc, char **argv) rc = read(fd, buf, sizeof(buf)); if (rc != 0) { fprintf(stderr, "Reading a new var should return EOF\n"); + close(fd); return EXIT_FAILURE; } + close(fd); return EXIT_SUCCESS; } From a3e98ceff6e9ee7e6f012e704f51af4cdfafc41a Mon Sep 17 00:00:00 2001 From: Trevor Wu Date: Fri, 25 Aug 2023 10:49:33 +0800 Subject: [PATCH 0081/1397] ASoC: mediatek: mt8188-mt6359: support dynamic pinctrl [ Upstream commit d601bb78f06b9e3cbb52e6b87b88add9920a11b6 ] To avoid power leakage, it is recommended to replace the default pinctrl state with dynamic pinctrl since certain audio pinmux functions can remain in a HIGH state even when audio is disabled. Linking pinctrl with DAPM using SND_SOC_DAPM_PINCTRL will ensure that audio pins remain in GPIO mode by default and only switch to an audio function when necessary. Signed-off-by: Trevor Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230825024935.10878-2-trevor.wu@mediatek.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/mediatek/mt8188/mt8188-mt6359.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sound/soc/mediatek/mt8188/mt8188-mt6359.c b/sound/soc/mediatek/mt8188/mt8188-mt6359.c index 9017f48b6272..f7e22abb7584 100644 --- a/sound/soc/mediatek/mt8188/mt8188-mt6359.c +++ b/sound/soc/mediatek/mt8188/mt8188-mt6359.c @@ -246,6 +246,11 @@ static const struct snd_soc_dapm_widget mt8188_mt6359_widgets[] = { SND_SOC_DAPM_MIC("Headset Mic", NULL), SND_SOC_DAPM_SINK("HDMI"), SND_SOC_DAPM_SINK("DP"), + + /* dynamic pinctrl */ + SND_SOC_DAPM_PINCTRL("ETDM_SPK_PIN", "aud_etdm_spk_on", "aud_etdm_spk_off"), + SND_SOC_DAPM_PINCTRL("ETDM_HP_PIN", "aud_etdm_hp_on", "aud_etdm_hp_off"), + SND_SOC_DAPM_PINCTRL("MTKAIF_PIN", "aud_mtkaif_on", "aud_mtkaif_off"), }; static const struct snd_kcontrol_new mt8188_mt6359_controls[] = { @@ -267,6 +272,7 @@ static int mt8188_mt6359_mtkaif_calibration(struct snd_soc_pcm_runtime *rtd) snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME); struct snd_soc_component *cmpnt_codec = asoc_rtd_to_codec(rtd, 0)->component; + struct snd_soc_dapm_widget *pin_w = NULL, *w; struct mtk_base_afe *afe; struct mt8188_afe_private *afe_priv; struct mtkaif_param *param; @@ -306,6 +312,18 @@ static int mt8188_mt6359_mtkaif_calibration(struct snd_soc_pcm_runtime *rtd) return 0; } + for_each_card_widgets(rtd->card, w) { + if (!strcmp(w->name, "MTKAIF_PIN")) { + pin_w = w; + break; + } + } + + if (pin_w) + dapm_pinctrl_event(pin_w, NULL, SND_SOC_DAPM_PRE_PMU); + else + dev_dbg(afe->dev, "%s(), no pinmux widget, please check if default on\n", __func__); + pm_runtime_get_sync(afe->dev); mt6359_mtkaif_calibration_enable(cmpnt_codec); @@ -403,6 +421,9 @@ static int mt8188_mt6359_mtkaif_calibration(struct snd_soc_pcm_runtime *rtd) for (i = 0; i < MT8188_MTKAIF_MISO_NUM; i++) param->mtkaif_phase_cycle[i] = mtkaif_phase_cycle[i]; + if (pin_w) + dapm_pinctrl_event(pin_w, NULL, SND_SOC_DAPM_POST_PMD); + dev_dbg(afe->dev, "%s(), end, calibration ok %d\n", __func__, param->mtkaif_calibration_ok); From c766264e0e9e9a2f0f57b65ed0c98bab67cfdf2f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 12 Sep 2023 17:32:04 +0100 Subject: [PATCH 0082/1397] ASoC: soc-card: Add storage for PCI SSID [ Upstream commit 47f56e38a199bd45514b8e0142399cba4feeaf1a ] Add members to struct snd_soc_card to store the PCI subsystem ID (SSID) of the soundcard. The PCI specification provides two registers to store a vendor-specific SSID that can be read by drivers to uniquely identify a particular "soundcard". This is defined in the PCI specification to distinguish products that use the same silicon (and therefore have the same silicon ID) so that product-specific differences can be applied. PCI only defines 0xFFFF as an invalid value. 0x0000 is not defined as invalid. So the usual pattern of zero-filling the struct and then assuming a zero value unset will not work. A flag is included to indicate when the SSID information has been filled in. Unlike DMI information, which has a free-format entirely up to the vendor, the PCI SSID has a strictly defined format and a registry of vendor IDs. It is usual in Windows drivers that the SSID is used as the sole identifier of the specific end-product and the Windows driver contains tables mapping that to information about the hardware setup, rather than using ACPI properties. This SSID is important information for ASoC components that need to apply hardware-specific configuration on PCI-based systems. As the SSID is a generic part of the PCI specification and is treated as identifying the "soundcard", it is reasonable to include this information in struct snd_soc_card, instead of components inventing their own custom ways to pass this information around. Signed-off-by: Richard Fitzgerald Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230912163207.3498161-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- include/sound/soc-card.h | 37 +++++++++++++++++++++++++++++++++++++ include/sound/soc.h | 11 +++++++++++ 2 files changed, 48 insertions(+) diff --git a/include/sound/soc-card.h b/include/sound/soc-card.h index fc94dfb0021f..e8ff2e089cd0 100644 --- a/include/sound/soc-card.h +++ b/include/sound/soc-card.h @@ -59,6 +59,43 @@ int snd_soc_card_add_dai_link(struct snd_soc_card *card, void snd_soc_card_remove_dai_link(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link); +#ifdef CONFIG_PCI +static inline void snd_soc_card_set_pci_ssid(struct snd_soc_card *card, + unsigned short vendor, + unsigned short device) +{ + card->pci_subsystem_vendor = vendor; + card->pci_subsystem_device = device; + card->pci_subsystem_set = true; +} + +static inline int snd_soc_card_get_pci_ssid(struct snd_soc_card *card, + unsigned short *vendor, + unsigned short *device) +{ + if (!card->pci_subsystem_set) + return -ENOENT; + + *vendor = card->pci_subsystem_vendor; + *device = card->pci_subsystem_device; + + return 0; +} +#else /* !CONFIG_PCI */ +static inline void snd_soc_card_set_pci_ssid(struct snd_soc_card *card, + unsigned short vendor, + unsigned short device) +{ +} + +static inline int snd_soc_card_get_pci_ssid(struct snd_soc_card *card, + unsigned short *vendor, + unsigned short *device) +{ + return -ENOENT; +} +#endif /* CONFIG_PCI */ + /* device driver data */ static inline void snd_soc_card_set_drvdata(struct snd_soc_card *card, void *data) diff --git a/include/sound/soc.h b/include/sound/soc.h index 37f9d3fe302a..49ec688eed60 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -932,6 +932,17 @@ struct snd_soc_card { #ifdef CONFIG_DMI char dmi_longname[80]; #endif /* CONFIG_DMI */ + +#ifdef CONFIG_PCI + /* + * PCI does not define 0 as invalid, so pci_subsystem_set indicates + * whether a value has been written to these fields. + */ + unsigned short pci_subsystem_vendor; + unsigned short pci_subsystem_device; + bool pci_subsystem_set; +#endif /* CONFIG_PCI */ + char topology_shortname[32]; struct device *dev; From 17560515ae9204a2fccc3b5c1e8a9cb3d3ca369a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 12 Sep 2023 17:32:05 +0100 Subject: [PATCH 0083/1397] ASoC: SOF: Pass PCI SSID to machine driver [ Upstream commit ba2de401d32625fe538d3f2c00ca73740dd2d516 ] Pass the PCI SSID of the audio interface through to the machine driver. This allows the machine driver to use the SSID to uniquely identify the specific hardware configuration and apply any platform-specific configuration. struct snd_sof_pdata is passed around inside the SOF code, but it then passes configuration information to the machine driver through struct snd_soc_acpi_mach and struct snd_soc_acpi_mach_params. So SSID information has been added to both snd_sof_pdata and snd_soc_acpi_mach_params. PCI does not define 0x0000 as an invalid value so we can't use zero to indicate that the struct member was not written. Instead a flag is included to indicate that a value has been written to the subsystem_vendor and subsystem_device members. sof_pci_probe() creates the struct snd_sof_pdata. It is passed a struct pci_dev so it can fill in the SSID value. sof_machine_check() finds the appropriate struct snd_soc_acpi_mach. It copies the SSID information across to the struct snd_soc_acpi_mach_params. This done before calling any custom set_mach_params() so that it could be used by the set_mach_params() callback to apply variant params. The machine driver receives the struct snd_soc_acpi_mach as its platform_data. Signed-off-by: Richard Fitzgerald Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230912163207.3498161-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- include/sound/soc-acpi.h | 7 +++++++ include/sound/sof.h | 8 ++++++++ sound/soc/sof/sof-audio.c | 7 +++++++ sound/soc/sof/sof-pci-dev.c | 8 ++++++++ 4 files changed, 30 insertions(+) diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index 6d31d535e8f6..23d6d6bfb073 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -68,6 +68,10 @@ static inline struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg) * @i2s_link_mask: I2S/TDM links enabled on the board * @num_dai_drivers: number of elements in @dai_drivers * @dai_drivers: pointer to dai_drivers, used e.g. in nocodec mode + * @subsystem_vendor: optional PCI SSID vendor value + * @subsystem_device: optional PCI SSID device value + * @subsystem_id_set: true if a value has been written to + * subsystem_vendor and subsystem_device. */ struct snd_soc_acpi_mach_params { u32 acpi_ipc_irq_index; @@ -80,6 +84,9 @@ struct snd_soc_acpi_mach_params { u32 i2s_link_mask; u32 num_dai_drivers; struct snd_soc_dai_driver *dai_drivers; + unsigned short subsystem_vendor; + unsigned short subsystem_device; + bool subsystem_id_set; }; /** diff --git a/include/sound/sof.h b/include/sound/sof.h index d3c41f87ac31..51294f2ba302 100644 --- a/include/sound/sof.h +++ b/include/sound/sof.h @@ -64,6 +64,14 @@ struct snd_sof_pdata { const char *name; const char *platform; + /* + * PCI SSID. As PCI does not define 0 as invalid, the subsystem_id_set + * flag indicates that a value has been written to these members. + */ + unsigned short subsystem_vendor; + unsigned short subsystem_device; + bool subsystem_id_set; + struct device *dev; /* diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c index e5405f854a91..563fe6f7789f 100644 --- a/sound/soc/sof/sof-audio.c +++ b/sound/soc/sof/sof-audio.c @@ -1032,6 +1032,13 @@ int sof_machine_check(struct snd_sof_dev *sdev) mach = snd_sof_machine_select(sdev); if (mach) { sof_pdata->machine = mach; + + if (sof_pdata->subsystem_id_set) { + mach->mach_params.subsystem_vendor = sof_pdata->subsystem_vendor; + mach->mach_params.subsystem_device = sof_pdata->subsystem_device; + mach->mach_params.subsystem_id_set = true; + } + snd_sof_set_mach_params(mach, sdev); return 0; } diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index f42c85df88a8..69a2352f2e1a 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -221,6 +221,14 @@ int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) return ret; sof_pdata->name = pci_name(pci); + + /* PCI defines a vendor ID of 0xFFFF as invalid. */ + if (pci->subsystem_vendor != 0xFFFF) { + sof_pdata->subsystem_vendor = pci->subsystem_vendor; + sof_pdata->subsystem_device = pci->subsystem_device; + sof_pdata->subsystem_id_set = true; + } + sof_pdata->desc = desc; sof_pdata->dev = dev; From e9083128b156209d0d18e26b88adf680d069c60b Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 12 Sep 2023 17:32:06 +0100 Subject: [PATCH 0084/1397] ASoC: Intel: sof_sdw: Copy PCI SSID to struct snd_soc_card [ Upstream commit d8b387544ff4d02eda1d1839a0c601de4b037c33 ] If the PCI SSID has been set in the struct snd_soc_acpi_mach_params, copy this to struct snd_soc_card so that it can be used by other ASoC components. This is important for components that must apply system-specific configuration. Signed-off-by: Richard Fitzgerald Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230912163207.3498161-4-rf@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/sof_sdw.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 47d22cab5af6..24e966a2ac2b 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -1934,6 +1934,12 @@ static int mc_probe(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(codec_info_list); i++) codec_info_list[i].amp_num = 0; + if (mach->mach_params.subsystem_id_set) { + snd_soc_card_set_pci_ssid(card, + mach->mach_params.subsystem_vendor, + mach->mach_params.subsystem_device); + } + ret = sof_card_dai_links_create(card); if (ret < 0) return ret; From 66a492c7667e5abbd4e21e93b991a523d9735813 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 12 Sep 2023 17:32:07 +0100 Subject: [PATCH 0085/1397] ASoC: cs35l56: Use PCI SSID as the firmware UID [ Upstream commit 1a1c3d794ef65ef2978c5e65e1aed3fe6f014e90 ] If the driver properties do not define a cirrus,firmware-uid try to get the PCI SSID as the UID. On PCI-based systems the PCI SSID is used to uniquely identify the specific sound hardware. This is the standard mechanism for x86 systems and is the way to get a unique system identifier for systems that use the CS35L56 on SoundWire. For non-SoundWire systems there is no Windows equivalent of the ASoC driver in I2C/SPI mode. These would be: 1. HDA systems, which are handled by the HDA subsystem. 2. Linux-specific systems. 3. Composite devices where the cs35l56 is not present in ACPI and is configured using software nodes. Case 2 can use the firmware-uid property, though the PCI SSID is supported as an alternative, as it is the standard PCI mechanism. Case 3 is a SoundWire system where some other codec is the SoundWire bridge device and CS35L56 is not listed in ACPI. As these are SoundWire systems they will normally use the PCI SSID. Signed-off-by: Richard Fitzgerald Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230912163207.3498161-5-rf@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/cs35l56.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index f9059780b7a7..32d4ab2cd672 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -772,9 +772,20 @@ static int cs35l56_component_probe(struct snd_soc_component *component) { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); struct dentry *debugfs_root = component->debugfs_root; + unsigned short vendor, device; BUILD_BUG_ON(ARRAY_SIZE(cs35l56_tx_input_texts) != ARRAY_SIZE(cs35l56_tx_input_values)); + if (!cs35l56->dsp.system_name && + (snd_soc_card_get_pci_ssid(component->card, &vendor, &device) == 0)) { + cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev, + GFP_KERNEL, + "%04x%04x", + vendor, device); + if (!cs35l56->dsp.system_name) + return -ENOMEM; + } + if (!wait_for_completion_timeout(&cs35l56->init_completion, msecs_to_jiffies(5000))) { dev_err(cs35l56->base.dev, "%s: init_completion timed out\n", __func__); From 372636debe852913529b1716f44addd94fff2d28 Mon Sep 17 00:00:00 2001 From: Lu Jialin Date: Mon, 4 Sep 2023 13:33:41 +0000 Subject: [PATCH 0086/1397] crypto: pcrypt - Fix hungtask for PADATA_RESET [ Upstream commit 8f4f68e788c3a7a696546291258bfa5fdb215523 ] We found a hungtask bug in test_aead_vec_cfg as follows: INFO: task cryptomgr_test:391009 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. Call trace: __switch_to+0x98/0xe0 __schedule+0x6c4/0xf40 schedule+0xd8/0x1b4 schedule_timeout+0x474/0x560 wait_for_common+0x368/0x4e0 wait_for_completion+0x20/0x30 wait_for_completion+0x20/0x30 test_aead_vec_cfg+0xab4/0xd50 test_aead+0x144/0x1f0 alg_test_aead+0xd8/0x1e0 alg_test+0x634/0x890 cryptomgr_test+0x40/0x70 kthread+0x1e0/0x220 ret_from_fork+0x10/0x18 Kernel panic - not syncing: hung_task: blocked tasks For padata_do_parallel, when the return err is 0 or -EBUSY, it will call wait_for_completion(&wait->completion) in test_aead_vec_cfg. In normal case, aead_request_complete() will be called in pcrypt_aead_serial and the return err is 0 for padata_do_parallel. But, when pinst->flags is PADATA_RESET, the return err is -EBUSY for padata_do_parallel, and it won't call aead_request_complete(). Therefore, test_aead_vec_cfg will hung at wait_for_completion(&wait->completion), which will cause hungtask. The problem comes as following: (padata_do_parallel) | rcu_read_lock_bh(); | err = -EINVAL; | (padata_replace) | pinst->flags |= PADATA_RESET; err = -EBUSY | if (pinst->flags & PADATA_RESET) | rcu_read_unlock_bh() | return err In order to resolve the problem, we replace the return err -EBUSY with -EAGAIN, which means parallel_data is changing, and the caller should call it again. v3: remove retry and just change the return err. v2: introduce padata_try_do_parallel() in pcrypt_aead_encrypt and pcrypt_aead_decrypt to solve the hungtask. Signed-off-by: Lu Jialin Signed-off-by: Guo Zihua Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/pcrypt.c | 4 ++++ kernel/padata.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 8c1d0ca41213..d0d954fe9d54 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -117,6 +117,8 @@ static int pcrypt_aead_encrypt(struct aead_request *req) err = padata_do_parallel(ictx->psenc, padata, &ctx->cb_cpu); if (!err) return -EINPROGRESS; + if (err == -EBUSY) + return -EAGAIN; return err; } @@ -164,6 +166,8 @@ static int pcrypt_aead_decrypt(struct aead_request *req) err = padata_do_parallel(ictx->psdec, padata, &ctx->cb_cpu); if (!err) return -EINPROGRESS; + if (err == -EBUSY) + return -EAGAIN; return err; } diff --git a/kernel/padata.c b/kernel/padata.c index ff349e1084c1..179fb1518070 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -202,7 +202,7 @@ int padata_do_parallel(struct padata_shell *ps, *cb_cpu = cpu; } - err = -EBUSY; + err = -EBUSY; if ((pinst->flags & PADATA_RESET)) goto out; From 424a8ca3c21ce16b551b7100eccd2e7eab7940e8 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Tue, 19 Sep 2023 12:24:16 +0300 Subject: [PATCH 0087/1397] ASoC: SOF: ipc4: handle EXCEPTION_CAUGHT notification from firmware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c1c48fd6bbe788458e3685fea74bdb3cb148ff93 ] Driver will receive exception IPC message and process it by snd_sof_dsp_panic. Signed-off-by: Rander Wang Reviewed-by: Péter Ujfalusi Reviewed-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230919092416.4137-10-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/ipc4.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/sof/ipc4.c b/sound/soc/sof/ipc4.c index ab6eddd91bb7..1b09496733fb 100644 --- a/sound/soc/sof/ipc4.c +++ b/sound/soc/sof/ipc4.c @@ -614,6 +614,9 @@ static void sof_ipc4_rx_msg(struct snd_sof_dev *sdev) case SOF_IPC4_NOTIFY_LOG_BUFFER_STATUS: sof_ipc4_mtrace_update_pos(sdev, SOF_IPC4_LOG_CORE_GET(ipc4_msg->primary)); break; + case SOF_IPC4_NOTIFY_EXCEPTION_CAUGHT: + snd_sof_dsp_panic(sdev, 0, true); + break; default: dev_dbg(sdev->dev, "Unhandled DSP message: %#x|%#x\n", ipc4_msg->primary, ipc4_msg->extension); From 69319be69f13c4b758a0cde1cb6d77f3fde190b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 19 Sep 2023 15:56:41 +0300 Subject: [PATCH 0088/1397] RDMA/hfi1: Use FIELD_GET() to extract Link Width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8bf7187d978610b9e327a3d92728c8864a575ebd ] Use FIELD_GET() to extract PCIe Negotiated Link Width field instead of custom masking and shifting, and remove extract_width() which only wraps that FIELD_GET(). Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230919125648.1920-2-ilpo.jarvinen@linux.intel.com Reviewed-by: Jonathan Cameron Reviewed-by: Dean Luick Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/pcie.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 08732e1ac966..c132a9c073bf 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -3,6 +3,7 @@ * Copyright(c) 2015 - 2019 Intel Corporation. */ +#include #include #include #include @@ -210,12 +211,6 @@ static u32 extract_speed(u16 linkstat) return speed; } -/* return the PCIe link speed from the given link status */ -static u32 extract_width(u16 linkstat) -{ - return (linkstat & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT; -} - /* read the link status and set dd->{lbus_width,lbus_speed,lbus_info} */ static void update_lbus_info(struct hfi1_devdata *dd) { @@ -228,7 +223,7 @@ static void update_lbus_info(struct hfi1_devdata *dd) return; } - dd->lbus_width = extract_width(linkstat); + dd->lbus_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, linkstat); dd->lbus_speed = extract_speed(linkstat); snprintf(dd->lbus_info, sizeof(dd->lbus_info), "PCIe,%uMHz,x%u", dd->lbus_speed, dd->lbus_width); From b4465009e7d60c6111946db4c8f1e50d401ed7be Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 13 Sep 2023 10:15:25 +0800 Subject: [PATCH 0089/1397] scsi: hisi_sas: Set debugfs_dir pointer to NULL after removing debugfs [ Upstream commit 6de426f9276c448e2db7238911c97fb157cb23be ] If init debugfs failed during device registration due to memory allocation failure, debugfs_remove_recursive() is called, after which debugfs_dir is not set to NULL. debugfs_remove_recursive() will be called again during device removal. As a result, illegal pointer is accessed. [ 1665.467244] hisi_sas_v3_hw 0000:b4:02.0: failed to init debugfs! ... [ 1669.836708] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0 [ 1669.872669] pc : down_write+0x24/0x70 [ 1669.876315] lr : down_write+0x1c/0x70 [ 1669.879961] sp : ffff000036f53a30 [ 1669.883260] x29: ffff000036f53a30 x28: ffffa027c31549f8 [ 1669.888547] x27: ffffa027c3140000 x26: 0000000000000000 [ 1669.893834] x25: ffffa027bf37c270 x24: ffffa027bf37c270 [ 1669.899122] x23: ffff0000095406b8 x22: ffff0000095406a8 [ 1669.904408] x21: 0000000000000000 x20: ffffa027bf37c310 [ 1669.909695] x19: 00000000000000a0 x18: ffff8027dcd86f10 [ 1669.914982] x17: 0000000000000000 x16: 0000000000000000 [ 1669.920268] x15: 0000000000000000 x14: ffffa0274014f870 [ 1669.925555] x13: 0000000000000040 x12: 0000000000000228 [ 1669.930842] x11: 0000000000000020 x10: 0000000000000bb0 [ 1669.936129] x9 : ffff000036f537f0 x8 : ffff80273088ca10 [ 1669.941416] x7 : 000000000000001d x6 : 00000000ffffffff [ 1669.946702] x5 : ffff000008a36310 x4 : ffff80273088be00 [ 1669.951989] x3 : ffff000009513e90 x2 : 0000000000000000 [ 1669.957276] x1 : 00000000000000a0 x0 : ffffffff00000001 [ 1669.962563] Call trace: [ 1669.965000] down_write+0x24/0x70 [ 1669.968301] debugfs_remove_recursive+0x5c/0x1b0 [ 1669.972905] hisi_sas_debugfs_exit+0x24/0x30 [hisi_sas_main] [ 1669.978541] hisi_sas_v3_remove+0x130/0x150 [hisi_sas_v3_hw] [ 1669.984175] pci_device_remove+0x48/0xd8 [ 1669.988082] device_release_driver_internal+0x1b4/0x250 [ 1669.993282] device_release_driver+0x28/0x38 [ 1669.997534] pci_stop_bus_device+0x84/0xb8 [ 1670.001611] pci_stop_and_remove_bus_device_locked+0x24/0x40 [ 1670.007244] remove_store+0xfc/0x140 [ 1670.010802] dev_attr_store+0x44/0x60 [ 1670.014448] sysfs_kf_write+0x58/0x80 [ 1670.018095] kernfs_fop_write+0xe8/0x1f0 [ 1670.022000] __vfs_write+0x60/0x190 [ 1670.025472] vfs_write+0xac/0x1c0 [ 1670.028771] ksys_write+0x6c/0xd8 [ 1670.032071] __arm64_sys_write+0x24/0x30 [ 1670.035977] el0_svc_common+0x78/0x130 [ 1670.039710] el0_svc_handler+0x38/0x78 [ 1670.043442] el0_svc+0x8/0xc To fix this, set debugfs_dir to NULL after debugfs_remove_recursive(). Signed-off-by: Yihang Li Signed-off-by: Xingui Yang Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1694571327-78697-2-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index bbb64ee6afd7..089186fe1791 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4865,6 +4865,12 @@ static void debugfs_bist_init_v3_hw(struct hisi_hba *hisi_hba) hisi_hba->debugfs_bist_linkrate = SAS_LINK_RATE_1_5_GBPS; } +static void debugfs_exit_v3_hw(struct hisi_hba *hisi_hba) +{ + debugfs_remove_recursive(hisi_hba->debugfs_dir); + hisi_hba->debugfs_dir = NULL; +} + static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; @@ -4888,18 +4894,13 @@ static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) for (i = 0; i < hisi_sas_debugfs_dump_count; i++) { if (debugfs_alloc_v3_hw(hisi_hba, i)) { - debugfs_remove_recursive(hisi_hba->debugfs_dir); + debugfs_exit_v3_hw(hisi_hba); dev_dbg(dev, "failed to init debugfs!\n"); break; } } } -static void debugfs_exit_v3_hw(struct hisi_hba *hisi_hba) -{ - debugfs_remove_recursive(hisi_hba->debugfs_dir); -} - static int hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) { From 8bbe784c2ff28d56ca0c548aaf3e584edc77052d Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Thu, 21 Sep 2023 17:54:25 -0500 Subject: [PATCH 0090/1397] scsi: ibmvfc: Remove BUG_ON in the case of an empty event pool [ Upstream commit b39f2d10b86d0af353ea339e5815820026bca48f ] In practice the driver should never send more commands than are allocated to a queue's event pool. In the unlikely event that this happens, the code asserts a BUG_ON, and in the case that the kernel is not configured to crash on panic returns a junk event pointer from the empty event list causing things to spiral from there. This BUG_ON is a historical artifact of the ibmvfc driver first being upstreamed, and it is well known now that the use of BUG_ON is bad practice except in the most unrecoverable scenario. There is nothing about this scenario that prevents the driver from recovering and carrying on. Remove the BUG_ON in question from ibmvfc_get_event() and return a NULL pointer in the case of an empty event pool. Update all call sites to ibmvfc_get_event() to check for a NULL pointer and perfrom the appropriate failure or recovery action. Signed-off-by: Tyrel Datwyler Link: https://lore.kernel.org/r/20230921225435.3537728-2-tyreld@linux.ibm.com Reviewed-by: Brian King Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ibmvscsi/ibmvfc.c | 124 ++++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 470e8e6c41b6..c98346e464b4 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1518,7 +1518,11 @@ static struct ibmvfc_event *ibmvfc_get_event(struct ibmvfc_queue *queue) unsigned long flags; spin_lock_irqsave(&queue->l_lock, flags); - BUG_ON(list_empty(&queue->free)); + if (list_empty(&queue->free)) { + ibmvfc_log(queue->vhost, 4, "empty event pool on queue:%ld\n", queue->hwq_id); + spin_unlock_irqrestore(&queue->l_lock, flags); + return NULL; + } evt = list_entry(queue->free.next, struct ibmvfc_event, queue_list); atomic_set(&evt->free, 0); list_del(&evt->queue_list); @@ -1947,9 +1951,15 @@ static int ibmvfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) if (vhost->using_channels) { scsi_channel = hwq % vhost->scsi_scrqs.active_queues; evt = ibmvfc_get_event(&vhost->scsi_scrqs.scrqs[scsi_channel]); + if (!evt) + return SCSI_MLQUEUE_HOST_BUSY; + evt->hwq = hwq % vhost->scsi_scrqs.active_queues; - } else + } else { evt = ibmvfc_get_event(&vhost->crq); + if (!evt) + return SCSI_MLQUEUE_HOST_BUSY; + } ibmvfc_init_event(evt, ibmvfc_scsi_done, IBMVFC_CMD_FORMAT); evt->cmnd = cmnd; @@ -2037,6 +2047,11 @@ static int ibmvfc_bsg_timeout(struct bsg_job *job) vhost->aborting_passthru = 1; evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + spin_unlock_irqrestore(vhost->host->host_lock, flags); + return -ENOMEM; + } + ibmvfc_init_event(evt, ibmvfc_bsg_timeout_done, IBMVFC_MAD_FORMAT); tmf = &evt->iu.tmf; @@ -2095,6 +2110,10 @@ static int ibmvfc_bsg_plogi(struct ibmvfc_host *vhost, unsigned int port_id) goto unlock_out; evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + rc = -ENOMEM; + goto unlock_out; + } ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_MAD_FORMAT); plogi = &evt->iu.plogi; memset(plogi, 0, sizeof(*plogi)); @@ -2213,6 +2232,11 @@ static int ibmvfc_bsg_request(struct bsg_job *job) } evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + spin_unlock_irqrestore(vhost->host->host_lock, flags); + rc = -ENOMEM; + goto out; + } ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_MAD_FORMAT); mad = &evt->iu.passthru; @@ -2301,6 +2325,11 @@ static int ibmvfc_reset_device(struct scsi_device *sdev, int type, char *desc) else evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + spin_unlock_irqrestore(vhost->host->host_lock, flags); + return -ENOMEM; + } + ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_CMD_FORMAT); tmf = ibmvfc_init_vfc_cmd(evt, sdev); iu = ibmvfc_get_fcp_iu(vhost, tmf); @@ -2504,6 +2533,8 @@ static struct ibmvfc_event *ibmvfc_init_tmf(struct ibmvfc_queue *queue, struct ibmvfc_tmf *tmf; evt = ibmvfc_get_event(queue); + if (!evt) + return NULL; ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_MAD_FORMAT); tmf = &evt->iu.tmf; @@ -2560,6 +2591,11 @@ static int ibmvfc_cancel_all_mq(struct scsi_device *sdev, int type) if (found_evt && vhost->logged_in) { evt = ibmvfc_init_tmf(&queues[i], sdev, type); + if (!evt) { + spin_unlock(queues[i].q_lock); + spin_unlock_irqrestore(vhost->host->host_lock, flags); + return -ENOMEM; + } evt->sync_iu = &queues[i].cancel_rsp; ibmvfc_send_event(evt, vhost, default_timeout); list_add_tail(&evt->cancel, &cancelq); @@ -2773,6 +2809,10 @@ static int ibmvfc_abort_task_set(struct scsi_device *sdev) if (vhost->state == IBMVFC_ACTIVE) { evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + spin_unlock_irqrestore(vhost->host->host_lock, flags); + return -ENOMEM; + } ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_CMD_FORMAT); tmf = ibmvfc_init_vfc_cmd(evt, sdev); iu = ibmvfc_get_fcp_iu(vhost, tmf); @@ -4031,6 +4071,12 @@ static void ibmvfc_tgt_send_prli(struct ibmvfc_target *tgt) kref_get(&tgt->kref); evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE); + kref_put(&tgt->kref, ibmvfc_release_tgt); + __ibmvfc_reset_host(vhost); + return; + } vhost->discovery_threads++; ibmvfc_init_event(evt, ibmvfc_tgt_prli_done, IBMVFC_MAD_FORMAT); evt->tgt = tgt; @@ -4138,6 +4184,12 @@ static void ibmvfc_tgt_send_plogi(struct ibmvfc_target *tgt) kref_get(&tgt->kref); tgt->logo_rcvd = 0; evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE); + kref_put(&tgt->kref, ibmvfc_release_tgt); + __ibmvfc_reset_host(vhost); + return; + } vhost->discovery_threads++; ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_INIT_WAIT); ibmvfc_init_event(evt, ibmvfc_tgt_plogi_done, IBMVFC_MAD_FORMAT); @@ -4214,6 +4266,8 @@ static struct ibmvfc_event *__ibmvfc_tgt_get_implicit_logout_evt(struct ibmvfc_t kref_get(&tgt->kref); evt = ibmvfc_get_event(&vhost->crq); + if (!evt) + return NULL; ibmvfc_init_event(evt, done, IBMVFC_MAD_FORMAT); evt->tgt = tgt; mad = &evt->iu.implicit_logout; @@ -4241,6 +4295,13 @@ static void ibmvfc_tgt_implicit_logout(struct ibmvfc_target *tgt) vhost->discovery_threads++; evt = __ibmvfc_tgt_get_implicit_logout_evt(tgt, ibmvfc_tgt_implicit_logout_done); + if (!evt) { + vhost->discovery_threads--; + ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE); + kref_put(&tgt->kref, ibmvfc_release_tgt); + __ibmvfc_reset_host(vhost); + return; + } ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_INIT_WAIT); if (ibmvfc_send_event(evt, vhost, default_timeout)) { @@ -4380,6 +4441,12 @@ static void ibmvfc_tgt_move_login(struct ibmvfc_target *tgt) kref_get(&tgt->kref); evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT); + kref_put(&tgt->kref, ibmvfc_release_tgt); + __ibmvfc_reset_host(vhost); + return; + } vhost->discovery_threads++; ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_INIT_WAIT); ibmvfc_init_event(evt, ibmvfc_tgt_move_login_done, IBMVFC_MAD_FORMAT); @@ -4546,6 +4613,14 @@ static void ibmvfc_adisc_timeout(struct timer_list *t) vhost->abort_threads++; kref_get(&tgt->kref); evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + tgt_err(tgt, "Failed to get cancel event for ADISC.\n"); + vhost->abort_threads--; + kref_put(&tgt->kref, ibmvfc_release_tgt); + __ibmvfc_reset_host(vhost); + spin_unlock_irqrestore(vhost->host->host_lock, flags); + return; + } ibmvfc_init_event(evt, ibmvfc_tgt_adisc_cancel_done, IBMVFC_MAD_FORMAT); evt->tgt = tgt; @@ -4596,6 +4671,12 @@ static void ibmvfc_tgt_adisc(struct ibmvfc_target *tgt) kref_get(&tgt->kref); evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE); + kref_put(&tgt->kref, ibmvfc_release_tgt); + __ibmvfc_reset_host(vhost); + return; + } vhost->discovery_threads++; ibmvfc_init_event(evt, ibmvfc_tgt_adisc_done, IBMVFC_MAD_FORMAT); evt->tgt = tgt; @@ -4699,6 +4780,12 @@ static void ibmvfc_tgt_query_target(struct ibmvfc_target *tgt) kref_get(&tgt->kref); evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_NONE); + kref_put(&tgt->kref, ibmvfc_release_tgt); + __ibmvfc_reset_host(vhost); + return; + } vhost->discovery_threads++; evt->tgt = tgt; ibmvfc_init_event(evt, ibmvfc_tgt_query_target_done, IBMVFC_MAD_FORMAT); @@ -4871,6 +4958,13 @@ static void ibmvfc_discover_targets(struct ibmvfc_host *vhost) { struct ibmvfc_discover_targets *mad; struct ibmvfc_event *evt = ibmvfc_get_event(&vhost->crq); + int level = IBMVFC_DEFAULT_LOG_LEVEL; + + if (!evt) { + ibmvfc_log(vhost, level, "Discover Targets failed: no available events\n"); + ibmvfc_hard_reset_host(vhost); + return; + } ibmvfc_init_event(evt, ibmvfc_discover_targets_done, IBMVFC_MAD_FORMAT); mad = &evt->iu.discover_targets; @@ -4948,8 +5042,15 @@ static void ibmvfc_channel_setup(struct ibmvfc_host *vhost) struct ibmvfc_scsi_channels *scrqs = &vhost->scsi_scrqs; unsigned int num_channels = min(vhost->client_scsi_channels, vhost->max_vios_scsi_channels); + int level = IBMVFC_DEFAULT_LOG_LEVEL; int i; + if (!evt) { + ibmvfc_log(vhost, level, "Channel Setup failed: no available events\n"); + ibmvfc_hard_reset_host(vhost); + return; + } + memset(setup_buf, 0, sizeof(*setup_buf)); if (num_channels == 0) setup_buf->flags = cpu_to_be32(IBMVFC_CANCEL_CHANNELS); @@ -5011,6 +5112,13 @@ static void ibmvfc_channel_enquiry(struct ibmvfc_host *vhost) { struct ibmvfc_channel_enquiry *mad; struct ibmvfc_event *evt = ibmvfc_get_event(&vhost->crq); + int level = IBMVFC_DEFAULT_LOG_LEVEL; + + if (!evt) { + ibmvfc_log(vhost, level, "Channel Enquiry failed: no available events\n"); + ibmvfc_hard_reset_host(vhost); + return; + } ibmvfc_init_event(evt, ibmvfc_channel_enquiry_done, IBMVFC_MAD_FORMAT); mad = &evt->iu.channel_enquiry; @@ -5133,6 +5241,12 @@ static void ibmvfc_npiv_login(struct ibmvfc_host *vhost) struct ibmvfc_npiv_login_mad *mad; struct ibmvfc_event *evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + ibmvfc_dbg(vhost, "NPIV Login failed: no available events\n"); + ibmvfc_hard_reset_host(vhost); + return; + } + ibmvfc_gather_partition_info(vhost); ibmvfc_set_login_info(vhost); ibmvfc_init_event(evt, ibmvfc_npiv_login_done, IBMVFC_MAD_FORMAT); @@ -5197,6 +5311,12 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *vhost) struct ibmvfc_event *evt; evt = ibmvfc_get_event(&vhost->crq); + if (!evt) { + ibmvfc_dbg(vhost, "NPIV Logout failed: no available events\n"); + ibmvfc_hard_reset_host(vhost); + return; + } + ibmvfc_init_event(evt, ibmvfc_npiv_logout_done, IBMVFC_MAD_FORMAT); mad = &evt->iu.npiv_logout; From 1a7c53fdea1d189087544d9a606d249e93c4934b Mon Sep 17 00:00:00 2001 From: Juntong Deng Date: Mon, 2 Oct 2023 17:56:58 +0800 Subject: [PATCH 0091/1397] fs/jfs: Add check for negative db_l2nbperpage [ Upstream commit 525b861a008143048535011f3816d407940f4bfa ] l2nbperpage is log2(number of blks per page), and the minimum legal value should be 0, not negative. In the case of l2nbperpage being negative, an error will occur when subsequently used as shift exponent. Syzbot reported this bug: UBSAN: shift-out-of-bounds in fs/jfs/jfs_dmap.c:799:12 shift exponent -16777216 is negative Reported-by: syzbot+debee9ab7ae2b34b0307@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=debee9ab7ae2b34b0307 Signed-off-by: Juntong Deng Signed-off-by: Dave Kleikamp Signed-off-by: Sasha Levin --- fs/jfs/jfs_dmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 88afd108c2dd..3a1842348112 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -180,7 +180,8 @@ int dbMount(struct inode *ipbmap) bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); - if (bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE) { + if (bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE || + bmp->db_l2nbperpage < 0) { err = -EINVAL; goto err_release_metapage; } From 2323de34a3ae61a9f9b544c18583f71cea86721f Mon Sep 17 00:00:00 2001 From: Juntong Deng Date: Wed, 4 Oct 2023 02:06:41 +0800 Subject: [PATCH 0092/1397] fs/jfs: Add validity check for db_maxag and db_agpref [ Upstream commit 64933ab7b04881c6c18b21ff206c12278341c72e ] Both db_maxag and db_agpref are used as the index of the db_agfree array, but there is currently no validity check for db_maxag and db_agpref, which can lead to errors. The following is related bug reported by Syzbot: UBSAN: array-index-out-of-bounds in fs/jfs/jfs_dmap.c:639:20 index 7936 is out of range for type 'atomic_t[128]' Add checking that the values of db_maxag and db_agpref are valid indexes for the db_agfree array. Reported-by: syzbot+38e876a8aa44b7115c76@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=38e876a8aa44b7115c76 Signed-off-by: Juntong Deng Signed-off-by: Dave Kleikamp Signed-off-by: Sasha Levin --- fs/jfs/jfs_dmap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 3a1842348112..4d59373f9e6c 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -195,6 +195,12 @@ int dbMount(struct inode *ipbmap) bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); + if (bmp->db_maxag >= MAXAG || bmp->db_maxag < 0 || + bmp->db_agpref >= MAXAG || bmp->db_agpref < 0) { + err = -EINVAL; + goto err_release_metapage; + } + bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight); bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); From 87c681ab49e99039ff2dd3e71852417381b13878 Mon Sep 17 00:00:00 2001 From: Manas Ghandat Date: Wed, 4 Oct 2023 11:17:18 +0530 Subject: [PATCH 0093/1397] jfs: fix array-index-out-of-bounds in dbFindLeaf [ Upstream commit 22cad8bc1d36547cdae0eef316c47d917ce3147c ] Currently while searching for dmtree_t for sufficient free blocks there is an array out of bounds while getting element in tp->dm_stree. To add the required check for out of bound we first need to determine the type of dmtree. Thus added an extra parameter to dbFindLeaf so that the type of tree can be determined and the required check can be applied. Reported-by: syzbot+aea1ad91e854d0a83e04@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=aea1ad91e854d0a83e04 Signed-off-by: Manas Ghandat Signed-off-by: Dave Kleikamp Signed-off-by: Sasha Levin --- fs/jfs/jfs_dmap.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 4d59373f9e6c..11c77757ead9 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -87,7 +87,7 @@ static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks); static int dbFindBits(u32 word, int l2nb); static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno); -static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx); +static int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl); static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, int nblocks); static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, @@ -1717,7 +1717,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) * dbFindLeaf() returns the index of the leaf at which * free space was found. */ - rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx); + rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx, true); /* release the buffer. */ @@ -1964,7 +1964,7 @@ dbAllocDmapLev(struct bmap * bmp, * free space. if sufficient free space is found, dbFindLeaf() * returns the index of the leaf at which free space was found. */ - if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx)) + if (dbFindLeaf((dmtree_t *) &dp->tree, l2nb, &leafidx, false)) return -ENOSPC; if (leafidx < 0) @@ -2928,14 +2928,18 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval) * leafidx - return pointer to be set to the index of the leaf * describing at least l2nb free blocks if sufficient * free blocks are found. + * is_ctl - determines if the tree is of type ctl * * RETURN VALUES: * 0 - success * -ENOSPC - insufficient free blocks. */ -static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) +static int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl) { int ti, n = 0, k, x = 0; + int max_size; + + max_size = is_ctl ? CTLTREESIZE : TREESIZE; /* first check the root of the tree to see if there is * sufficient free space. @@ -2956,6 +2960,8 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) /* sufficient free space found. move to the next * level (or quit if this is the last level). */ + if (x + n > max_size) + return -ENOSPC; if (l2nb <= tp->dmt_stree[x + n]) break; } From 1708d0a9917fea579cc9da3d87b154285abd2cd8 Mon Sep 17 00:00:00 2001 From: Manas Ghandat Date: Wed, 4 Oct 2023 13:10:40 +0530 Subject: [PATCH 0094/1397] jfs: fix array-index-out-of-bounds in diAlloc [ Upstream commit 05d9ea1ceb62a55af6727a69269a4fd310edf483 ] Currently there is not check against the agno of the iag while allocating new inodes to avoid fragmentation problem. Added the check which is required. Reported-by: syzbot+79d792676d8ac050949f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=79d792676d8ac050949f Signed-off-by: Manas Ghandat Signed-off-by: Dave Kleikamp Signed-off-by: Sasha Levin --- fs/jfs/jfs_imap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 923a58422c46..1b267eec3f36 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -1320,7 +1320,7 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) int diAlloc(struct inode *pip, bool dir, struct inode *ip) { int rc, ino, iagno, addext, extno, bitno, sword; - int nwords, rem, i, agno; + int nwords, rem, i, agno, dn_numag; u32 mask, inosmap, extsmap; struct inode *ipimap; struct metapage *mp; @@ -1356,6 +1356,9 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) /* get the ag number of this iag */ agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb)); + dn_numag = JFS_SBI(pip->i_sb)->bmap->db_numag; + if (agno < 0 || agno > dn_numag) + return -EIO; if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { /* From 69f2af4db703f3332ba5a96f6eec3f3975dc3570 Mon Sep 17 00:00:00 2001 From: Mikhail Khvainitski Date: Sun, 24 Sep 2023 01:58:30 +0300 Subject: [PATCH 0095/1397] HID: lenovo: Detect quirk-free fw on cptkbd and stop applying workaround [ Upstream commit 46a0a2c96f0f47628190f122c2e3d879e590bcbe ] Built-in firmware of cptkbd handles scrolling by itself (when middle button is pressed) but with issues: it does not support horizontal and hi-res scrolling and upon middle button release it sends middle button click even if there was a scrolling event. Commit 3cb5ff0220e3 ("HID: lenovo: Hide middle-button press until release") workarounds last issue but it's impossible to workaround scrolling-related issues without firmware modification. Likely, Dennis Schneider has reverse engineered the firmware and provided an instruction on how to patch it [1]. However, aforementioned workaround prevents userspace (libinput) from knowing exact moment when middle button has been pressed down and performing "On-Button scrolling". This commit detects correctly-behaving patched firmware if cursor movement events has been received during middle button being pressed and stops applying workaround for this device. Link: https://hohlerde.org/rauch/en/elektronik/projekte/tpkbd-fix/ [1] Signed-off-by: Mikhail Khvainitski Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-lenovo.c | 68 ++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 44763c0da444..9c1181313e44 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -51,7 +51,12 @@ struct lenovo_drvdata { int select_right; int sensitivity; int press_speed; - u8 middlebutton_state; /* 0:Up, 1:Down (undecided), 2:Scrolling */ + /* 0: Up + * 1: Down (undecided) + * 2: Scrolling + * 3: Patched firmware, disable workaround + */ + u8 middlebutton_state; bool fn_lock; }; @@ -668,31 +673,48 @@ static int lenovo_event_cptkbd(struct hid_device *hdev, { struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); - /* "wheel" scroll events */ - if (usage->type == EV_REL && (usage->code == REL_WHEEL || - usage->code == REL_HWHEEL)) { - /* Scroll events disable middle-click event */ - cptkbd_data->middlebutton_state = 2; - return 0; - } + if (cptkbd_data->middlebutton_state != 3) { + /* REL_X and REL_Y events during middle button pressed + * are only possible on patched, bug-free firmware + * so set middlebutton_state to 3 + * to never apply workaround anymore + */ + if (cptkbd_data->middlebutton_state == 1 && + usage->type == EV_REL && + (usage->code == REL_X || usage->code == REL_Y)) { + cptkbd_data->middlebutton_state = 3; + /* send middle button press which was hold before */ + input_event(field->hidinput->input, + EV_KEY, BTN_MIDDLE, 1); + input_sync(field->hidinput->input); + } - /* Middle click events */ - if (usage->type == EV_KEY && usage->code == BTN_MIDDLE) { - if (value == 1) { - cptkbd_data->middlebutton_state = 1; - } else if (value == 0) { - if (cptkbd_data->middlebutton_state == 1) { - /* No scrolling inbetween, send middle-click */ - input_event(field->hidinput->input, - EV_KEY, BTN_MIDDLE, 1); - input_sync(field->hidinput->input); - input_event(field->hidinput->input, - EV_KEY, BTN_MIDDLE, 0); - input_sync(field->hidinput->input); + /* "wheel" scroll events */ + if (usage->type == EV_REL && (usage->code == REL_WHEEL || + usage->code == REL_HWHEEL)) { + /* Scroll events disable middle-click event */ + cptkbd_data->middlebutton_state = 2; + return 0; + } + + /* Middle click events */ + if (usage->type == EV_KEY && usage->code == BTN_MIDDLE) { + if (value == 1) { + cptkbd_data->middlebutton_state = 1; + } else if (value == 0) { + if (cptkbd_data->middlebutton_state == 1) { + /* No scrolling inbetween, send middle-click */ + input_event(field->hidinput->input, + EV_KEY, BTN_MIDDLE, 1); + input_sync(field->hidinput->input); + input_event(field->hidinput->input, + EV_KEY, BTN_MIDDLE, 0); + input_sync(field->hidinput->input); + } + cptkbd_data->middlebutton_state = 0; } - cptkbd_data->middlebutton_state = 0; + return 1; } - return 1; } if (usage->type == EV_KEY && usage->code == KEY_FN_ESC && value == 1) { From b39764d0d2d6bc0384f7d9c8012551897032beb1 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 21 Aug 2023 08:45:21 +0100 Subject: [PATCH 0096/1397] ARM: 9320/1: fix stack depot IRQ stack filter [ Upstream commit b0150014878c32197cfa66e3e2f79e57f66babc0 ] Place IRQ handlers such as gic_handle_irq() in the irqentry section even if FUNCTION_GRAPH_TRACER is not enabled. Without this, the stack depot's filter_irq_stacks() does not correctly filter out IRQ stacks in those configurations, which hampers deduplication and eventually leads to "Stack depot reached limit capacity" splats with KASAN. A similar fix was done for arm64 in commit f6794950f0e5ba37e3bbed ("arm64: set __exception_irq_entry with __irq_entry as a default"). Link: https://lore.kernel.org/r/20230803-arm-irqentry-v1-1-8aad8e260b1c@axis.com Signed-off-by: Vincent Whitchurch Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/include/asm/exception.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm/include/asm/exception.h b/arch/arm/include/asm/exception.h index 58e039a851af..3c82975d46db 100644 --- a/arch/arm/include/asm/exception.h +++ b/arch/arm/include/asm/exception.h @@ -10,10 +10,6 @@ #include -#ifdef CONFIG_FUNCTION_GRAPH_TRACER #define __exception_irq_entry __irq_entry -#else -#define __exception_irq_entry -#endif #endif /* __ASM_ARM_EXCEPTION_H */ From 4a320da7f7cbdab2098b103c47f45d5061f42edd Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 6 Oct 2023 12:28:55 +0200 Subject: [PATCH 0097/1397] ALSA: hda: Fix possible null-ptr-deref when assigning a stream [ Upstream commit f93dc90c2e8ed664985e366aa6459ac83cdab236 ] While AudioDSP drivers assign streams exclusively of HOST or LINK type, nothing blocks a user to attempt to assign a COUPLED stream. As supplied substream instance may be a stub, what is the case when code-loading, such scenario ends with null-ptr-deref. Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20231006102857.749143-2-cezary.rojewski@intel.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/hda/hdac_stream.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c index 2633a4bb1d85..214a0680524b 100644 --- a/sound/hda/hdac_stream.c +++ b/sound/hda/hdac_stream.c @@ -354,8 +354,10 @@ struct hdac_stream *snd_hdac_stream_assign(struct hdac_bus *bus, struct hdac_stream *res = NULL; /* make a non-zero unique key for the substream */ - int key = (substream->pcm->device << 16) | (substream->number << 2) | - (substream->stream + 1); + int key = (substream->number << 2) | (substream->stream + 1); + + if (substream->pcm) + key |= (substream->pcm->device << 16); spin_lock_irq(&bus->reg_lock); list_for_each_entry(azx_dev, &bus->stream_list, list) { From b7d773ea9b202ff1038978f13c10425cd34bc560 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 6 Oct 2023 15:46:24 +0200 Subject: [PATCH 0098/1397] gpiolib: of: Add quirk for mt2701-cs42448 ASoC sound [ Upstream commit 9e189e80dcb68528dea9e061d9704993f98cb84f ] These gpio names are due to old DT bindings not following the "-gpio"/"-gpios" conventions. Handle it using a quirk so the driver can just look up the GPIOs. Signed-off-by: Linus Walleij Acked-by: Bartosz Golaszewski Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231006-descriptors-asoc-mediatek-v1-1-07fe79f337f5@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib-of.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 531faabead0f..d9525d95e818 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -512,6 +512,10 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np, #if IS_ENABLED(CONFIG_SND_SOC_CS42L56) { "reset", "cirrus,gpio-nreset", "cirrus,cs42l56" }, #endif +#if IS_ENABLED(CONFIG_SND_SOC_MT2701_CS42448) + { "i2s1-in-sel-gpio1", NULL, "mediatek,mt2701-cs42448-machine" }, + { "i2s1-in-sel-gpio2", NULL, "mediatek,mt2701-cs42448-machine" }, +#endif #if IS_ENABLED(CONFIG_SND_SOC_TLV320AIC3X) { "reset", "gpio-reset", "ti,tlv320aic3x" }, { "reset", "gpio-reset", "ti,tlv320aic33" }, From b92d2f5ea852d740c577e01176d290171bc58b4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 19 Sep 2023 15:56:44 +0300 Subject: [PATCH 0099/1397] PCI: tegra194: Use FIELD_GET()/FIELD_PREP() with Link Width fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 759574abd78e3b47ec45bbd31a64e8832cf73f97 ] Use FIELD_GET() to extract PCIe Negotiated Link Width field instead of custom masking and shifting. Similarly, change custom code that misleadingly used PCI_EXP_LNKSTA_NLW_SHIFT to prepare value for PCI_EXP_LNKCAP write to use FIELD_PREP() with correct field define (PCI_EXP_LNKCAP_MLW). Link: https://lore.kernel.org/r/20230919125648.1920-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pcie-tegra194.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c index 4bba31502ce1..248cd9347e8f 100644 --- a/drivers/pci/controller/dwc/pcie-tegra194.c +++ b/drivers/pci/controller/dwc/pcie-tegra194.c @@ -9,6 +9,7 @@ * Author: Vidya Sagar */ +#include #include #include #include @@ -346,8 +347,7 @@ static void apply_bad_link_workaround(struct dw_pcie_rp *pp) */ val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA); if (val & PCI_EXP_LNKSTA_LBMS) { - current_link_width = (val & PCI_EXP_LNKSTA_NLW) >> - PCI_EXP_LNKSTA_NLW_SHIFT; + current_link_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, val); if (pcie->init_link_width > current_link_width) { dev_warn(pci->dev, "PCIe link is bad, width reduced\n"); val = dw_pcie_readw_dbi(pci, pcie->pcie_cap_base + @@ -760,8 +760,7 @@ static void tegra_pcie_enable_system_interrupts(struct dw_pcie_rp *pp) val_w = dw_pcie_readw_dbi(&pcie->pci, pcie->pcie_cap_base + PCI_EXP_LNKSTA); - pcie->init_link_width = (val_w & PCI_EXP_LNKSTA_NLW) >> - PCI_EXP_LNKSTA_NLW_SHIFT; + pcie->init_link_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, val_w); val_w = dw_pcie_readw_dbi(&pcie->pci, pcie->pcie_cap_base + PCI_EXP_LNKCTL); @@ -920,7 +919,7 @@ static int tegra_pcie_dw_host_init(struct dw_pcie_rp *pp) /* Configure Max lane width from DT */ val = dw_pcie_readl_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKCAP); val &= ~PCI_EXP_LNKCAP_MLW; - val |= (pcie->num_lanes << PCI_EXP_LNKSTA_NLW_SHIFT); + val |= FIELD_PREP(PCI_EXP_LNKCAP_MLW, pcie->num_lanes); dw_pcie_writel_dbi(pci, pcie->pcie_cap_base + PCI_EXP_LNKCAP, val); /* Clear Slot Clock Configuration bit if SRNS configuration */ From 163a0a8b6fb5954ee06112034a796d39ef871613 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 19 Sep 2023 15:56:45 +0300 Subject: [PATCH 0100/1397] PCI: mvebu: Use FIELD_PREP() with Link Width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 408599ec561ad5862cda4f107626009f6fa97a74 ] mvebu_pcie_setup_hw() setups the Maximum Link Width field in the Link Capabilities registers using an open-coded variant of FIELD_PREP() with a literal in shift. Improve readability by using FIELD_PREP(PCI_EXP_LNKCAP_MLW, ...). Link: https://lore.kernel.org/r/20230919125648.1920-6-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/pci/controller/pci-mvebu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 60810a1fbfb7..29fe09c99e7d 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -264,7 +264,7 @@ static void mvebu_pcie_setup_hw(struct mvebu_pcie_port *port) */ lnkcap = mvebu_readl(port, PCIE_CAP_PCIEXP + PCI_EXP_LNKCAP); lnkcap &= ~PCI_EXP_LNKCAP_MLW; - lnkcap |= (port->is_x4 ? 4 : 1) << 4; + lnkcap |= FIELD_PREP(PCI_EXP_LNKCAP_MLW, port->is_x4 ? 4 : 1); mvebu_writel(port, lnkcap, PCIE_CAP_PCIEXP + PCI_EXP_LNKCAP); /* Disable Root Bridge I/O space, memory space and bus mastering. */ From 142d999e660b0d27ceda485070123c69559b6eac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 11 Sep 2023 15:53:51 +0300 Subject: [PATCH 0101/1397] atm: iphase: Do PCI error checks on own line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c28742447ca9879b52fbaf022ad844f0ffcd749c ] In get_esi() PCI errors are checked inside line-split "if" conditions (in addition to the file not following the coding style). To make the code in get_esi() more readable, fix the coding style and use the usual error handling pattern with a separate variable. In addition, initialization of 'error' variable at declaration is not needed. No functional changes intended. Link: https://lore.kernel.org/r/20230911125354.25501-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/atm/iphase.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 324148686953..9bba8f280a4d 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -2291,19 +2291,21 @@ static int get_esi(struct atm_dev *dev) static int reset_sar(struct atm_dev *dev) { IADEV *iadev; - int i, error = 1; + int i, error; unsigned int pci[64]; iadev = INPH_IA_DEV(dev); - for(i=0; i<64; i++) - if ((error = pci_read_config_dword(iadev->pci, - i*4, &pci[i])) != PCIBIOS_SUCCESSFUL) - return error; + for (i = 0; i < 64; i++) { + error = pci_read_config_dword(iadev->pci, i * 4, &pci[i]); + if (error != PCIBIOS_SUCCESSFUL) + return error; + } writel(0, iadev->reg+IPHASE5575_EXT_RESET); - for(i=0; i<64; i++) - if ((error = pci_write_config_dword(iadev->pci, - i*4, pci[i])) != PCIBIOS_SUCCESSFUL) - return error; + for (i = 0; i < 64; i++) { + error = pci_write_config_dword(iadev->pci, i * 4, pci[i]); + if (error != PCIBIOS_SUCCESSFUL) + return error; + } udelay(5); return 0; } From eaea9f7b44e1da79aaf1a651499b40b651a587b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 11 Sep 2023 15:53:52 +0300 Subject: [PATCH 0102/1397] PCI: Do error check on own line to split long "if" conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d15f18053e5cc5576af9e7eef0b2a91169b6326d ] Placing PCI error code check inside "if" condition usually results in need to split lines. Combined with additional conditions the "if" condition becomes messy. Convert to the usual error handling pattern with an additional variable to improve code readability. In addition, reverse the logic in pci_find_vsec_capability() to get rid of &&. No functional changes intended. Link: https://lore.kernel.org/r/20230911125354.25501-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: PCI_POSSIBLE_ERROR()] Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pci.c | 9 ++++++--- drivers/pci/probe.c | 6 +++--- drivers/pci/quirks.c | 6 +++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 59c01d68c6d5..5e51e8bd5c13 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -732,15 +732,18 @@ u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap) { u16 vsec = 0; u32 header; + int ret; if (vendor != dev->vendor) return 0; while ((vsec = pci_find_next_ext_capability(dev, vsec, PCI_EXT_CAP_ID_VNDR))) { - if (pci_read_config_dword(dev, vsec + PCI_VNDR_HEADER, - &header) == PCIBIOS_SUCCESSFUL && - PCI_VNDR_HEADER_ID(header) == cap) + ret = pci_read_config_dword(dev, vsec + PCI_VNDR_HEADER, &header); + if (ret != PCIBIOS_SUCCESSFUL) + continue; + + if (PCI_VNDR_HEADER_ID(header) == cap) return vsec; } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 795534589b98..43159965e09e 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1652,15 +1652,15 @@ static void pci_set_removable(struct pci_dev *dev) static bool pci_ext_cfg_is_aliased(struct pci_dev *dev) { #ifdef CONFIG_PCI_QUIRKS - int pos; + int pos, ret; u32 header, tmp; pci_read_config_dword(dev, PCI_VENDOR_ID, &header); for (pos = PCI_CFG_SPACE_SIZE; pos < PCI_CFG_SPACE_EXP_SIZE; pos += PCI_CFG_SPACE_SIZE) { - if (pci_read_config_dword(dev, pos, &tmp) != PCIBIOS_SUCCESSFUL - || header != tmp) + ret = pci_read_config_dword(dev, pos, &tmp); + if ((ret != PCIBIOS_SUCCESSFUL) || (header != tmp)) return false; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e3e915329510..7b62797e242b 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5385,7 +5385,7 @@ int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) */ static void quirk_intel_qat_vf_cap(struct pci_dev *pdev) { - int pos, i = 0; + int pos, i = 0, ret; u8 next_cap; u16 reg16, *cap; struct pci_cap_saved_state *state; @@ -5431,8 +5431,8 @@ static void quirk_intel_qat_vf_cap(struct pci_dev *pdev) pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD; pdev->cfg_size = PCI_CFG_SPACE_EXP_SIZE; - if (pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &status) != - PCIBIOS_SUCCESSFUL || (status == 0xffffffff)) + ret = pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &status); + if ((ret != PCIBIOS_SUCCESSFUL) || (PCI_POSSIBLE_ERROR(status))) pdev->cfg_size = PCI_CFG_SPACE_SIZE; if (pci_find_saved_cap(pdev, PCI_CAP_ID_EXP)) From 6b9ecf4e1032e645873933e5b43cbb84cac19106 Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Wed, 11 Oct 2023 21:03:50 +0800 Subject: [PATCH 0103/1397] scsi: libfc: Fix potential NULL pointer dereference in fc_lport_ptp_setup() [ Upstream commit 4df105f0ce9f6f30cda4e99f577150d23f0c9c5f ] fc_lport_ptp_setup() did not check the return value of fc_rport_create() which can return NULL and would cause a NULL pointer dereference. Address this issue by checking return value of fc_rport_create() and log error message on fc_rport_create() failed. Signed-off-by: Wenchao Hao Link: https://lore.kernel.org/r/20231011130350.819571-1-haowenchao2@huawei.com Reviewed-by: Simon Horman Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libfc/fc_lport.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 9c02c9523c4d..ab06e9aeb613 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -241,6 +241,12 @@ static void fc_lport_ptp_setup(struct fc_lport *lport, } mutex_lock(&lport->disc.disc_mutex); lport->ptp_rdata = fc_rport_create(lport, remote_fid); + if (!lport->ptp_rdata) { + printk(KERN_WARNING "libfc: Failed to setup lport 0x%x\n", + lport->port_id); + mutex_unlock(&lport->disc.disc_mutex); + return; + } kref_get(&lport->ptp_rdata->kref); lport->ptp_rdata->ids.port_name = remote_wwpn; lport->ptp_rdata->ids.node_name = remote_wwnn; From d4c5d6fc2b9cced824f3237d3df5534dae708dcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 19 Sep 2023 15:56:46 +0300 Subject: [PATCH 0104/1397] PCI: Use FIELD_GET() to extract Link Width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d1f9b39da4a5347150246871325190018cda8cb3 ] Use FIELD_GET() to extract PCIe Negotiated and Maximum Link Width fields instead of custom masking and shifting. Link: https://lore.kernel.org/r/20230919125648.1920-7-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: drop duplicate include of ] Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/pci/pci-sysfs.c | 5 ++--- drivers/pci/pci.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index d9eede2dbc0e..5a6241044c3c 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -12,7 +12,7 @@ * Modeled after usb's driverfs.c */ - +#include #include #include #include @@ -230,8 +230,7 @@ static ssize_t current_link_width_show(struct device *dev, if (err) return -EINVAL; - return sysfs_emit(buf, "%u\n", - (linkstat & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT); + return sysfs_emit(buf, "%u\n", FIELD_GET(PCI_EXP_LNKSTA_NLW, linkstat)); } static DEVICE_ATTR_RO(current_link_width); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 5e51e8bd5c13..ec43ebfc24a5 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6260,8 +6260,7 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); next_speed = pcie_link_speed[lnksta & PCI_EXP_LNKSTA_CLS]; - next_width = (lnksta & PCI_EXP_LNKSTA_NLW) >> - PCI_EXP_LNKSTA_NLW_SHIFT; + next_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta); next_bw = next_width * PCIE_SPEED2MBS_ENC(next_speed); @@ -6333,7 +6332,7 @@ enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev) pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap); if (lnkcap) - return (lnkcap & PCI_EXP_LNKCAP_MLW) >> 4; + return FIELD_GET(PCI_EXP_LNKCAP_MLW, lnkcap); return PCIE_LNK_WIDTH_UNKNOWN; } From d3ca6149eecc76000dbbc2c7ea32fae842a9cdb2 Mon Sep 17 00:00:00 2001 From: Bartosz Pawlowski Date: Fri, 8 Sep 2023 14:36:05 +0000 Subject: [PATCH 0105/1397] PCI: Extract ATS disabling to a helper function [ Upstream commit f18b1137d38c091cc8c16365219f0a1d4a30b3d1 ] Introduce quirk_no_ats() helper function to provide a standard way to disable ATS capability in PCI quirks. Suggested-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230908143606.685930-2-bartosz.pawlowski@intel.com Signed-off-by: Bartosz Pawlowski Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 7b62797e242b..8a8f601b5d69 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5509,6 +5509,12 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0420, quirk_no_ext_tags); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags); #ifdef CONFIG_PCI_ATS +static void quirk_no_ats(struct pci_dev *pdev) +{ + pci_info(pdev, "disabling ATS\n"); + pdev->ats_cap = 0; +} + /* * Some devices require additional driver setup to enable ATS. Don't use * ATS for those devices as ATS will be enabled before the driver has had a @@ -5522,14 +5528,10 @@ static void quirk_amd_harvest_no_ats(struct pci_dev *pdev) (pdev->subsystem_device == 0xce19 || pdev->subsystem_device == 0xcc10 || pdev->subsystem_device == 0xcc08)) - goto no_ats; - else - return; + quirk_no_ats(pdev); + } else { + quirk_no_ats(pdev); } - -no_ats: - pci_info(pdev, "disabling ATS\n"); - pdev->ats_cap = 0; } /* AMD Stoney platform GPU */ From 1edfc5bfdefa69a8a6d32314e2014398bbc034b9 Mon Sep 17 00:00:00 2001 From: Bartosz Pawlowski Date: Fri, 8 Sep 2023 14:36:06 +0000 Subject: [PATCH 0106/1397] PCI: Disable ATS for specific Intel IPU E2000 devices [ Upstream commit a18615b1cfc04f00548c60eb9a77e0ce56e848fd ] Due to a hardware issue in A and B steppings of Intel IPU E2000, it expects wrong endianness in ATS invalidation message body. This problem can lead to outdated translations being returned as valid and finally cause system instability. To prevent such issues, add quirk_intel_e2000_no_ats() to disable ATS for vulnerable IPU E2000 devices. Link: https://lore.kernel.org/r/20230908143606.685930-3-bartosz.pawlowski@intel.com Signed-off-by: Bartosz Pawlowski Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Reviewed-by: Alexander Lobakin Signed-off-by: Sasha Levin --- drivers/pci/quirks.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8a8f601b5d69..3da1a044827d 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5554,6 +5554,25 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7347, quirk_amd_harvest_no_ats); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x734f, quirk_amd_harvest_no_ats); /* AMD Raven platform iGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_harvest_no_ats); + +/* + * Intel IPU E2000 revisions before C0 implement incorrect endianness + * in ATS Invalidate Request message body. Disable ATS for those devices. + */ +static void quirk_intel_e2000_no_ats(struct pci_dev *pdev) +{ + if (pdev->revision < 0x20) + quirk_no_ats(pdev); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1451, quirk_intel_e2000_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1452, quirk_intel_e2000_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1453, quirk_intel_e2000_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1454, quirk_intel_e2000_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1455, quirk_intel_e2000_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1457, quirk_intel_e2000_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1459, quirk_intel_e2000_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145a, quirk_intel_e2000_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x145c, quirk_intel_e2000_no_ats); #endif /* CONFIG_PCI_ATS */ /* Freescale PCIe doesn't support MSI in RC mode */ From e8bde5eb103c062a4cd195a5039b854bd2e210e9 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 18 Oct 2023 17:56:18 +0900 Subject: [PATCH 0107/1397] PCI: dwc: Add dw_pcie_link_set_max_link_width() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a9a1bcba90254975d4adbcca53f720318cf81c0c ] This is a preparation before adding the Max-Link-width capability setup which would in its turn complete the max-link-width setup procedure defined by Synopsys in the HW-manual. Seeing there is a max-link-speed setup method defined in the DW PCIe core driver it would be good to have a similar function for the link width setup. That's why we need to define a dedicated function first from already implemented but incomplete link-width setting up code. Link: https://lore.kernel.org/linux-pci/20231018085631.1121289-3-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Yoshihiro Shimoda Signed-off-by: Krzysztof Wilczyński Reviewed-by: Manivannan Sadhasivam Reviewed-by: Serge Semin Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pcie-designware.c | 86 ++++++++++---------- 1 file changed, 41 insertions(+), 45 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index 1c1c7348972b..da4aba4aee62 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -732,6 +732,46 @@ static void dw_pcie_link_set_max_speed(struct dw_pcie *pci, u32 link_gen) } +static void dw_pcie_link_set_max_link_width(struct dw_pcie *pci, u32 num_lanes) +{ + u32 lwsc, plc; + + if (!num_lanes) + return; + + /* Set the number of lanes */ + plc = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL); + plc &= ~PORT_LINK_FAST_LINK_MODE; + plc &= ~PORT_LINK_MODE_MASK; + + /* Set link width speed control register */ + lwsc = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL); + lwsc &= ~PORT_LOGIC_LINK_WIDTH_MASK; + switch (num_lanes) { + case 1: + plc |= PORT_LINK_MODE_1_LANES; + lwsc |= PORT_LOGIC_LINK_WIDTH_1_LANES; + break; + case 2: + plc |= PORT_LINK_MODE_2_LANES; + lwsc |= PORT_LOGIC_LINK_WIDTH_2_LANES; + break; + case 4: + plc |= PORT_LINK_MODE_4_LANES; + lwsc |= PORT_LOGIC_LINK_WIDTH_4_LANES; + break; + case 8: + plc |= PORT_LINK_MODE_8_LANES; + lwsc |= PORT_LOGIC_LINK_WIDTH_8_LANES; + break; + default: + dev_err(pci->dev, "num-lanes %u: invalid value\n", num_lanes); + return; + } + dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, plc); + dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, lwsc); +} + void dw_pcie_iatu_detect(struct dw_pcie *pci) { int max_region, ob, ib; @@ -1013,49 +1053,5 @@ void dw_pcie_setup(struct dw_pcie *pci) val |= PORT_LINK_DLL_LINK_EN; dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); - if (!pci->num_lanes) { - dev_dbg(pci->dev, "Using h/w default number of lanes\n"); - return; - } - - /* Set the number of lanes */ - val &= ~PORT_LINK_FAST_LINK_MODE; - val &= ~PORT_LINK_MODE_MASK; - switch (pci->num_lanes) { - case 1: - val |= PORT_LINK_MODE_1_LANES; - break; - case 2: - val |= PORT_LINK_MODE_2_LANES; - break; - case 4: - val |= PORT_LINK_MODE_4_LANES; - break; - case 8: - val |= PORT_LINK_MODE_8_LANES; - break; - default: - dev_err(pci->dev, "num-lanes %u: invalid value\n", pci->num_lanes); - return; - } - dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); - - /* Set link width speed control register */ - val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL); - val &= ~PORT_LOGIC_LINK_WIDTH_MASK; - switch (pci->num_lanes) { - case 1: - val |= PORT_LOGIC_LINK_WIDTH_1_LANES; - break; - case 2: - val |= PORT_LOGIC_LINK_WIDTH_2_LANES; - break; - case 4: - val |= PORT_LOGIC_LINK_WIDTH_4_LANES; - break; - case 8: - val |= PORT_LOGIC_LINK_WIDTH_8_LANES; - break; - } - dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val); + dw_pcie_link_set_max_link_width(pci, pci->num_lanes); } From f3dda1cf33b481c22412002e5ee361aa6dbcbd75 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 18 Oct 2023 17:56:19 +0900 Subject: [PATCH 0108/1397] PCI: dwc: Add missing PCI_EXP_LNKCAP_MLW handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 89db0793c9f2da265ecb6c1681f899d9af157f37 ] Update dw_pcie_link_set_max_link_width() to set PCI_EXP_LNKCAP_MLW. In accordance with the DW PCIe RC/EP HW manuals [1,2,3,...] aside with the PORT_LINK_CTRL_OFF.LINK_CAPABLE and GEN2_CTRL_OFF.NUM_OF_LANES[8:0] field there is another one which needs to be updated. It's LINK_CAPABILITIES_REG.PCIE_CAP_MAX_LINK_WIDTH. If it isn't done at the very least the maximum link-width capability CSR won't expose the actual maximum capability. [1] DesignWare Cores PCI Express Controller Databook - DWC PCIe Root Port, Version 4.60a, March 2015, p.1032 [2] DesignWare Cores PCI Express Controller Databook - DWC PCIe Root Port, Version 4.70a, March 2016, p.1065 [3] DesignWare Cores PCI Express Controller Databook - DWC PCIe Root Port, Version 4.90a, March 2016, p.1057 ... [X] DesignWare Cores PCI Express Controller Databook - DWC PCIe Endpoint, Version 5.40a, March 2019, p.1396 [X+1] DesignWare Cores PCI Express Controller Databook - DWC PCIe Root Port, Version 5.40a, March 2019, p.1266 Suggested-by: Serge Semin Link: https://lore.kernel.org/linux-pci/20231018085631.1121289-4-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Yoshihiro Shimoda Signed-off-by: Krzysztof Wilczyński Reviewed-by: Manivannan Sadhasivam Reviewed-by: Serge Semin Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pcie-designware.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index da4aba4aee62..2b60d20dfdf5 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -734,7 +734,8 @@ static void dw_pcie_link_set_max_speed(struct dw_pcie *pci, u32 link_gen) static void dw_pcie_link_set_max_link_width(struct dw_pcie *pci, u32 num_lanes) { - u32 lwsc, plc; + u32 lnkcap, lwsc, plc; + u8 cap; if (!num_lanes) return; @@ -770,6 +771,12 @@ static void dw_pcie_link_set_max_link_width(struct dw_pcie *pci, u32 num_lanes) } dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, plc); dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, lwsc); + + cap = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + lnkcap = dw_pcie_readl_dbi(pci, cap + PCI_EXP_LNKCAP); + lnkcap &= ~PCI_EXP_LNKCAP_MLW; + lnkcap |= FIELD_PREP(PCI_EXP_LNKCAP_MLW, num_lanes); + dw_pcie_writel_dbi(pci, cap + PCI_EXP_LNKCAP, lnkcap); } void dw_pcie_iatu_detect(struct dw_pcie *pci) From f22145b55283cea2096a3c91614668354380820f Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 18 Oct 2023 17:56:31 +0900 Subject: [PATCH 0109/1397] misc: pci_endpoint_test: Add Device ID for R-Car S4-8 PCIe controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6c4b39937f4e65688ea294725ae432b2565821ff ] Add Renesas R8A779F0 in pci_device_id table so that pci-epf-test can be used for testing PCIe EP on R-Car S4-8. Link: https://lore.kernel.org/linux-pci/20231018085631.1121289-16-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Yoshihiro Shimoda Signed-off-by: Krzysztof Wilczyński Acked-by: Manivannan Sadhasivam Signed-off-by: Sasha Levin --- drivers/misc/pci_endpoint_test.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 7e1acc68d435..af519088732d 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -82,6 +82,7 @@ #define PCI_DEVICE_ID_RENESAS_R8A774B1 0x002b #define PCI_DEVICE_ID_RENESAS_R8A774C0 0x002d #define PCI_DEVICE_ID_RENESAS_R8A774E1 0x0025 +#define PCI_DEVICE_ID_RENESAS_R8A779F0 0x0031 static DEFINE_IDA(pci_endpoint_test_ida); @@ -991,6 +992,9 @@ static const struct pci_device_id pci_endpoint_test_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_R8A774B1),}, { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_R8A774C0),}, { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_R8A774E1),}, + { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, PCI_DEVICE_ID_RENESAS_R8A779F0), + .driver_data = (kernel_ulong_t)&default_data, + }, { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_J721E), .driver_data = (kernel_ulong_t)&j721e_data, }, From 58c91b699c807673ecafe7c4ab10a727310970d5 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 10 Oct 2023 15:44:28 -0500 Subject: [PATCH 0110/1397] PCI: Use FIELD_GET() in Sapphire RX 5600 XT Pulse quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 04e82fa5951ca66495d7b05665eff673aa3852b4 ] Use FIELD_GET() to remove dependences on the field position, i.e., the shift value. No functional change intended. Separate because this isn't as trivial as the other FIELD_GET() changes. See 907830b0fc9e ("PCI: Add a REBAR size quirk for Sapphire RX 5600 XT Pulse") Link: https://lore.kernel.org/r/20231010204436.1000644-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan Cc: Nirmoy Das Signed-off-by: Sasha Levin --- drivers/pci/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index ec43ebfc24a5..a607f277ccf1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3755,14 +3755,14 @@ u32 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar) return 0; pci_read_config_dword(pdev, pos + PCI_REBAR_CAP, &cap); - cap &= PCI_REBAR_CAP_SIZES; + cap = FIELD_GET(PCI_REBAR_CAP_SIZES, cap); /* Sapphire RX 5600 XT Pulse has an invalid cap dword for BAR 0 */ if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->device == 0x731f && - bar == 0 && cap == 0x7000) - cap = 0x3f000; + bar == 0 && cap == 0x700) + return 0x3f00; - return cap >> 4; + return cap; } EXPORT_SYMBOL(pci_rebar_get_possible_sizes); From 80be0425eef9aac8995097e1810334689db865e0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 21 Oct 2023 23:15:28 +0200 Subject: [PATCH 0111/1397] ASoC: Intel: soc-acpi-cht: Add Lenovo Yoga Tab 3 Pro YT3-X90 quirk [ Upstream commit 2cb54788393134d8174ee594002baae3ce52c61e ] The Lenovo Yoga Tab 3 Pro YT3-X90 x86 tablet, which ships with Android with a custom kernel as factory OS, does not list the used WM5102 codec inside its DSDT. Workaround this with a new snd_soc_acpi_intel_baytrail_machines[] entry which matches on the SST id instead of the codec id like nocodec does, combined with using a machine_quirk callback which returns NULL on other machines to skip the new entry on other machines. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231021211534.114991-1-hdegoede@redhat.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- .../intel/common/soc-acpi-intel-cht-match.c | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/sound/soc/intel/common/soc-acpi-intel-cht-match.c b/sound/soc/intel/common/soc-acpi-intel-cht-match.c index cdcbf04b8832..5e2ec60e2954 100644 --- a/sound/soc/intel/common/soc-acpi-intel-cht-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-cht-match.c @@ -75,6 +75,39 @@ static struct snd_soc_acpi_mach *cht_ess8316_quirk(void *arg) return arg; } +/* + * The Lenovo Yoga Tab 3 Pro YT3-X90, with Android factory OS has a buggy DSDT + * with the coded not being listed at all. + */ +static const struct dmi_system_id lenovo_yoga_tab3_x90[] = { + { + /* Lenovo Yoga Tab 3 Pro YT3-X90, codec missing from DSDT */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"), + }, + }, + { } +}; + +static struct snd_soc_acpi_mach cht_lenovo_yoga_tab3_x90_mach = { + .id = "10WM5102", + .drv_name = "bytcr_wm5102", + .fw_filename = "intel/fw_sst_22a8.bin", + .board = "bytcr_wm5102", + .sof_tplg_filename = "sof-cht-wm5102.tplg", +}; + +static struct snd_soc_acpi_mach *lenovo_yt3_x90_quirk(void *arg) +{ + if (dmi_check_system(lenovo_yoga_tab3_x90)) + return &cht_lenovo_yoga_tab3_x90_mach; + + /* Skip wildcard match snd_soc_acpi_intel_cherrytrail_machines[] entry */ + return NULL; +} + static const struct snd_soc_acpi_codecs rt5640_comp_ids = { .num_codecs = 2, .codecs = { "10EC5640", "10EC3276" }, @@ -175,6 +208,16 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_cherrytrail_machines[] = { .drv_name = "sof_pcm512x", .sof_tplg_filename = "sof-cht-src-50khz-pcm512x.tplg", }, + /* + * Special case for the Lenovo Yoga Tab 3 Pro YT3-X90 where the DSDT + * misses the codec. Match on the SST id instead, lenovo_yt3_x90_quirk() + * will return a YT3 specific mach or NULL when called on other hw, + * skipping this entry. + */ + { + .id = "808622A8", + .machine_quirk = lenovo_yt3_x90_quirk, + }, #if IS_ENABLED(CONFIG_SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH) /* From c7f514e2663e402dbd612f93e76d96be055d4b27 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Fri, 20 Oct 2023 17:35:58 +0800 Subject: [PATCH 0112/1397] crypto: hisilicon/qm - prevent soft lockup in receive loop [ Upstream commit 33fc506d2ac514be1072499a263c3bff8c7c95a0 ] In the scenario where the accelerator business is fully loaded. When the workqueue receiving messages and performing callback processing, there are a large number of messages that need to be received, and there are continuously messages that have been processed and need to be received. This will cause the receive loop here to be locked for a long time. This scenario will cause watchdog timeout problems on OS with kernel preemption turned off. The error logs: watchdog: BUG: soft lockup - CPU#23 stuck for 23s! [kworker/u262:1:1407] [ 1461.978428][ C23] Call trace: [ 1461.981890][ C23] complete+0x8c/0xf0 [ 1461.986031][ C23] kcryptd_async_done+0x154/0x1f4 [dm_crypt] [ 1461.992154][ C23] sec_skcipher_callback+0x7c/0xf4 [hisi_sec2] [ 1461.998446][ C23] sec_req_cb+0x104/0x1f4 [hisi_sec2] [ 1462.003950][ C23] qm_poll_req_cb+0xcc/0x150 [hisi_qm] [ 1462.009531][ C23] qm_work_process+0x60/0xc0 [hisi_qm] [ 1462.015101][ C23] process_one_work+0x1c4/0x470 [ 1462.020052][ C23] worker_thread+0x150/0x3c4 [ 1462.024735][ C23] kthread+0x108/0x13c [ 1462.028889][ C23] ret_from_fork+0x10/0x18 Therefore, it is necessary to add an actively scheduled operation in the while loop to prevent this problem. After adding it, no matter whether the OS turns on or off the kernel preemption function. Neither will cause watchdog timeout issues. Signed-off-by: Longfang Liu Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/hisilicon/qm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 1638c0a7df31..193b0b3a77cd 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -847,6 +847,8 @@ static void qm_poll_req_cb(struct hisi_qp *qp) qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ, qp->qp_status.cq_head, 0); atomic_dec(&qp->qp_status.used); + + cond_resched(); } /* set c_flag */ From 802785faf1e72ba8a063cc1a8a67c77a9c67f3db Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 27 Oct 2023 15:32:09 +0200 Subject: [PATCH 0113/1397] HID: Add quirk for Dell Pro Wireless Keyboard and Mouse KM5221W [ Upstream commit 62cc9c3cb3ec1bf31cc116146185ed97b450836a ] This device needs ALWAYS_POLL quirk, otherwise it keeps reconnecting indefinitely. Reported-by: Robert Ayrapetyan Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e4d2dfd5d253..f7973ccd84a2 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -366,6 +366,7 @@ #define USB_VENDOR_ID_DELL 0x413c #define USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE 0x301a +#define USB_DEVICE_ID_DELL_PRO_WIRELESS_KM5221W 0x4503 #define USB_VENDOR_ID_DELORME 0x1163 #define USB_DEVICE_ID_DELORME_EARTHMATE 0x0100 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 3983b4f282f8..5a48fcaa32f0 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -66,6 +66,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_DELL, USB_DEVICE_ID_DELL_PRO_WIRELESS_KM5221W), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_DRACAL_RAPHNET, USB_DEVICE_ID_RAPHNET_2NES2SNES), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_DRACAL_RAPHNET, USB_DEVICE_ID_RAPHNET_4NES4SNES), HID_QUIRK_MULTI_INPUT }, From e835d150b1a437ed516f8add3142173403e9f27c Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Thu, 20 Jul 2023 14:23:08 +0800 Subject: [PATCH 0114/1397] exfat: support handle zero-size directory [ Upstream commit dab48b8f2fe7264d51ec9eed0adea0fe3c78830a ] After repairing a corrupted file system with exfatprogs' fsck.exfat, zero-size directories may result. It is also possible to create zero-size directories in other exFAT implementation, such as Paragon ufsd dirver. As described in the specification, the lower directory size limits is 0 bytes. Without this commit, sub-directories and files cannot be created under a zero-size directory, and it cannot be removed. Signed-off-by: Yuezhang Mo Reviewed-by: Andy Wu Reviewed-by: Aoyama Wataru Signed-off-by: Namjae Jeon Signed-off-by: Sasha Levin --- fs/exfat/namei.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 1b9f587f6cca..95c51b025b91 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -351,14 +351,20 @@ static int exfat_find_empty_entry(struct inode *inode, if (exfat_check_max_dentries(inode)) return -ENOSPC; - /* we trust p_dir->size regardless of FAT type */ - if (exfat_find_last_cluster(sb, p_dir, &last_clu)) - return -EIO; - /* * Allocate new cluster to this directory */ - exfat_chain_set(&clu, last_clu + 1, 0, p_dir->flags); + if (ei->start_clu != EXFAT_EOF_CLUSTER) { + /* we trust p_dir->size regardless of FAT type */ + if (exfat_find_last_cluster(sb, p_dir, &last_clu)) + return -EIO; + + exfat_chain_set(&clu, last_clu + 1, 0, p_dir->flags); + } else { + /* This directory is empty */ + exfat_chain_set(&clu, EXFAT_EOF_CLUSTER, 0, + ALLOC_NO_FAT_CHAIN); + } /* allocate a cluster */ ret = exfat_alloc_cluster(inode, 1, &clu, IS_DIRSYNC(inode)); @@ -368,6 +374,11 @@ static int exfat_find_empty_entry(struct inode *inode, if (exfat_zeroed_cluster(inode, clu.dir)) return -EIO; + if (ei->start_clu == EXFAT_EOF_CLUSTER) { + ei->start_clu = clu.dir; + p_dir->dir = clu.dir; + } + /* append to the FAT chain */ if (clu.flags != p_dir->flags) { /* no-fat-chain bit is disabled, @@ -645,7 +656,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname, info->type = exfat_get_entry_type(ep); info->attr = le16_to_cpu(ep->dentry.file.attr); info->size = le64_to_cpu(ep2->dentry.stream.valid_size); - if ((info->type == TYPE_FILE) && (info->size == 0)) { + if (info->size == 0) { info->flags = ALLOC_NO_FAT_CHAIN; info->start_clu = EXFAT_EOF_CLUSTER; } else { @@ -888,6 +899,9 @@ static int exfat_check_dir_empty(struct super_block *sb, dentries_per_clu = sbi->dentries_per_clu; + if (p_dir->dir == EXFAT_EOF_CLUSTER) + return 0; + exfat_chain_dup(&clu, p_dir); while (clu.dir != EXFAT_EOF_CLUSTER) { @@ -1255,7 +1269,8 @@ static int __exfat_rename(struct inode *old_parent_inode, } /* Free the clusters if new_inode is a dir(as if exfat_rmdir) */ - if (new_entry_type == TYPE_DIR) { + if (new_entry_type == TYPE_DIR && + new_ei->start_clu != EXFAT_EOF_CLUSTER) { /* new_ei, new_clu_to_free */ struct exfat_chain new_clu_to_free; From 6b1e61f7d07044563c497ea7320eb5c7ec446860 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 2 Oct 2023 11:33:44 +0300 Subject: [PATCH 0115/1397] mfd: intel-lpss: Add Intel Lunar Lake-M PCI IDs [ Upstream commit e53b22b10c6e0de0cf2a03a92b18fdad70f266c7 ] Add Intel Lunar Lake-M SoC PCI IDs. Signed-off-by: Jarkko Nikula Link: https://lore.kernel.org/r/20231002083344.75611-1-jarkko.nikula@linux.intel.com Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/intel-lpss-pci.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index 699f44ffff0e..ae5759200622 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -561,6 +561,19 @@ static const struct pci_device_id intel_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa3e2), (kernel_ulong_t)&spt_i2c_info }, { PCI_VDEVICE(INTEL, 0xa3e3), (kernel_ulong_t)&spt_i2c_info }, { PCI_VDEVICE(INTEL, 0xa3e6), (kernel_ulong_t)&spt_uart_info }, + /* LNL-M */ + { PCI_VDEVICE(INTEL, 0xa825), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0xa826), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0xa827), (kernel_ulong_t)&tgl_info }, + { PCI_VDEVICE(INTEL, 0xa830), (kernel_ulong_t)&tgl_info }, + { PCI_VDEVICE(INTEL, 0xa846), (kernel_ulong_t)&tgl_info }, + { PCI_VDEVICE(INTEL, 0xa850), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa851), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa852), (kernel_ulong_t)&bxt_uart_info }, + { PCI_VDEVICE(INTEL, 0xa878), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa879), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa87a), (kernel_ulong_t)&ehl_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa87b), (kernel_ulong_t)&ehl_i2c_info }, { } }; MODULE_DEVICE_TABLE(pci, intel_lpss_pci_ids); From 5b82e4240533bcd4691e50b64ec86d0d7fbd21b9 Mon Sep 17 00:00:00 2001 From: Zhang Shurong Date: Sat, 15 Jul 2023 23:55:50 +0800 Subject: [PATCH 0116/1397] iio: adc: stm32-adc: harden against NULL pointer deref in stm32_adc_probe() [ Upstream commit 3a23b384e7e3d64d5587ad10729a34d4f761517e ] of_match_device() may fail and returns a NULL pointer. In practice there is no known reasonable way to trigger this, but in case one is added in future, harden the code by adding the check Signed-off-by: Zhang Shurong Link: https://lore.kernel.org/r/tencent_994DA85912C937E3B5405BA960B31ED90A08@qq.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/stm32-adc-core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index 2f082006550f..bbd5bdd732f0 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -708,6 +708,8 @@ static int stm32_adc_probe(struct platform_device *pdev) struct stm32_adc_priv *priv; struct device *dev = &pdev->dev; struct device_node *np = pdev->dev.of_node; + const struct of_device_id *of_id; + struct resource *res; u32 max_rate; int ret; @@ -720,8 +722,11 @@ static int stm32_adc_probe(struct platform_device *pdev) return -ENOMEM; platform_set_drvdata(pdev, &priv->common); - priv->cfg = (const struct stm32_adc_priv_cfg *) - of_match_device(dev->driver->of_match_table, dev)->data; + of_id = of_match_device(dev->driver->of_match_table, dev); + if (!of_id) + return -ENODEV; + + priv->cfg = (const struct stm32_adc_priv_cfg *)of_id->data; priv->nb_adc_max = priv->cfg->num_adcs; spin_lock_init(&priv->common.lock); From 3097a09f08f0b055e773e64af0232e668a0a6425 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 25 Aug 2023 10:10:35 +0300 Subject: [PATCH 0117/1397] thunderbolt: Apply USB 3.x bandwidth quirk only in software connection manager [ Upstream commit 0c35ac18256942e66d8dab6ca049185812e60c69 ] This is not needed when firmware connection manager is run so limit this to software connection manager. Signed-off-by: Mika Westerberg Signed-off-by: Sasha Levin --- drivers/thunderbolt/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c index 488138a28ae1..e6bfa63b40ae 100644 --- a/drivers/thunderbolt/quirks.c +++ b/drivers/thunderbolt/quirks.c @@ -31,6 +31,9 @@ static void quirk_usb3_maximum_bandwidth(struct tb_switch *sw) { struct tb_port *port; + if (tb_switch_is_icm(sw)) + return; + tb_switch_for_each_port(sw, port) { if (!tb_port_is_usb3_down(port)) continue; From 8f8771757b130383732195497e47fba2aba76d3a Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Mon, 4 Sep 2023 11:52:20 +0800 Subject: [PATCH 0118/1397] tty: vcc: Add check for kstrdup() in vcc_probe() [ Upstream commit d81ffb87aaa75f842cd7aa57091810353755b3e6 ] Add check for the return value of kstrdup() and return the error, if it fails in order to avoid NULL pointer dereference. Signed-off-by: Yi Yang Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20230904035220.48164-1-yiyang13@huawei.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/vcc.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vcc.c b/drivers/tty/vcc.c index a39ed981bfd3..5b625f20233b 100644 --- a/drivers/tty/vcc.c +++ b/drivers/tty/vcc.c @@ -579,18 +579,22 @@ static int vcc_probe(struct vio_dev *vdev, const struct vio_device_id *id) return -ENOMEM; name = kstrdup(dev_name(&vdev->dev), GFP_KERNEL); + if (!name) { + rv = -ENOMEM; + goto free_port; + } rv = vio_driver_init(&port->vio, vdev, VDEV_CONSOLE_CON, vcc_versions, ARRAY_SIZE(vcc_versions), NULL, name); if (rv) - goto free_port; + goto free_name; port->vio.debug = vcc_dbg_vio; vcc_ldc_cfg.debug = vcc_dbg_ldc; rv = vio_ldc_alloc(&port->vio, &vcc_ldc_cfg, port); if (rv) - goto free_port; + goto free_name; spin_lock_init(&port->lock); @@ -624,6 +628,11 @@ static int vcc_probe(struct vio_dev *vdev, const struct vio_device_id *id) goto unreg_tty; } port->domain = kstrdup(domain, GFP_KERNEL); + if (!port->domain) { + rv = -ENOMEM; + goto unreg_tty; + } + mdesc_release(hp); @@ -653,8 +662,9 @@ static int vcc_probe(struct vio_dev *vdev, const struct vio_device_id *id) vcc_table_remove(port->index); free_ldc: vio_ldc_free(&port->vio); -free_port: +free_name: kfree(name); +free_port: kfree(port); return rv; From de77703a0a81368a611598c89d67eaafe8c0fa78 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 13 Sep 2023 11:53:23 +0200 Subject: [PATCH 0119/1397] dt-bindings: phy: qcom,snps-eusb2-repeater: Add magic tuning overrides [ Upstream commit c20b59b2996c89c4f072c3312e6210528a298330 ] The EUSB2 repeater requires some alterations to its init sequence, depending on board design. Add support for making the necessary changes to that sequence to make USB functional on SM8550-based Xperia 1 V. They all have lackluster description due to lack of information. Signed-off-by: Konrad Dybcio Acked-by: Rob Herring Link: https://lore.kernel.org/r/20230830-topic-eusb2_override-v2-1-7d8c893d93f6@linaro.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- .../phy/qcom,snps-eusb2-repeater.yaml | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-repeater.yaml b/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-repeater.yaml index 029569d5fcf3..24c733c10e0e 100644 --- a/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-repeater.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,snps-eusb2-repeater.yaml @@ -32,6 +32,27 @@ properties: vdd3-supply: true + qcom,tune-usb2-disc-thres: + $ref: /schemas/types.yaml#/definitions/uint8 + description: High-Speed disconnect threshold + minimum: 0 + maximum: 7 + default: 0 + + qcom,tune-usb2-amplitude: + $ref: /schemas/types.yaml#/definitions/uint8 + description: High-Speed trasmit amplitude + minimum: 0 + maximum: 15 + default: 8 + + qcom,tune-usb2-preem: + $ref: /schemas/types.yaml#/definitions/uint8 + description: High-Speed TX pre-emphasis tuning + minimum: 0 + maximum: 7 + default: 5 + required: - compatible - reg From 36cc3bd88636f261eb383e31a36d0bf16e2470ad Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 13 Sep 2023 11:53:24 +0200 Subject: [PATCH 0120/1397] phy: qualcomm: phy-qcom-eusb2-repeater: Use regmap_fields [ Upstream commit 4ba2e52718c0ce4ece6a269bec84319c355c030f ] Switch to regmap_fields, so that the values written into registers are sanitized by their explicit sizes and the different registers are structured in an iterable object to make external changes to the init sequence simpler. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230830-topic-eusb2_override-v2-2-7d8c893d93f6@linaro.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- .../phy/qualcomm/phy-qcom-eusb2-repeater.c | 91 +++++++++++++------ 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c index 52c275fbb2a1..7561da01e633 100644 --- a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c +++ b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c @@ -28,14 +28,42 @@ #define EUSB2_TUNE_SQUELCH_U 0x54 #define EUSB2_TUNE_USB2_PREEM 0x57 -#define QCOM_EUSB2_REPEATER_INIT_CFG(o, v) \ +#define QCOM_EUSB2_REPEATER_INIT_CFG(r, v) \ { \ - .offset = o, \ + .reg = r, \ .val = v, \ } +enum reg_fields { + F_TUNE_USB2_PREEM, + F_TUNE_SQUELCH_U, + F_TUNE_IUSB2, + F_NUM_TUNE_FIELDS, + + F_FORCE_VAL_5 = F_NUM_TUNE_FIELDS, + F_FORCE_EN_5, + + F_EN_CTL1, + + F_RPTR_STATUS, + F_NUM_FIELDS, +}; + +static struct reg_field eusb2_repeater_tune_reg_fields[F_NUM_FIELDS] = { + [F_TUNE_USB2_PREEM] = REG_FIELD(EUSB2_TUNE_USB2_PREEM, 0, 2), + [F_TUNE_SQUELCH_U] = REG_FIELD(EUSB2_TUNE_SQUELCH_U, 0, 2), + [F_TUNE_IUSB2] = REG_FIELD(EUSB2_TUNE_IUSB2, 0, 3), + + [F_FORCE_VAL_5] = REG_FIELD(EUSB2_FORCE_VAL_5, 0, 7), + [F_FORCE_EN_5] = REG_FIELD(EUSB2_FORCE_EN_5, 0, 7), + + [F_EN_CTL1] = REG_FIELD(EUSB2_EN_CTL1, 0, 7), + + [F_RPTR_STATUS] = REG_FIELD(EUSB2_RPTR_STATUS, 0, 7), +}; + struct eusb2_repeater_init_tbl { - unsigned int offset; + unsigned int reg; unsigned int val; }; @@ -48,11 +76,10 @@ struct eusb2_repeater_cfg { struct eusb2_repeater { struct device *dev; - struct regmap *regmap; + struct regmap_field *regs[F_NUM_FIELDS]; struct phy *phy; struct regulator_bulk_data *vregs; const struct eusb2_repeater_cfg *cfg; - u16 base; enum phy_mode mode; }; @@ -61,9 +88,9 @@ static const char * const pm8550b_vreg_l[] = { }; static const struct eusb2_repeater_init_tbl pm8550b_init_tbl[] = { - QCOM_EUSB2_REPEATER_INIT_CFG(EUSB2_TUNE_IUSB2, 0x8), - QCOM_EUSB2_REPEATER_INIT_CFG(EUSB2_TUNE_SQUELCH_U, 0x3), - QCOM_EUSB2_REPEATER_INIT_CFG(EUSB2_TUNE_USB2_PREEM, 0x5), + QCOM_EUSB2_REPEATER_INIT_CFG(F_TUNE_IUSB2, 0x8), + QCOM_EUSB2_REPEATER_INIT_CFG(F_TUNE_SQUELCH_U, 0x3), + QCOM_EUSB2_REPEATER_INIT_CFG(F_TUNE_USB2_PREEM, 0x5), }; static const struct eusb2_repeater_cfg pm8550b_eusb2_cfg = { @@ -93,7 +120,6 @@ static int eusb2_repeater_init(struct phy *phy) { struct eusb2_repeater *rptr = phy_get_drvdata(phy); const struct eusb2_repeater_init_tbl *init_tbl = rptr->cfg->init_tbl; - int num = rptr->cfg->init_tbl_num; u32 val; int ret; int i; @@ -102,17 +128,14 @@ static int eusb2_repeater_init(struct phy *phy) if (ret) return ret; - regmap_update_bits(rptr->regmap, rptr->base + EUSB2_EN_CTL1, - EUSB2_RPTR_EN, EUSB2_RPTR_EN); + regmap_field_update_bits(rptr->regs[F_EN_CTL1], EUSB2_RPTR_EN, EUSB2_RPTR_EN); - for (i = 0; i < num; i++) - regmap_update_bits(rptr->regmap, - rptr->base + init_tbl[i].offset, - init_tbl[i].val, init_tbl[i].val); + for (i = 0; i < rptr->cfg->init_tbl_num; i++) + regmap_field_update_bits(rptr->regs[init_tbl[i].reg], + init_tbl[i].val, init_tbl[i].val); - ret = regmap_read_poll_timeout(rptr->regmap, - rptr->base + EUSB2_RPTR_STATUS, val, - val & RPTR_OK, 10, 5); + ret = regmap_field_read_poll_timeout(rptr->regs[F_RPTR_STATUS], + val, val & RPTR_OK, 10, 5); if (ret) dev_err(rptr->dev, "initialization timed-out\n"); @@ -131,10 +154,10 @@ static int eusb2_repeater_set_mode(struct phy *phy, * per eUSB 1.2 Spec. Below implement software workaround until * PHY and controller is fixing seen observation. */ - regmap_update_bits(rptr->regmap, rptr->base + EUSB2_FORCE_EN_5, - F_CLK_19P2M_EN, F_CLK_19P2M_EN); - regmap_update_bits(rptr->regmap, rptr->base + EUSB2_FORCE_VAL_5, - V_CLK_19P2M_EN, V_CLK_19P2M_EN); + regmap_field_update_bits(rptr->regs[F_FORCE_EN_5], + F_CLK_19P2M_EN, F_CLK_19P2M_EN); + regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5], + V_CLK_19P2M_EN, V_CLK_19P2M_EN); break; case PHY_MODE_USB_DEVICE: /* @@ -143,10 +166,10 @@ static int eusb2_repeater_set_mode(struct phy *phy, * repeater doesn't clear previous value due to shared * regulators (say host <-> device mode switch). */ - regmap_update_bits(rptr->regmap, rptr->base + EUSB2_FORCE_EN_5, - F_CLK_19P2M_EN, 0); - regmap_update_bits(rptr->regmap, rptr->base + EUSB2_FORCE_VAL_5, - V_CLK_19P2M_EN, 0); + regmap_field_update_bits(rptr->regs[F_FORCE_EN_5], + F_CLK_19P2M_EN, 0); + regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5], + V_CLK_19P2M_EN, 0); break; default: return -EINVAL; @@ -175,8 +198,9 @@ static int eusb2_repeater_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct phy_provider *phy_provider; struct device_node *np = dev->of_node; + struct regmap *regmap; + int i, ret; u32 res; - int ret; rptr = devm_kzalloc(dev, sizeof(*rptr), GFP_KERNEL); if (!rptr) @@ -189,15 +213,22 @@ static int eusb2_repeater_probe(struct platform_device *pdev) if (!rptr->cfg) return -EINVAL; - rptr->regmap = dev_get_regmap(dev->parent, NULL); - if (!rptr->regmap) + regmap = dev_get_regmap(dev->parent, NULL); + if (!regmap) return -ENODEV; ret = of_property_read_u32(np, "reg", &res); if (ret < 0) return ret; - rptr->base = res; + for (i = 0; i < F_NUM_FIELDS; i++) + eusb2_repeater_tune_reg_fields[i].reg += res; + + ret = devm_regmap_field_bulk_alloc(dev, regmap, rptr->regs, + eusb2_repeater_tune_reg_fields, + F_NUM_FIELDS); + if (ret) + return ret; ret = eusb2_repeater_init_vregs(rptr); if (ret < 0) { From 77f1450464558279a5c22b5ecb41b74a5812b001 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 13 Sep 2023 11:53:25 +0200 Subject: [PATCH 0121/1397] phy: qualcomm: phy-qcom-eusb2-repeater: Zero out untouched tuning regs [ Upstream commit 99a517a582fc1272d1d3cf3b9e671a14d7db77b8 ] The vendor kernel zeroes out all tuning data outside the init sequence as part of initialization. Follow suit to avoid UB. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230830-topic-eusb2_override-v2-3-7d8c893d93f6@linaro.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- .../phy/qualcomm/phy-qcom-eusb2-repeater.c | 58 ++++++++++++++----- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c index 7561da01e633..d4fb85c20eb0 100644 --- a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c +++ b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c @@ -24,9 +24,18 @@ #define EUSB2_FORCE_VAL_5 0xeD #define V_CLK_19P2M_EN BIT(6) +#define EUSB2_TUNE_USB2_CROSSOVER 0x50 #define EUSB2_TUNE_IUSB2 0x51 +#define EUSB2_TUNE_RES_FSDIF 0x52 +#define EUSB2_TUNE_HSDISC 0x53 #define EUSB2_TUNE_SQUELCH_U 0x54 +#define EUSB2_TUNE_USB2_SLEW 0x55 +#define EUSB2_TUNE_USB2_EQU 0x56 #define EUSB2_TUNE_USB2_PREEM 0x57 +#define EUSB2_TUNE_USB2_HS_COMP_CUR 0x58 +#define EUSB2_TUNE_EUSB_SLEW 0x59 +#define EUSB2_TUNE_EUSB_EQU 0x5A +#define EUSB2_TUNE_EUSB_HS_COMP_CUR 0x5B #define QCOM_EUSB2_REPEATER_INIT_CFG(r, v) \ { \ @@ -35,9 +44,18 @@ } enum reg_fields { + F_TUNE_EUSB_HS_COMP_CUR, + F_TUNE_EUSB_EQU, + F_TUNE_EUSB_SLEW, + F_TUNE_USB2_HS_COMP_CUR, F_TUNE_USB2_PREEM, + F_TUNE_USB2_EQU, + F_TUNE_USB2_SLEW, F_TUNE_SQUELCH_U, + F_TUNE_HSDISC, + F_TUNE_RES_FSDIF, F_TUNE_IUSB2, + F_TUNE_USB2_CROSSOVER, F_NUM_TUNE_FIELDS, F_FORCE_VAL_5 = F_NUM_TUNE_FIELDS, @@ -50,9 +68,18 @@ enum reg_fields { }; static struct reg_field eusb2_repeater_tune_reg_fields[F_NUM_FIELDS] = { + [F_TUNE_EUSB_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_EUSB_HS_COMP_CUR, 0, 1), + [F_TUNE_EUSB_EQU] = REG_FIELD(EUSB2_TUNE_EUSB_EQU, 0, 1), + [F_TUNE_EUSB_SLEW] = REG_FIELD(EUSB2_TUNE_EUSB_SLEW, 0, 1), + [F_TUNE_USB2_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_USB2_HS_COMP_CUR, 0, 1), [F_TUNE_USB2_PREEM] = REG_FIELD(EUSB2_TUNE_USB2_PREEM, 0, 2), + [F_TUNE_USB2_EQU] = REG_FIELD(EUSB2_TUNE_USB2_EQU, 0, 1), + [F_TUNE_USB2_SLEW] = REG_FIELD(EUSB2_TUNE_USB2_SLEW, 0, 1), [F_TUNE_SQUELCH_U] = REG_FIELD(EUSB2_TUNE_SQUELCH_U, 0, 2), + [F_TUNE_HSDISC] = REG_FIELD(EUSB2_TUNE_HSDISC, 0, 2), + [F_TUNE_RES_FSDIF] = REG_FIELD(EUSB2_TUNE_RES_FSDIF, 0, 2), [F_TUNE_IUSB2] = REG_FIELD(EUSB2_TUNE_IUSB2, 0, 3), + [F_TUNE_USB2_CROSSOVER] = REG_FIELD(EUSB2_TUNE_USB2_CROSSOVER, 0, 2), [F_FORCE_VAL_5] = REG_FIELD(EUSB2_FORCE_VAL_5, 0, 7), [F_FORCE_EN_5] = REG_FIELD(EUSB2_FORCE_EN_5, 0, 7), @@ -62,13 +89,8 @@ static struct reg_field eusb2_repeater_tune_reg_fields[F_NUM_FIELDS] = { [F_RPTR_STATUS] = REG_FIELD(EUSB2_RPTR_STATUS, 0, 7), }; -struct eusb2_repeater_init_tbl { - unsigned int reg; - unsigned int val; -}; - struct eusb2_repeater_cfg { - const struct eusb2_repeater_init_tbl *init_tbl; + const u32 *init_tbl; int init_tbl_num; const char * const *vreg_list; int num_vregs; @@ -87,10 +109,10 @@ static const char * const pm8550b_vreg_l[] = { "vdd18", "vdd3", }; -static const struct eusb2_repeater_init_tbl pm8550b_init_tbl[] = { - QCOM_EUSB2_REPEATER_INIT_CFG(F_TUNE_IUSB2, 0x8), - QCOM_EUSB2_REPEATER_INIT_CFG(F_TUNE_SQUELCH_U, 0x3), - QCOM_EUSB2_REPEATER_INIT_CFG(F_TUNE_USB2_PREEM, 0x5), +static const u32 pm8550b_init_tbl[F_NUM_TUNE_FIELDS] = { + [F_TUNE_IUSB2] = 0x8, + [F_TUNE_SQUELCH_U] = 0x3, + [F_TUNE_USB2_PREEM] = 0x5, }; static const struct eusb2_repeater_cfg pm8550b_eusb2_cfg = { @@ -118,8 +140,9 @@ static int eusb2_repeater_init_vregs(struct eusb2_repeater *rptr) static int eusb2_repeater_init(struct phy *phy) { + struct reg_field *regfields = eusb2_repeater_tune_reg_fields; struct eusb2_repeater *rptr = phy_get_drvdata(phy); - const struct eusb2_repeater_init_tbl *init_tbl = rptr->cfg->init_tbl; + const u32 *init_tbl = rptr->cfg->init_tbl; u32 val; int ret; int i; @@ -130,9 +153,16 @@ static int eusb2_repeater_init(struct phy *phy) regmap_field_update_bits(rptr->regs[F_EN_CTL1], EUSB2_RPTR_EN, EUSB2_RPTR_EN); - for (i = 0; i < rptr->cfg->init_tbl_num; i++) - regmap_field_update_bits(rptr->regs[init_tbl[i].reg], - init_tbl[i].val, init_tbl[i].val); + for (i = 0; i < F_NUM_TUNE_FIELDS; i++) { + if (init_tbl[i]) { + regmap_field_update_bits(rptr->regs[i], init_tbl[i], init_tbl[i]); + } else { + /* Write 0 if there's no value set */ + u32 mask = GENMASK(regfields[i].msb, regfields[i].lsb); + + regmap_field_update_bits(rptr->regs[i], mask, 0); + } + } ret = regmap_field_read_poll_timeout(rptr->regs[F_RPTR_STATUS], val, val & RPTR_OK, 10, 5); From a9876332a1bdfe6c606cab957bd901b3bce57899 Mon Sep 17 00:00:00 2001 From: Stanley Chang Date: Tue, 12 Sep 2023 12:19:02 +0800 Subject: [PATCH 0122/1397] usb: dwc3: core: configure TX/RX threshold for DWC3_IP [ Upstream commit e72fc8d6a12af7ae8dd1b52cf68ed68569d29f80 ] In Synopsys's dwc3 data book: To avoid underrun and overrun during the burst, in a high-latency bus system (like USB), threshold and burst size control is provided through GTXTHRCFG and GRXTHRCFG registers. In Realtek DHC SoC, DWC3 USB 3.0 uses AHB system bus. When dwc3 is connected with USB 2.5G Ethernet, there will be overrun problem. Therefore, setting TX/RX thresholds can avoid this issue. Signed-off-by: Stanley Chang Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20230912041904.30721-1-stanley_chang@realtek.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/core.c | 160 +++++++++++++++++++++++++++++++--------- drivers/usb/dwc3/core.h | 13 ++++ 2 files changed, 137 insertions(+), 36 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 343d2570189f..d25490965b27 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1094,6 +1094,111 @@ static void dwc3_set_power_down_clk_scale(struct dwc3 *dwc) } } +static void dwc3_config_threshold(struct dwc3 *dwc) +{ + u32 reg; + u8 rx_thr_num; + u8 rx_maxburst; + u8 tx_thr_num; + u8 tx_maxburst; + + /* + * Must config both number of packets and max burst settings to enable + * RX and/or TX threshold. + */ + if (!DWC3_IP_IS(DWC3) && dwc->dr_mode == USB_DR_MODE_HOST) { + rx_thr_num = dwc->rx_thr_num_pkt_prd; + rx_maxburst = dwc->rx_max_burst_prd; + tx_thr_num = dwc->tx_thr_num_pkt_prd; + tx_maxburst = dwc->tx_max_burst_prd; + + if (rx_thr_num && rx_maxburst) { + reg = dwc3_readl(dwc->regs, DWC3_GRXTHRCFG); + reg |= DWC31_RXTHRNUMPKTSEL_PRD; + + reg &= ~DWC31_RXTHRNUMPKT_PRD(~0); + reg |= DWC31_RXTHRNUMPKT_PRD(rx_thr_num); + + reg &= ~DWC31_MAXRXBURSTSIZE_PRD(~0); + reg |= DWC31_MAXRXBURSTSIZE_PRD(rx_maxburst); + + dwc3_writel(dwc->regs, DWC3_GRXTHRCFG, reg); + } + + if (tx_thr_num && tx_maxburst) { + reg = dwc3_readl(dwc->regs, DWC3_GTXTHRCFG); + reg |= DWC31_TXTHRNUMPKTSEL_PRD; + + reg &= ~DWC31_TXTHRNUMPKT_PRD(~0); + reg |= DWC31_TXTHRNUMPKT_PRD(tx_thr_num); + + reg &= ~DWC31_MAXTXBURSTSIZE_PRD(~0); + reg |= DWC31_MAXTXBURSTSIZE_PRD(tx_maxburst); + + dwc3_writel(dwc->regs, DWC3_GTXTHRCFG, reg); + } + } + + rx_thr_num = dwc->rx_thr_num_pkt; + rx_maxburst = dwc->rx_max_burst; + tx_thr_num = dwc->tx_thr_num_pkt; + tx_maxburst = dwc->tx_max_burst; + + if (DWC3_IP_IS(DWC3)) { + if (rx_thr_num && rx_maxburst) { + reg = dwc3_readl(dwc->regs, DWC3_GRXTHRCFG); + reg |= DWC3_GRXTHRCFG_PKTCNTSEL; + + reg &= ~DWC3_GRXTHRCFG_RXPKTCNT(~0); + reg |= DWC3_GRXTHRCFG_RXPKTCNT(rx_thr_num); + + reg &= ~DWC3_GRXTHRCFG_MAXRXBURSTSIZE(~0); + reg |= DWC3_GRXTHRCFG_MAXRXBURSTSIZE(rx_maxburst); + + dwc3_writel(dwc->regs, DWC3_GRXTHRCFG, reg); + } + + if (tx_thr_num && tx_maxburst) { + reg = dwc3_readl(dwc->regs, DWC3_GTXTHRCFG); + reg |= DWC3_GTXTHRCFG_PKTCNTSEL; + + reg &= ~DWC3_GTXTHRCFG_TXPKTCNT(~0); + reg |= DWC3_GTXTHRCFG_TXPKTCNT(tx_thr_num); + + reg &= ~DWC3_GTXTHRCFG_MAXTXBURSTSIZE(~0); + reg |= DWC3_GTXTHRCFG_MAXTXBURSTSIZE(tx_maxburst); + + dwc3_writel(dwc->regs, DWC3_GTXTHRCFG, reg); + } + } else { + if (rx_thr_num && rx_maxburst) { + reg = dwc3_readl(dwc->regs, DWC3_GRXTHRCFG); + reg |= DWC31_GRXTHRCFG_PKTCNTSEL; + + reg &= ~DWC31_GRXTHRCFG_RXPKTCNT(~0); + reg |= DWC31_GRXTHRCFG_RXPKTCNT(rx_thr_num); + + reg &= ~DWC31_GRXTHRCFG_MAXRXBURSTSIZE(~0); + reg |= DWC31_GRXTHRCFG_MAXRXBURSTSIZE(rx_maxburst); + + dwc3_writel(dwc->regs, DWC3_GRXTHRCFG, reg); + } + + if (tx_thr_num && tx_maxburst) { + reg = dwc3_readl(dwc->regs, DWC3_GTXTHRCFG); + reg |= DWC31_GTXTHRCFG_PKTCNTSEL; + + reg &= ~DWC31_GTXTHRCFG_TXPKTCNT(~0); + reg |= DWC31_GTXTHRCFG_TXPKTCNT(tx_thr_num); + + reg &= ~DWC31_GTXTHRCFG_MAXTXBURSTSIZE(~0); + reg |= DWC31_GTXTHRCFG_MAXTXBURSTSIZE(tx_maxburst); + + dwc3_writel(dwc->regs, DWC3_GTXTHRCFG, reg); + } + } +} + /** * dwc3_core_init - Low-level initialization of DWC3 Core * @dwc: Pointer to our controller context structure @@ -1246,42 +1351,7 @@ static int dwc3_core_init(struct dwc3 *dwc) dwc3_writel(dwc->regs, DWC3_GUCTL1, reg); } - /* - * Must config both number of packets and max burst settings to enable - * RX and/or TX threshold. - */ - if (!DWC3_IP_IS(DWC3) && dwc->dr_mode == USB_DR_MODE_HOST) { - u8 rx_thr_num = dwc->rx_thr_num_pkt_prd; - u8 rx_maxburst = dwc->rx_max_burst_prd; - u8 tx_thr_num = dwc->tx_thr_num_pkt_prd; - u8 tx_maxburst = dwc->tx_max_burst_prd; - - if (rx_thr_num && rx_maxburst) { - reg = dwc3_readl(dwc->regs, DWC3_GRXTHRCFG); - reg |= DWC31_RXTHRNUMPKTSEL_PRD; - - reg &= ~DWC31_RXTHRNUMPKT_PRD(~0); - reg |= DWC31_RXTHRNUMPKT_PRD(rx_thr_num); - - reg &= ~DWC31_MAXRXBURSTSIZE_PRD(~0); - reg |= DWC31_MAXRXBURSTSIZE_PRD(rx_maxburst); - - dwc3_writel(dwc->regs, DWC3_GRXTHRCFG, reg); - } - - if (tx_thr_num && tx_maxburst) { - reg = dwc3_readl(dwc->regs, DWC3_GTXTHRCFG); - reg |= DWC31_TXTHRNUMPKTSEL_PRD; - - reg &= ~DWC31_TXTHRNUMPKT_PRD(~0); - reg |= DWC31_TXTHRNUMPKT_PRD(tx_thr_num); - - reg &= ~DWC31_MAXTXBURSTSIZE_PRD(~0); - reg |= DWC31_MAXTXBURSTSIZE_PRD(tx_maxburst); - - dwc3_writel(dwc->regs, DWC3_GTXTHRCFG, reg); - } - } + dwc3_config_threshold(dwc); return 0; @@ -1417,6 +1487,10 @@ static void dwc3_get_properties(struct dwc3 *dwc) u8 lpm_nyet_threshold; u8 tx_de_emphasis; u8 hird_threshold; + u8 rx_thr_num_pkt = 0; + u8 rx_max_burst = 0; + u8 tx_thr_num_pkt = 0; + u8 tx_max_burst = 0; u8 rx_thr_num_pkt_prd = 0; u8 rx_max_burst_prd = 0; u8 tx_thr_num_pkt_prd = 0; @@ -1479,6 +1553,14 @@ static void dwc3_get_properties(struct dwc3 *dwc) "snps,usb2-lpm-disable"); dwc->usb2_gadget_lpm_disable = device_property_read_bool(dev, "snps,usb2-gadget-lpm-disable"); + device_property_read_u8(dev, "snps,rx-thr-num-pkt", + &rx_thr_num_pkt); + device_property_read_u8(dev, "snps,rx-max-burst", + &rx_max_burst); + device_property_read_u8(dev, "snps,tx-thr-num-pkt", + &tx_thr_num_pkt); + device_property_read_u8(dev, "snps,tx-max-burst", + &tx_max_burst); device_property_read_u8(dev, "snps,rx-thr-num-pkt-prd", &rx_thr_num_pkt_prd); device_property_read_u8(dev, "snps,rx-max-burst-prd", @@ -1560,6 +1642,12 @@ static void dwc3_get_properties(struct dwc3 *dwc) dwc->hird_threshold = hird_threshold; + dwc->rx_thr_num_pkt = rx_thr_num_pkt; + dwc->rx_max_burst = rx_max_burst; + + dwc->tx_thr_num_pkt = tx_thr_num_pkt; + dwc->tx_max_burst = tx_max_burst; + dwc->rx_thr_num_pkt_prd = rx_thr_num_pkt_prd; dwc->rx_max_burst_prd = rx_max_burst_prd; diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index a69ac67d89fe..6782ec8bfd64 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -211,6 +211,11 @@ #define DWC3_GRXTHRCFG_RXPKTCNT(n) (((n) & 0xf) << 24) #define DWC3_GRXTHRCFG_PKTCNTSEL BIT(29) +/* Global TX Threshold Configuration Register */ +#define DWC3_GTXTHRCFG_MAXTXBURSTSIZE(n) (((n) & 0xff) << 16) +#define DWC3_GTXTHRCFG_TXPKTCNT(n) (((n) & 0xf) << 24) +#define DWC3_GTXTHRCFG_PKTCNTSEL BIT(29) + /* Global RX Threshold Configuration Register for DWC_usb31 only */ #define DWC31_GRXTHRCFG_MAXRXBURSTSIZE(n) (((n) & 0x1f) << 16) #define DWC31_GRXTHRCFG_RXPKTCNT(n) (((n) & 0x1f) << 21) @@ -1045,6 +1050,10 @@ struct dwc3_scratchpad_array { * @test_mode_nr: test feature selector * @lpm_nyet_threshold: LPM NYET response threshold * @hird_threshold: HIRD threshold + * @rx_thr_num_pkt: USB receive packet count + * @rx_max_burst: max USB receive burst size + * @tx_thr_num_pkt: USB transmit packet count + * @tx_max_burst: max USB transmit burst size * @rx_thr_num_pkt_prd: periodic ESS receive packet count * @rx_max_burst_prd: max periodic ESS receive burst size * @tx_thr_num_pkt_prd: periodic ESS transmit packet count @@ -1273,6 +1282,10 @@ struct dwc3 { u8 test_mode_nr; u8 lpm_nyet_threshold; u8 hird_threshold; + u8 rx_thr_num_pkt; + u8 rx_max_burst; + u8 tx_thr_num_pkt; + u8 tx_max_burst; u8 rx_thr_num_pkt_prd; u8 rx_max_burst_prd; u8 tx_thr_num_pkt_prd; From 13db7f133c615d46d0fc29f02568e881c7eae568 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 2 Oct 2023 12:20:22 +0200 Subject: [PATCH 0123/1397] usb: ucsi: glink: use the connector orientation GPIO to provide switch events [ Upstream commit c6165ed2f425c273244191930a47c8be23bc51bd ] On SM8550, the non-altmode orientation is not given anymore within altmode events, even with USB SVIDs events. On the other side, the Type-C connector orientation is correctly reported by a signal from the PMIC. Take this gpio signal when we detect some Type-C port activity to notify any Type-C switches tied to the Type-C port connectors. Acked-by: Heikki Krogerus Signed-off-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Acked-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231002-topic-sm8550-upstream-type-c-orientation-v2-2-125410d3ff95@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/ucsi/ucsi_glink.c | 54 ++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index bb1854b3311d..db6e248f8208 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -8,9 +8,13 @@ #include #include #include +#include +#include #include #include "ucsi.h" +#define PMIC_GLINK_MAX_PORTS 2 + #define UCSI_BUF_SIZE 48 #define MSG_TYPE_REQ_RESP 1 @@ -52,6 +56,9 @@ struct ucsi_notify_ind_msg { struct pmic_glink_ucsi { struct device *dev; + struct gpio_desc *port_orientation[PMIC_GLINK_MAX_PORTS]; + struct typec_switch *port_switch[PMIC_GLINK_MAX_PORTS]; + struct pmic_glink_client *client; struct ucsi *ucsi; @@ -220,8 +227,20 @@ static void pmic_glink_ucsi_notify(struct work_struct *work) } con_num = UCSI_CCI_CONNECTOR(cci); - if (con_num) + if (con_num) { + if (con_num < PMIC_GLINK_MAX_PORTS && + ucsi->port_orientation[con_num - 1]) { + int orientation = gpiod_get_value(ucsi->port_orientation[con_num - 1]); + + if (orientation >= 0) { + typec_switch_set(ucsi->port_switch[con_num - 1], + orientation ? TYPEC_ORIENTATION_REVERSE + : TYPEC_ORIENTATION_NORMAL); + } + } + ucsi_connector_change(ucsi->ucsi, con_num); + } if (ucsi->sync_pending && cci & UCSI_CCI_BUSY) { ucsi->sync_val = -EBUSY; @@ -282,6 +301,7 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev, { struct pmic_glink_ucsi *ucsi; struct device *dev = &adev->dev; + struct fwnode_handle *fwnode; int ret; ucsi = devm_kzalloc(dev, sizeof(*ucsi), GFP_KERNEL); @@ -309,6 +329,38 @@ static int pmic_glink_ucsi_probe(struct auxiliary_device *adev, ucsi_set_drvdata(ucsi->ucsi, ucsi); + device_for_each_child_node(dev, fwnode) { + struct gpio_desc *desc; + u32 port; + + ret = fwnode_property_read_u32(fwnode, "reg", &port); + if (ret < 0) { + dev_err(dev, "missing reg property of %pOFn\n", fwnode); + return ret; + } + + if (port >= PMIC_GLINK_MAX_PORTS) { + dev_warn(dev, "invalid connector number, ignoring\n"); + continue; + } + + desc = devm_gpiod_get_index_optional(&adev->dev, "orientation", port, GPIOD_IN); + + /* If GPIO isn't found, continue */ + if (!desc) + continue; + + if (IS_ERR(desc)) + return dev_err_probe(dev, PTR_ERR(desc), + "unable to acquire orientation gpio\n"); + ucsi->port_orientation[port] = desc; + + ucsi->port_switch[port] = fwnode_typec_switch_get(fwnode); + if (IS_ERR(ucsi->port_switch[port])) + return dev_err_probe(dev, PTR_ERR(ucsi->port_switch[port]), + "failed to acquire orientation-switch\n"); + } + ucsi->client = devm_pmic_glink_register_client(dev, PMIC_GLINK_OWNER_USBC, pmic_glink_ucsi_callback, From e69ec2352087d9b6f5bcf31dbd4c14028b74671a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 13 Oct 2023 09:08:33 +0800 Subject: [PATCH 0124/1397] soundwire: dmi-quirks: update HP Omen match [ Upstream commit 4ea2b6d3128ea4d502c4015df0dc16b7d1070954 ] New platforms have a slightly different DMI product name, remove trailing characters/digits to handle all cases Closes: https://github.com/thesofproject/linux/issues/4611 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20231013010833.114271-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/soundwire/dmi-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c index 2a1096dab63d..9ebdd0cd0b1c 100644 --- a/drivers/soundwire/dmi-quirks.c +++ b/drivers/soundwire/dmi-quirks.c @@ -141,7 +141,7 @@ static const struct dmi_system_id adr_remap_quirk_table[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "OMEN by HP Gaming Laptop 16-k0xxx"), + DMI_MATCH(DMI_PRODUCT_NAME, "OMEN by HP Gaming Laptop 16"), }, .driver_data = (void *)hp_omen_16, }, From 0237975d8e3ec10c07b8169e22a6cea35afd8475 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Mon, 16 Oct 2023 19:27:31 +0800 Subject: [PATCH 0125/1397] f2fs: fix error path of __f2fs_build_free_nids [ Upstream commit a5e80e18f268ea7c7a36bc4159de0deb3b5a2171 ] If NAT is corrupted, let scan_nat_page() return EFSCORRUPTED, so that, caller can set SBI_NEED_FSCK flag into checkpoint for later repair by fsck. Also, this patch introduces a new fscorrupted error flag, and in above scenario, it will persist the error flag into superblock synchronously to avoid it has no luck to trigger a checkpoint to record SBI_NEED_FSCK Signed-off-by: Zhiguo Niu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/node.c | 11 +++++++++-- include/linux/f2fs_fs.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ee2e1dd64f25..248764badcde 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2389,7 +2389,7 @@ static int scan_nat_page(struct f2fs_sb_info *sbi, blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); if (blk_addr == NEW_ADDR) - return -EINVAL; + return -EFSCORRUPTED; if (blk_addr == NULL_ADDR) { add_free_nid(sbi, start_nid, true, true); @@ -2504,7 +2504,14 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, if (ret) { f2fs_up_read(&nm_i->nat_tree_lock); - f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); + + if (ret == -EFSCORRUPTED) { + f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, + ERROR_INCONSISTENT_NAT); + } + return ret; } } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a82a4bb6ce68..cf1adceb0269 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -104,6 +104,7 @@ enum f2fs_error { ERROR_CORRUPTED_VERITY_XATTR, ERROR_CORRUPTED_XATTR, ERROR_INVALID_NODE_REFERENCE, + ERROR_INCONSISTENT_NAT, ERROR_MAX, }; From b7b01c7804775cb3586fed3c71bc8c38fcc77000 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Wed, 18 Oct 2023 14:51:02 +0800 Subject: [PATCH 0126/1397] f2fs: fix error handling of __get_node_page [ Upstream commit 9b4c8dd99fe48721410741651d426015e03a4b7a ] Use f2fs_handle_error to record inconsistent node block error and return -EFSCORRUPTED instead of -EINVAL. Signed-off-by: Zhiguo Niu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/node.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 248764badcde..ed963c56ac32 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1467,7 +1467,8 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, ofs_of_node(page), cpver_of_node(page), next_blkaddr_of_node(page)); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); + err = -EFSCORRUPTED; out_err: ClearPageUptodate(page); out_put_err: From f83810e0ecda73bdad2c94d2db60f58c0a158e7a Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Thu, 19 Oct 2023 13:29:24 +0300 Subject: [PATCH 0127/1397] usb: host: xhci: Avoid XHCI resume delay if SSUSB device is not present [ Upstream commit 6add6dd345cb754ce18ff992c7264cabf31e59f6 ] There is a 120ms delay implemented for allowing the XHCI host controller to detect a U3 wakeup pulse. The intention is to wait for the device to retry the wakeup event if the USB3 PORTSC doesn't reflect the RESUME link status by the time it is checked. As per the USB3 specification: tU3WakeupRetryDelay ("Table 7-12. LTSSM State Transition Timeouts") This would allow the XHCI resume sequence to determine if the root hub needs to be also resumed. However, in case there is no device connected, or if there is only a HSUSB device connected, this delay would still affect the overall resume timing. Since this delay is solely for detecting U3 wake events (USB3 specific) then ignore this delay for the disconnected case and the HSUSB connected only case. [skip helper function, rename usb3_connected variable -Mathias ] Signed-off-by: Wesley Cheng Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20231019102924.2797346-20-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index e1b1b64a0723..132b76fa7ca6 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -968,6 +968,7 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) int retval = 0; bool comp_timer_running = false; bool pending_portevent = false; + bool suspended_usb3_devs = false; bool reinit_xhc = false; if (!hcd->state) @@ -1115,10 +1116,17 @@ int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) /* * Resume roothubs only if there are pending events. * USB 3 devices resend U3 LFPS wake after a 100ms delay if - * the first wake signalling failed, give it that chance. + * the first wake signalling failed, give it that chance if + * there are suspended USB 3 devices. */ + if (xhci->usb3_rhub.bus_state.suspended_ports || + xhci->usb3_rhub.bus_state.bus_suspended) + suspended_usb3_devs = true; + pending_portevent = xhci_pending_portevent(xhci); - if (!pending_portevent && msg.event == PM_EVENT_AUTO_RESUME) { + + if (suspended_usb3_devs && !pending_portevent && + msg.event == PM_EVENT_AUTO_RESUME) { msleep(120); pending_portevent = xhci_pending_portevent(xhci); } From d2500cd63c887ebe10b688a735b8370227596e82 Mon Sep 17 00:00:00 2001 From: Hardik Gajjar Date: Fri, 20 Oct 2023 17:33:24 +0200 Subject: [PATCH 0128/1397] usb: gadget: f_ncm: Always set current gadget in ncm_bind() [ Upstream commit a04224da1f3424b2c607b12a3bd1f0e302fb8231 ] Previously, gadget assignment to the net device occurred exclusively during the initial binding attempt. Nevertheless, the gadget pointer could change during bind/unbind cycles due to various conditions, including the unloading/loading of the UDC device driver or the detachment/reconnection of an OTG-capable USB hub device. This patch relocates the gether_set_gadget() function out from ncm_opts->bound condition check, ensuring that the correct gadget is assigned during each bind request. The provided logs demonstrate the consistency of ncm_opts throughout the power cycle, while the gadget may change. * OTG hub connected during boot up and assignment of gadget and ncm_opts pointer [ 2.366301] usb 2-1.5: New USB device found, idVendor=2996, idProduct=0105 [ 2.366304] usb 2-1.5: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 2.366306] usb 2-1.5: Product: H2H Bridge [ 2.366308] usb 2-1.5: Manufacturer: Aptiv [ 2.366309] usb 2-1.5: SerialNumber: 13FEB2021 [ 2.427989] usb 2-1.5: New USB device found, VID=2996, PID=0105 [ 2.428959] dabridge 2-1.5:1.0: dabridge 2-4 total endpoints=5, 0000000093a8d681 [ 2.429710] dabridge 2-1.5:1.0: P(0105) D(22.06.22) F(17.3.16) H(1.1) high-speed [ 2.429714] dabridge 2-1.5:1.0: Hub 2-2 P(0151) V(06.87) [ 2.429956] dabridge 2-1.5:1.0: All downstream ports in host mode [ 2.430093] gadget 000000003c414d59 ------> gadget pointer * NCM opts and associated gadget pointer during First ncm_bind [ 34.763929] NCM opts 00000000aa304ac9 [ 34.763930] NCM gadget 000000003c414d59 * OTG capable hub disconnecte or assume driver unload. [ 97.203114] usb 2-1: USB disconnect, device number 2 [ 97.203118] usb 2-1.1: USB disconnect, device number 3 [ 97.209217] usb 2-1.5: USB disconnect, device number 4 [ 97.230990] dabr_udc deleted * Reconnect the OTG hub or load driver assaign new gadget pointer. [ 111.534035] usb 2-1.1: New USB device found, idVendor=2996, idProduct=0120, bcdDevice= 6.87 [ 111.534038] usb 2-1.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 111.534040] usb 2-1.1: Product: Vendor [ 111.534041] usb 2-1.1: Manufacturer: Aptiv [ 111.534042] usb 2-1.1: SerialNumber: Superior [ 111.535175] usb 2-1.1: New USB device found, VID=2996, PID=0120 [ 111.610995] usb 2-1.5: new high-speed USB device number 8 using xhci-hcd [ 111.630052] usb 2-1.5: New USB device found, idVendor=2996, idProduct=0105, bcdDevice=21.02 [ 111.630055] usb 2-1.5: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 111.630057] usb 2-1.5: Product: H2H Bridge [ 111.630058] usb 2-1.5: Manufacturer: Aptiv [ 111.630059] usb 2-1.5: SerialNumber: 13FEB2021 [ 111.687464] usb 2-1.5: New USB device found, VID=2996, PID=0105 [ 111.690375] dabridge 2-1.5:1.0: dabridge 2-8 total endpoints=5, 000000000d87c961 [ 111.691172] dabridge 2-1.5:1.0: P(0105) D(22.06.22) F(17.3.16) H(1.1) high-speed [ 111.691176] dabridge 2-1.5:1.0: Hub 2-6 P(0151) V(06.87) [ 111.691646] dabridge 2-1.5:1.0: All downstream ports in host mode [ 111.692298] gadget 00000000dc72f7a9 --------> new gadget ptr on connect * NCM opts and associated gadget pointer during second ncm_bind [ 113.271786] NCM opts 00000000aa304ac9 -----> same opts ptr used during first bind [ 113.271788] NCM gadget 00000000dc72f7a9 ----> however new gaget ptr, that will not set in net_device due to ncm_opts->bound = true Signed-off-by: Hardik Gajjar Link: https://lore.kernel.org/r/20231020153324.82794-1-hgajjar@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_ncm.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index e6ab8cc225ff..cc0ed29a4adc 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1410,7 +1410,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) struct usb_composite_dev *cdev = c->cdev; struct f_ncm *ncm = func_to_ncm(f); struct usb_string *us; - int status; + int status = 0; struct usb_ep *ep; struct f_ncm_opts *ncm_opts; @@ -1428,22 +1428,17 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) f->os_desc_table[0].os_desc = &ncm_opts->ncm_os_desc; } - /* - * in drivers/usb/gadget/configfs.c:configfs_composite_bind() - * configurations are bound in sequence with list_for_each_entry, - * in each configuration its functions are bound in sequence - * with list_for_each_entry, so we assume no race condition - * with regard to ncm_opts->bound access - */ - if (!ncm_opts->bound) { - mutex_lock(&ncm_opts->lock); - gether_set_gadget(ncm_opts->net, cdev->gadget); + mutex_lock(&ncm_opts->lock); + gether_set_gadget(ncm_opts->net, cdev->gadget); + if (!ncm_opts->bound) status = gether_register_netdev(ncm_opts->net); - mutex_unlock(&ncm_opts->lock); - if (status) - goto fail; - ncm_opts->bound = true; - } + mutex_unlock(&ncm_opts->lock); + + if (status) + goto fail; + + ncm_opts->bound = true; + us = usb_gstrings_attach(cdev, ncm_strings, ARRAY_SIZE(ncm_string_defs)); if (IS_ERR(us)) { From c575076ba448755c7acba688d8a6cd5c2d6a772c Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 25 Oct 2023 19:34:43 +0900 Subject: [PATCH 0129/1397] 9p/trans_fd: Annotate data-racy writes to file::f_flags [ Upstream commit 355f074609dbf3042900ea9d30fcd2b0c323a365 ] syzbot reported: | BUG: KCSAN: data-race in p9_fd_create / p9_fd_create | | read-write to 0xffff888130fb3d48 of 4 bytes by task 15599 on cpu 0: | p9_fd_open net/9p/trans_fd.c:842 [inline] | p9_fd_create+0x210/0x250 net/9p/trans_fd.c:1092 | p9_client_create+0x595/0xa70 net/9p/client.c:1010 | v9fs_session_init+0xf9/0xd90 fs/9p/v9fs.c:410 | v9fs_mount+0x69/0x630 fs/9p/vfs_super.c:123 | legacy_get_tree+0x74/0xd0 fs/fs_context.c:611 | vfs_get_tree+0x51/0x190 fs/super.c:1519 | do_new_mount+0x203/0x660 fs/namespace.c:3335 | path_mount+0x496/0xb30 fs/namespace.c:3662 | do_mount fs/namespace.c:3675 [inline] | __do_sys_mount fs/namespace.c:3884 [inline] | [...] | | read-write to 0xffff888130fb3d48 of 4 bytes by task 15563 on cpu 1: | p9_fd_open net/9p/trans_fd.c:842 [inline] | p9_fd_create+0x210/0x250 net/9p/trans_fd.c:1092 | p9_client_create+0x595/0xa70 net/9p/client.c:1010 | v9fs_session_init+0xf9/0xd90 fs/9p/v9fs.c:410 | v9fs_mount+0x69/0x630 fs/9p/vfs_super.c:123 | legacy_get_tree+0x74/0xd0 fs/fs_context.c:611 | vfs_get_tree+0x51/0x190 fs/super.c:1519 | do_new_mount+0x203/0x660 fs/namespace.c:3335 | path_mount+0x496/0xb30 fs/namespace.c:3662 | do_mount fs/namespace.c:3675 [inline] | __do_sys_mount fs/namespace.c:3884 [inline] | [...] | | value changed: 0x00008002 -> 0x00008802 Within p9_fd_open(), O_NONBLOCK is added to f_flags of the read and write files. This may happen concurrently if e.g. mounting process modifies the fd in another thread. Mark the plain read-modify-writes as intentional data-races, with the assumption that the result of executing the accesses concurrently will always result in the same result despite the accesses themselves not being atomic. Reported-by: syzbot+e441aeeb422763cc5511@syzkaller.appspotmail.com Signed-off-by: Marco Elver Link: https://lore.kernel.org/r/ZO38mqkS0TYUlpFp@elver.google.com Signed-off-by: Dominique Martinet Message-ID: <20231025103445.1248103-1-asmadeus@codewreck.org> Signed-off-by: Sasha Levin --- net/9p/trans_fd.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index c4015f30f9fa..d0eb03ada704 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -832,14 +832,21 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd) goto out_free_ts; if (!(ts->rd->f_mode & FMODE_READ)) goto out_put_rd; - /* prevent workers from hanging on IO when fd is a pipe */ - ts->rd->f_flags |= O_NONBLOCK; + /* Prevent workers from hanging on IO when fd is a pipe. + * It's technically possible for userspace or concurrent mounts to + * modify this flag concurrently, which will likely result in a + * broken filesystem. However, just having bad flags here should + * not crash the kernel or cause any other sort of bug, so mark this + * particular data race as intentional so that tooling (like KCSAN) + * can allow it and detect further problems. + */ + data_race(ts->rd->f_flags |= O_NONBLOCK); ts->wr = fget(wfd); if (!ts->wr) goto out_put_rd; if (!(ts->wr->f_mode & FMODE_WRITE)) goto out_put_wr; - ts->wr->f_flags |= O_NONBLOCK; + data_race(ts->wr->f_flags |= O_NONBLOCK); client->trans = ts; client->status = Connected; From 35663b6b49a30d2303ccef33bdd10cb1fcf4a101 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 25 Oct 2023 19:34:44 +0900 Subject: [PATCH 0130/1397] 9p: v9fs_listxattr: fix %s null argument warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9b5c6281838fc84683dd99b47302d81fce399918 ] W=1 warns about null argument to kprintf: In file included from fs/9p/xattr.c:12: In function ‘v9fs_xattr_get’, inlined from ‘v9fs_listxattr’ at fs/9p/xattr.c:142:9: include/net/9p/9p.h:55:2: error: ‘%s’ directive argument is null [-Werror=format-overflow=] 55 | _p9_debug(level, __func__, fmt, ##__VA_ARGS__) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use an empty string instead of : - this is ok 9p-wise because p9pdu_vwritef serializes a null string and an empty string the same way (one '0' word for length) - since this degrades the print statements, add new single quotes for xattr's name delimter (Old: "file = (null)", new: "file = ''") Link: https://lore.kernel.org/r/20231008060138.517057-1-suhui@nfschina.com Suggested-by: Su Hui Signed-off-by: Dominique Martinet Acked-by: Christian Schoenebeck Message-ID: <20231025103445.1248103-2-asmadeus@codewreck.org> Signed-off-by: Sasha Levin --- fs/9p/xattr.c | 5 +++-- net/9p/client.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index e00cf8109b3f..3c4572ef3a48 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -68,7 +68,7 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name, struct p9_fid *fid; int ret; - p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu\n", + p9_debug(P9_DEBUG_VFS, "name = '%s' value_len = %zu\n", name, buffer_size); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) @@ -139,7 +139,8 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { - return v9fs_xattr_get(dentry, NULL, buffer, buffer_size); + /* Txattrwalk with an empty string lists xattrs instead */ + return v9fs_xattr_get(dentry, "", buffer, buffer_size); } static int v9fs_xattr_handler_get(const struct xattr_handler *handler, diff --git a/net/9p/client.c b/net/9p/client.c index b0e7cb7e1a54..e265a0ca6bdd 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1981,7 +1981,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, goto error; } p9_debug(P9_DEBUG_9P, - ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n", + ">>> TXATTRWALK file_fid %d, attr_fid %d name '%s'\n", file_fid->fid, attr_fid->fid, attr_name); req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds", From 4c86cb2321bd9c72d3b945ce7f747961beda8e65 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 21 Sep 2023 08:56:56 +0300 Subject: [PATCH 0131/1397] i3c: mipi-i3c-hci: Fix out of bounds access in hci_dma_irq_handler [ Upstream commit 45a832f989e520095429589d5b01b0c65da9b574 ] Do not loop over ring headers in hci_dma_irq_handler() that are not allocated and enabled in hci_dma_init(). Otherwise out of bounds access will occur from rings->headers[i] access when i >= number of allocated ring headers. Signed-off-by: Jarkko Nikula Link: https://lore.kernel.org/r/20230921055704.1087277-5-jarkko.nikula@linux.intel.com Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/i3c/master/mipi-i3c-hci/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c index 2990ac9eaade..71b5dbe45c45 100644 --- a/drivers/i3c/master/mipi-i3c-hci/dma.c +++ b/drivers/i3c/master/mipi-i3c-hci/dma.c @@ -734,7 +734,7 @@ static bool hci_dma_irq_handler(struct i3c_hci *hci, unsigned int mask) unsigned int i; bool handled = false; - for (i = 0; mask && i < 8; i++) { + for (i = 0; mask && i < rings->total; i++) { struct hci_rh_data *rh; u32 status; From c001527728fa88e4be32958b004201ccb17d5e54 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 2 Oct 2023 11:28:04 +0300 Subject: [PATCH 0132/1397] i2c: i801: Add support for Intel Birch Stream SoC [ Upstream commit 8c56f9ef25a33e51f09a448d25cf863b61c9658d ] Add SMBus PCI ID on Intel Birch Stream SoC. Signed-off-by: Jarkko Nikula Reviewed-by: Andi Shyti Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- Documentation/i2c/busses/i2c-i801.rst | 1 + drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst index e76e68ccf718..10eced6c2e46 100644 --- a/Documentation/i2c/busses/i2c-i801.rst +++ b/Documentation/i2c/busses/i2c-i801.rst @@ -47,6 +47,7 @@ Supported adapters: * Intel Alder Lake (PCH) * Intel Raptor Lake (PCH) * Intel Meteor Lake (SOC and PCH) + * Intel Birch Stream (SOC) Datasheets: Publicly available at the Intel website diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 6644eebedaf3..97d27e01a6ee 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -158,6 +158,7 @@ config I2C_I801 Alder Lake (PCH) Raptor Lake (PCH) Meteor Lake (SOC and PCH) + Birch Stream (SOC) This driver can also be built as a module. If so, the module will be called i2c-i801. diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 1d855258a45d..89631fdf6e2f 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -79,6 +79,7 @@ * Meteor Lake-P (SOC) 0x7e22 32 hard yes yes yes * Meteor Lake SoC-S (SOC) 0xae22 32 hard yes yes yes * Meteor Lake PCH-S (PCH) 0x7f23 32 hard yes yes yes + * Birch Stream (SOC) 0x5796 32 hard yes yes yes * * Features supported by this driver: * Software PEC no @@ -231,6 +232,7 @@ #define PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS 0x4da3 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_P_SMBUS 0x51a3 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_M_SMBUS 0x54a3 +#define PCI_DEVICE_ID_INTEL_BIRCH_STREAM_SMBUS 0x5796 #define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS 0x5ad4 #define PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_S_SMBUS 0x7a23 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_S_SMBUS 0x7aa3 @@ -1044,6 +1046,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE_DATA(INTEL, METEOR_LAKE_P_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { PCI_DEVICE_DATA(INTEL, METEOR_LAKE_SOC_S_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { PCI_DEVICE_DATA(INTEL, METEOR_LAKE_PCH_S_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, + { PCI_DEVICE_DATA(INTEL, BIRCH_STREAM_SMBUS, FEATURES_ICH5 | FEATURE_TCO_CNL) }, { 0, } }; From bd62916b7a72c09fde12fcd0199906051fb9ed76 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 29 Sep 2023 11:19:52 +0200 Subject: [PATCH 0133/1397] i2c: fix memleak in i2c_new_client_device() [ Upstream commit 6af79f7fe748fe6a3c5c3a63d7f35981a82c2769 ] Yang Yingliang reported a memleak: === I got memory leak as follows when doing fault injection test: unreferenced object 0xffff888014aec078 (size 8): comm "xrun", pid 356, jiffies 4294910619 (age 16.332s) hex dump (first 8 bytes): 31 2d 30 30 31 63 00 00 1-001c.. backtrace: [<00000000eb56c0a9>] __kmalloc_track_caller+0x1a6/0x300 [<000000000b220ea3>] kvasprintf+0xad/0x140 [<00000000b83203e5>] kvasprintf_const+0x62/0x190 [<000000002a5eab37>] kobject_set_name_vargs+0x56/0x140 [<00000000300ac279>] dev_set_name+0xb0/0xe0 [<00000000b66ebd6f>] i2c_new_client_device+0x7e4/0x9a0 If device_register() returns error in i2c_new_client_device(), the name allocated by i2c_dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. === I think this solution is less intrusive and more robust than he originally proposed solutions, though. Reported-by: Yang Yingliang Closes: http://patchwork.ozlabs.org/project/linux-i2c/patch/20221124085448.3620240-1-yangyingliang@huawei.com/ Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/i2c-core-base.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 60746652fd52..7f30bcceebae 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -931,8 +931,9 @@ int i2c_dev_irq_from_resources(const struct resource *resources, struct i2c_client * i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *info) { - struct i2c_client *client; - int status; + struct i2c_client *client; + bool need_put = false; + int status; client = kzalloc(sizeof *client, GFP_KERNEL); if (!client) @@ -970,7 +971,6 @@ i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *inf client->dev.fwnode = info->fwnode; device_enable_async_suspend(&client->dev); - i2c_dev_set_name(adap, client, info); if (info->swnode) { status = device_add_software_node(&client->dev, info->swnode); @@ -982,6 +982,7 @@ i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *inf } } + i2c_dev_set_name(adap, client, info); status = device_register(&client->dev); if (status) goto out_remove_swnode; @@ -993,6 +994,7 @@ i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *inf out_remove_swnode: device_remove_software_node(&client->dev); + need_put = true; out_err_put_of_node: of_node_put(info->of_node); out_err: @@ -1000,7 +1002,10 @@ i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *inf "Failed to register i2c client %s at 0x%02x (%d)\n", client->name, client->addr, status); out_err_silent: - kfree(client); + if (need_put) + put_device(&client->dev); + else + kfree(client); return ERR_PTR(status); } EXPORT_SYMBOL_GPL(i2c_new_client_device); From a5a583fbb002ae8115f53f8ec364f22c7aa40442 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 13 Apr 2016 08:54:30 +0800 Subject: [PATCH 0134/1397] i2c: sun6i-p2wi: Prevent potential division by zero [ Upstream commit 5ac61d26b8baff5b2e5a9f3dc1ef63297e4b53e7 ] Make sure we don't OOPS in case clock-frequency is set to 0 in a DT. The variable set here is later used as a divisor. Signed-off-by: Axel Lin Acked-by: Boris Brezillon Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-sun6i-p2wi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/i2c/busses/i2c-sun6i-p2wi.c b/drivers/i2c/busses/i2c-sun6i-p2wi.c index fa6020dced59..85e035e7a1d7 100644 --- a/drivers/i2c/busses/i2c-sun6i-p2wi.c +++ b/drivers/i2c/busses/i2c-sun6i-p2wi.c @@ -201,6 +201,11 @@ static int p2wi_probe(struct platform_device *pdev) return -EINVAL; } + if (clk_freq == 0) { + dev_err(dev, "clock-frequency is set to 0 in DT\n"); + return -EINVAL; + } + if (of_get_child_count(np) > 1) { dev_err(dev, "P2WI only supports one slave device\n"); return -EINVAL; From d667fe301dcbcb12d1d6494fc4b8abee2cb75d90 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Mon, 4 Sep 2023 14:10:45 +0800 Subject: [PATCH 0135/1397] virtio-blk: fix implicit overflow on virtio_max_dma_size [ Upstream commit fafb51a67fb883eb2dde352539df939a251851be ] The following codes have an implicit conversion from size_t to u32: (u32)max_size = (size_t)virtio_max_dma_size(vdev); This may lead overflow, Ex (size_t)4G -> (u32)0. Once virtio_max_dma_size() has a larger size than U32_MAX, use U32_MAX instead. Signed-off-by: zhenwei pi Message-Id: <20230904061045.510460-1-pizhenwei@bytedance.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/block/virtio_blk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1fe011676d07..4a4b9bad551e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1313,6 +1313,7 @@ static int virtblk_probe(struct virtio_device *vdev) u16 min_io_size; u8 physical_block_exp, alignment_offset; unsigned int queue_depth; + size_t max_dma_size; if (!vdev->config->get) { dev_err(&vdev->dev, "%s failure: config access disabled\n", @@ -1411,7 +1412,8 @@ static int virtblk_probe(struct virtio_device *vdev) /* No real sector limit. */ blk_queue_max_hw_sectors(q, UINT_MAX); - max_size = virtio_max_dma_size(vdev); + max_dma_size = virtio_max_dma_size(vdev); + max_size = max_dma_size > U32_MAX ? U32_MAX : max_dma_size; /* Host can optionally specify maximum segment size and number of * segments. */ From eed74230435c61eeb58abaa275b1820e6a4b7f02 Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Mon, 23 Oct 2023 16:02:37 +0800 Subject: [PATCH 0136/1397] i3c: master: mipi-i3c-hci: Fix a kernel panic for accessing DAT_data. [ Upstream commit b53e9758a31c683fc8615df930262192ed5f034b ] The `i3c_master_bus_init` function may attach the I2C devices before the I3C bus initialization. In this flow, the DAT `alloc_entry`` will be used before the DAT `init`. Additionally, if the `i3c_master_bus_init` fails, the DAT `cleanup` will execute before the device is detached, which will execue DAT `free_entry` function. The above scenario can cause the driver to use DAT_data when it is NULL. Signed-off-by: Billy Tsai Link: https://lore.kernel.org/r/20231023080237.560936-1-billy_tsai@aspeedtech.com Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/i3c/master/mipi-i3c-hci/dat_v1.c | 29 ++++++++++++++++-------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/dat_v1.c b/drivers/i3c/master/mipi-i3c-hci/dat_v1.c index 97bb49ff5b53..47b9b4d4ed3f 100644 --- a/drivers/i3c/master/mipi-i3c-hci/dat_v1.c +++ b/drivers/i3c/master/mipi-i3c-hci/dat_v1.c @@ -64,15 +64,17 @@ static int hci_dat_v1_init(struct i3c_hci *hci) return -EOPNOTSUPP; } - /* use a bitmap for faster free slot search */ - hci->DAT_data = bitmap_zalloc(hci->DAT_entries, GFP_KERNEL); - if (!hci->DAT_data) - return -ENOMEM; - - /* clear them */ - for (dat_idx = 0; dat_idx < hci->DAT_entries; dat_idx++) { - dat_w0_write(dat_idx, 0); - dat_w1_write(dat_idx, 0); + if (!hci->DAT_data) { + /* use a bitmap for faster free slot search */ + hci->DAT_data = bitmap_zalloc(hci->DAT_entries, GFP_KERNEL); + if (!hci->DAT_data) + return -ENOMEM; + + /* clear them */ + for (dat_idx = 0; dat_idx < hci->DAT_entries; dat_idx++) { + dat_w0_write(dat_idx, 0); + dat_w1_write(dat_idx, 0); + } } return 0; @@ -87,7 +89,13 @@ static void hci_dat_v1_cleanup(struct i3c_hci *hci) static int hci_dat_v1_alloc_entry(struct i3c_hci *hci) { unsigned int dat_idx; + int ret; + if (!hci->DAT_data) { + ret = hci_dat_v1_init(hci); + if (ret) + return ret; + } dat_idx = find_first_zero_bit(hci->DAT_data, hci->DAT_entries); if (dat_idx >= hci->DAT_entries) return -ENOENT; @@ -103,7 +111,8 @@ static void hci_dat_v1_free_entry(struct i3c_hci *hci, unsigned int dat_idx) { dat_w0_write(dat_idx, 0); dat_w1_write(dat_idx, 0); - __clear_bit(dat_idx, hci->DAT_data); + if (hci->DAT_data) + __clear_bit(dat_idx, hci->DAT_data); } static void hci_dat_v1_set_dynamic_addr(struct i3c_hci *hci, From e2d7149b913d14352c82624e723ce1c211ca06d3 Mon Sep 17 00:00:00 2001 From: Rajeshwar R Shinde Date: Wed, 30 Aug 2023 13:14:01 +0530 Subject: [PATCH 0137/1397] media: gspca: cpia1: shift-out-of-bounds in set_flicker [ Upstream commit 099be1822d1f095433f4b08af9cc9d6308ec1953 ] Syzkaller reported the following issue: UBSAN: shift-out-of-bounds in drivers/media/usb/gspca/cpia1.c:1031:27 shift exponent 245 is too large for 32-bit type 'int' When the value of the variable "sd->params.exposure.gain" exceeds the number of bits in an integer, a shift-out-of-bounds error is reported. It is triggered because the variable "currentexp" cannot be left-shifted by more than the number of bits in an integer. In order to avoid invalid range during left-shift, the conditional expression is added. Reported-by: syzbot+e27f3dbdab04e43b9f73@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/20230818164522.12806-1-coolrrsh@gmail.com Link: https://syzkaller.appspot.com/bug?extid=e27f3dbdab04e43b9f73 Signed-off-by: Rajeshwar R Shinde Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/gspca/cpia1.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/usb/gspca/cpia1.c b/drivers/media/usb/gspca/cpia1.c index 46ed95483e22..5f5fa851ca64 100644 --- a/drivers/media/usb/gspca/cpia1.c +++ b/drivers/media/usb/gspca/cpia1.c @@ -18,6 +18,7 @@ #include #include +#include #include "gspca.h" @@ -1028,6 +1029,8 @@ static int set_flicker(struct gspca_dev *gspca_dev, int on, int apply) sd->params.exposure.expMode = 2; sd->exposure_status = EXPOSURE_NORMAL; } + if (sd->params.exposure.gain >= BITS_PER_TYPE(currentexp)) + return -EINVAL; currentexp = currentexp << sd->params.exposure.gain; sd->params.exposure.gain = 0; /* round down current exposure to nearest value */ From 92152c4c56cc07b160fc5774da27360ff0d8a0f6 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 23 Sep 2023 17:20:48 +0200 Subject: [PATCH 0138/1397] media: vivid: avoid integer overflow [ Upstream commit 4567ebf8e8f9546b373e78e3b7d584cc30b62028 ] Fixes these compiler warnings: drivers/media/test-drivers/vivid/vivid-rds-gen.c: In function 'vivid_rds_gen_fill': drivers/media/test-drivers/vivid/vivid-rds-gen.c:147:56: warning: '.' directive output may be truncated writing 1 byte into a region of size between 0 and 3 [-Wformat-truncation=] 147 | snprintf(rds->psname, sizeof(rds->psname), "%6d.%1d", | ^ drivers/media/test-drivers/vivid/vivid-rds-gen.c:147:52: note: directive argument in the range [0, 9] 147 | snprintf(rds->psname, sizeof(rds->psname), "%6d.%1d", | ^~~~~~~~~ drivers/media/test-drivers/vivid/vivid-rds-gen.c:147:9: note: 'snprintf' output between 9 and 12 bytes into a destination of size 9 147 | snprintf(rds->psname, sizeof(rds->psname), "%6d.%1d", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 148 | freq / 16, ((freq & 0xf) * 10) / 16); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Hans Verkuil Acked-by: Arnd Bergmann Signed-off-by: Sasha Levin --- drivers/media/test-drivers/vivid/vivid-rds-gen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/test-drivers/vivid/vivid-rds-gen.c b/drivers/media/test-drivers/vivid/vivid-rds-gen.c index b5b104ee64c9..c57771119a34 100644 --- a/drivers/media/test-drivers/vivid/vivid-rds-gen.c +++ b/drivers/media/test-drivers/vivid/vivid-rds-gen.c @@ -145,7 +145,7 @@ void vivid_rds_gen_fill(struct vivid_rds_gen *rds, unsigned freq, rds->ta = alt; rds->ms = true; snprintf(rds->psname, sizeof(rds->psname), "%6d.%1d", - freq / 16, ((freq & 0xf) * 10) / 16); + (freq / 16) % 1000000, (((freq & 0xf) * 10) / 16) % 10); if (alt) strscpy(rds->radiotext, " The Radio Data System can switch between different Radio Texts ", From 6985eb221b80b70445a834e20332fa41806856a5 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 23 Sep 2023 17:20:49 +0200 Subject: [PATCH 0139/1397] media: ipu-bridge: increase sensor_name size [ Upstream commit 83d0d4cc1423194b580356966107379490edd02e ] Fixes this compiler warning: In file included from include/linux/property.h:14, from include/linux/acpi.h:16, from drivers/media/pci/intel/ipu-bridge.c:4: In function 'ipu_bridge_init_swnode_names', inlined from 'ipu_bridge_create_connection_swnodes' at drivers/media/pci/intel/ipu-bridge.c:445:2, inlined from 'ipu_bridge_connect_sensor' at drivers/media/pci/intel/ipu-bridge.c:656:3: include/linux/fwnode.h:81:49: warning: '%u' directive output may be truncated writing between 1 and 3 bytes into a region of size 2 [-Wformat-truncation=] 81 | #define SWNODE_GRAPH_PORT_NAME_FMT "port@%u" | ^~~~~~~~~ drivers/media/pci/intel/ipu-bridge.c:384:18: note: in expansion of macro 'SWNODE_GRAPH_PORT_NAME_FMT' 384 | SWNODE_GRAPH_PORT_NAME_FMT, sensor->link); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/fwnode.h: In function 'ipu_bridge_connect_sensor': include/linux/fwnode.h:81:55: note: format string is defined here 81 | #define SWNODE_GRAPH_PORT_NAME_FMT "port@%u" | ^~ In function 'ipu_bridge_init_swnode_names', inlined from 'ipu_bridge_create_connection_swnodes' at drivers/media/pci/intel/ipu-bridge.c:445:2, inlined from 'ipu_bridge_connect_sensor' at drivers/media/pci/intel/ipu-bridge.c:656:3: include/linux/fwnode.h:81:49: note: directive argument in the range [0, 255] 81 | #define SWNODE_GRAPH_PORT_NAME_FMT "port@%u" | ^~~~~~~~~ drivers/media/pci/intel/ipu-bridge.c:384:18: note: in expansion of macro 'SWNODE_GRAPH_PORT_NAME_FMT' 384 | SWNODE_GRAPH_PORT_NAME_FMT, sensor->link); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/media/pci/intel/ipu-bridge.c:382:9: note: 'snprintf' output between 7 and 9 bytes into a destination of size 7 382 | snprintf(sensor->node_names.remote_port, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 383 | sizeof(sensor->node_names.remote_port), | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 384 | SWNODE_GRAPH_PORT_NAME_FMT, sensor->link); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Hans Verkuil Acked-by: Arnd Bergmann Signed-off-by: Sasha Levin --- include/media/ipu-bridge.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/media/ipu-bridge.h b/include/media/ipu-bridge.h index bdc654a45521..783bda6d5cc3 100644 --- a/include/media/ipu-bridge.h +++ b/include/media/ipu-bridge.h @@ -108,7 +108,7 @@ struct ipu_node_names { char ivsc_sensor_port[7]; char ivsc_ipu_port[7]; char endpoint[11]; - char remote_port[7]; + char remote_port[9]; char vcm[16]; }; From 119565e566f91ff3588ffcd5812f0c8061586c6b Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 21 Sep 2023 08:46:43 -0500 Subject: [PATCH 0140/1397] gfs2: ignore negated quota changes [ Upstream commit 4c6a08125f2249531ec01783a5f4317d7342add5 ] When lots of quota changes are made, there may be cases in which an inode's quota information is increased and then decreased, such as when blocks are added to a file, then deleted from it. If the timing is right, function do_qc can add pending quota changes to a transaction, then later, another call to do_qc can negate those changes, resulting in a net gain of 0. The quota_change information is recorded in the qc buffer (and qd element of the inode as well). The buffer is added to the transaction by the first call to do_qc, but a subsequent call changes the value from non-zero back to zero. At that point it's too late to remove the buffer_head from the transaction. Later, when the quota sync code is called, the zero-change qd element is discovered and flagged as an assert warning. If the fs is mounted with errors=panic, the kernel will panic. This is usually seen when files are truncated and the quota changes are negated by punch_hole/truncate which uses gfs2_quota_hold and gfs2_quota_unhold rather than block allocations that use gfs2_quota_lock and gfs2_quota_unlock which automatically do quota sync. This patch solves the problem by adding a check to qd_check_sync such that net-zero quota changes already added to the transaction are no longer deemed necessary to be synced, and skipped. In this case references are taken for the qd and the slot from do_qc so those need to be put. The normal sequence of events for a normal non-zero quota change is as follows: gfs2_quota_change do_qc qd_hold slot_hold Later, when the changes are to be synced: gfs2_quota_sync qd_fish qd_check_sync gets qd ref via lockref_get_not_dead do_sync do_qc(QC_SYNC) qd_put lockref_put_or_lock qd_unlock qd_put lockref_put_or_lock In the net-zero change case, we add a check to qd_check_sync so it puts the qd and slot references acquired in gfs2_quota_change and skip the unneeded sync. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/quota.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 171b2713d2e5..41d0232532a0 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -457,6 +457,17 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, (sync_gen && (qd->qd_sync_gen >= *sync_gen))) return 0; + /* + * If qd_change is 0 it means a pending quota change was negated. + * We should not sync it, but we still have a qd reference and slot + * reference taken by gfs2_quota_change -> do_qc that need to be put. + */ + if (!qd->qd_change && test_and_clear_bit(QDF_CHANGE, &qd->qd_flags)) { + slot_put(qd); + qd_put(qd); + return 0; + } + if (!lockref_get_not_dead(&qd->qd_lockref)) return 0; From 7f9b5e974cbb500ea88643b385913ab41cc6560b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Oct 2023 03:33:44 +0100 Subject: [PATCH 0141/1397] gfs2: fix an oops in gfs2_permission [ Upstream commit 0abd1557e21c617bd13fc18f7725fc6363c05913 ] In RCU mode, we might race with gfs2_evict_inode(), which zeroes ->i_gl. Freeing of the object it points to is RCU-delayed, so if we manage to fetch the pointer before it's been replaced with NULL, we are fine. Check if we'd fetched NULL and treat that as "bail out and tell the caller to get out of RCU mode". Signed-off-by: Al Viro Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/inode.c | 11 +++++++++-- fs/gfs2/super.c | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 0eac04507904..eb4bbe1728c0 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1868,14 +1868,21 @@ int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, { struct gfs2_inode *ip; struct gfs2_holder i_gh; + struct gfs2_glock *gl; int error; gfs2_holder_mark_uninitialized(&i_gh); ip = GFS2_I(inode); - if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { + gl = rcu_dereference(ip->i_gl); + if (unlikely(!gl)) { + /* inode is getting torn down, must be RCU mode */ + WARN_ON_ONCE(!(mask & MAY_NOT_BLOCK)); + return -ECHILD; + } + if (gfs2_glock_is_locked_by_me(gl) == NULL) { if (mask & MAY_NOT_BLOCK) return -ECHILD; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 02d93da21b2b..0dd5641990b9 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1550,7 +1550,7 @@ static void gfs2_evict_inode(struct inode *inode) wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put_eventually(ip->i_gl); - ip->i_gl = NULL; + rcu_assign_pointer(ip->i_gl, NULL); } } From 86e281fcdc6f8b23ce707488d0dd8af745866fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 13 Sep 2023 15:27:40 +0300 Subject: [PATCH 0142/1397] media: cobalt: Use FIELD_GET() to extract Link Width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f301fedbeecfdce91cb898d6fa5e62f269801fee ] Use FIELD_GET() to extract PCIe Negotiated and Maximum Link Width fields instead of custom masking and shifting. Signed-off-by: Ilpo Järvinen Reviewed-by: Jonathan Cameron Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/pci/cobalt/cobalt-driver.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/media/pci/cobalt/cobalt-driver.c b/drivers/media/pci/cobalt/cobalt-driver.c index 74edcc76d12f..6e1a0614e6d0 100644 --- a/drivers/media/pci/cobalt/cobalt-driver.c +++ b/drivers/media/pci/cobalt/cobalt-driver.c @@ -8,6 +8,7 @@ * All rights reserved. */ +#include #include #include #include @@ -210,17 +211,17 @@ void cobalt_pcie_status_show(struct cobalt *cobalt) pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA, &stat); cobalt_info("PCIe link capability 0x%08x: %s per lane and %u lanes\n", capa, get_link_speed(capa), - (capa & PCI_EXP_LNKCAP_MLW) >> 4); + FIELD_GET(PCI_EXP_LNKCAP_MLW, capa)); cobalt_info("PCIe link control 0x%04x\n", ctrl); cobalt_info("PCIe link status 0x%04x: %s per lane and %u lanes\n", stat, get_link_speed(stat), - (stat & PCI_EXP_LNKSTA_NLW) >> 4); + FIELD_GET(PCI_EXP_LNKSTA_NLW, stat)); /* Bus */ pcie_capability_read_dword(pci_bus_dev, PCI_EXP_LNKCAP, &capa); cobalt_info("PCIe bus link capability 0x%08x: %s per lane and %u lanes\n", capa, get_link_speed(capa), - (capa & PCI_EXP_LNKCAP_MLW) >> 4); + FIELD_GET(PCI_EXP_LNKCAP_MLW, capa)); /* Slot */ pcie_capability_read_dword(pci_dev, PCI_EXP_SLTCAP, &capa); @@ -239,7 +240,7 @@ static unsigned pcie_link_get_lanes(struct cobalt *cobalt) if (!pci_is_pcie(pci_dev)) return 0; pcie_capability_read_word(pci_dev, PCI_EXP_LNKSTA, &link); - return (link & PCI_EXP_LNKSTA_NLW) >> 4; + return FIELD_GET(PCI_EXP_LNKSTA_NLW, link); } static unsigned pcie_bus_link_get_lanes(struct cobalt *cobalt) @@ -250,7 +251,7 @@ static unsigned pcie_bus_link_get_lanes(struct cobalt *cobalt) if (!pci_is_pcie(pci_dev)) return 0; pcie_capability_read_dword(pci_dev, PCI_EXP_LNKCAP, &link); - return (link & PCI_EXP_LNKCAP_MLW) >> 4; + return FIELD_GET(PCI_EXP_LNKCAP_MLW, link); } static void msi_config_show(struct cobalt *cobalt, struct pci_dev *pci_dev) From e943d65c3247d1824d0274b0a9fd1116afa335ed Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 24 Aug 2023 15:18:18 +0300 Subject: [PATCH 0143/1397] media: ccs: Fix driver quirk struct documentation [ Upstream commit 441b5c63d71ec9ec5453328f7e83384ecc1dddd9 ] Fix documentation for struct ccs_quirk, a device specific struct for managing deviations from the standard. The flags field was drifted away from where it should have been. Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/i2c/ccs/ccs-quirk.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/media/i2c/ccs/ccs-quirk.h b/drivers/media/i2c/ccs/ccs-quirk.h index 5838fcda92fd..0b1a64958d71 100644 --- a/drivers/media/i2c/ccs/ccs-quirk.h +++ b/drivers/media/i2c/ccs/ccs-quirk.h @@ -32,12 +32,10 @@ struct ccs_sensor; * @reg: Pointer to the register to access * @value: Register value, set by the caller on write, or * by the quirk on read - * - * @flags: Quirk flags - * * @return: 0 on success, -ENOIOCTLCMD if no register * access may be done by the caller (default read * value is zero), else negative error code on error + * @flags: Quirk flags */ struct ccs_quirk { int (*limits)(struct ccs_sensor *sensor); From 2a493a34bd6e496c55fabedd82b957193ace178f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Sep 2023 14:38:07 +0200 Subject: [PATCH 0144/1397] media: imon: fix access to invalid resource for the second interface [ Upstream commit a1766a4fd83befa0b34d932d532e7ebb7fab1fa7 ] imon driver probes two USB interfaces, and at the probe of the second interface, the driver assumes blindly that the first interface got bound with the same imon driver. It's usually true, but it's still possible that the first interface is bound with another driver via a malformed descriptor. Then it may lead to a memory corruption, as spotted by syzkaller; imon driver accesses the data from drvdata as struct imon_context object although it's a completely different one that was assigned by another driver. This patch adds a sanity check -- whether the first interface is really bound with the imon driver or not -- for avoiding the problem above at the probe time. Reported-by: syzbot+59875ffef5cb9c9b29e9@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000a838aa0603cc74d6@google.com/ Tested-by: Ricardo B. Marliere Link: https://lore.kernel.org/r/20230922005152.163640-1-ricardo@marliere.net Signed-off-by: Takashi Iwai Signed-off-by: Sean Young Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/rc/imon.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c index 74546f7e3469..5719dda6e0f0 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -2427,6 +2427,12 @@ static int imon_probe(struct usb_interface *interface, goto fail; } + if (first_if->dev.driver != interface->dev.driver) { + dev_err(&interface->dev, "inconsistent driver matching\n"); + ret = -EINVAL; + goto fail; + } + if (ifnum == 0) { ictx = imon_init_intf0(interface, id); if (!ictx) { From df8bc953eed72371e43ca407bd063507f760cf89 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 8 Sep 2023 10:14:49 +0800 Subject: [PATCH 0145/1397] drm/amd/display: Avoid NULL dereference of timing generator [ Upstream commit b1904ed480cee3f9f4036ea0e36d139cb5fee2d6 ] [Why & How] Check whether assigned timing generator is NULL or not before accessing its funcs to prevent NULL dereference. Reviewed-by: Jun Lei Acked-by: Hersen Wu Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 01fe2d2fd241..ebe571fcefe3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -582,7 +582,7 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream) for (i = 0; i < MAX_PIPES; i++) { struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg; - if (res_ctx->pipe_ctx[i].stream != stream) + if (res_ctx->pipe_ctx[i].stream != stream || !tg) continue; return tg->funcs->get_frame_count(tg); @@ -641,7 +641,7 @@ bool dc_stream_get_scanoutpos(const struct dc_stream_state *stream, for (i = 0; i < MAX_PIPES; i++) { struct timing_generator *tg = res_ctx->pipe_ctx[i].stream_res.tg; - if (res_ctx->pipe_ctx[i].stream != stream) + if (res_ctx->pipe_ctx[i].stream != stream || !tg) continue; tg->funcs->get_scanoutpos(tg, From 08a28272faa750d4357ea2cb48d2baefd778ea81 Mon Sep 17 00:00:00 2001 From: Juntong Deng Date: Mon, 30 Oct 2023 05:10:06 +0800 Subject: [PATCH 0146/1397] gfs2: Fix slab-use-after-free in gfs2_qd_dealloc [ Upstream commit bdcb8aa434c6d36b5c215d02a9ef07551be25a37 ] In gfs2_put_super(), whether withdrawn or not, the quota should be cleaned up by gfs2_quota_cleanup(). Otherwise, struct gfs2_sbd will be freed before gfs2_qd_dealloc (rcu callback) has run for all gfs2_quota_data objects, resulting in use-after-free. Also, gfs2_destroy_threads() and gfs2_quota_cleanup() is already called by gfs2_make_fs_ro(), so in gfs2_put_super(), after calling gfs2_make_fs_ro(), there is no need to call them again. Reported-by: syzbot+29c47e9e51895928698c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=29c47e9e51895928698c Signed-off-by: Juntong Deng Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/super.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 0dd5641990b9..5f4ebe279aaa 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -602,13 +602,15 @@ static void gfs2_put_super(struct super_block *sb) } spin_unlock(&sdp->sd_jindex_spin); - if (!sb_rdonly(sb)) { + if (!sb_rdonly(sb)) gfs2_make_fs_ro(sdp); - } - if (gfs2_withdrawn(sdp)) { - gfs2_destroy_threads(sdp); + else { + if (gfs2_withdrawn(sdp)) + gfs2_destroy_threads(sdp); + gfs2_quota_cleanup(sdp); } + WARN_ON(gfs2_withdrawing(sdp)); /* At this point, we're through modifying the disk */ From ef4e484ab0136e5f8616c45ad13b2f67dc922998 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 22 Aug 2023 13:19:46 -0700 Subject: [PATCH 0147/1397] kgdb: Flush console before entering kgdb on panic [ Upstream commit dd712d3d45807db9fcae28a522deee85c1f2fde6 ] When entering kdb/kgdb on a kernel panic, it was be observed that the console isn't flushed before the `kdb` prompt came up. Specifically, when using the buddy lockup detector on arm64 and running: echo HARDLOCKUP > /sys/kernel/debug/provoke-crash/DIRECT I could see: [ 26.161099] lkdtm: Performing direct entry HARDLOCKUP [ 32.499881] watchdog: Watchdog detected hard LOCKUP on cpu 6 [ 32.552865] Sending NMI from CPU 5 to CPUs 6: [ 32.557359] NMI backtrace for cpu 6 ... [backtrace for cpu 6] ... [ 32.558353] NMI backtrace for cpu 5 ... [backtrace for cpu 5] ... [ 32.867471] Sending NMI from CPU 5 to CPUs 0-4,7: [ 32.872321] NMI backtrace forP cpuANC: Hard LOCKUP Entering kdb (current=..., pid 0) on processor 5 due to Keyboard Entry [5]kdb> As you can see, backtraces for the other CPUs start printing and get interleaved with the kdb PANIC print. Let's replicate the commands to flush the console in the kdb panic entry point to avoid this. Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20230822131945.1.I5b460ae8f954e4c4f628a373d6e74713c06dd26f@changeid Signed-off-by: Daniel Thompson Signed-off-by: Sasha Levin --- kernel/debug/debug_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 621037a0aa87..ce1bb2301c06 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -1006,6 +1006,9 @@ void kgdb_panic(const char *msg) if (panic_timeout) return; + debug_locks_off(); + console_flush_on_panic(CONSOLE_FLUSH_PENDING); + if (dbg_kdb_mode) kdb_printf("PANIC: %s\n", msg); From eff53aea3855f71992c043cebb1c00988c17ee20 Mon Sep 17 00:00:00 2001 From: Deepak Gupta Date: Wed, 27 Sep 2023 22:47:59 +0000 Subject: [PATCH 0148/1397] riscv: VMAP_STACK overflow detection thread-safe [ Upstream commit be97d0db5f44c0674480cb79ac6f5b0529b84c76 ] commit 31da94c25aea ("riscv: add VMAP_STACK overflow detection") added support for CONFIG_VMAP_STACK. If overflow is detected, CPU switches to `shadow_stack` temporarily before switching finally to per-cpu `overflow_stack`. If two CPUs/harts are racing and end up in over flowing kernel stack, one or both will end up corrupting each other state because `shadow_stack` is not per-cpu. This patch optimizes per-cpu overflow stack switch by directly picking per-cpu `overflow_stack` and gets rid of `shadow_stack`. Following are the changes in this patch - Defines an asm macro to obtain per-cpu symbols in destination register. - In entry.S, when overflow is detected, per-cpu overflow stack is located using per-cpu asm macro. Computing per-cpu symbol requires a temporary register. x31 is saved away into CSR_SCRATCH (CSR_SCRATCH is anyways zero since we're in kernel). Please see Links for additional relevant disccussion and alternative solution. Tested by `echo EXHAUST_STACK > /sys/kernel/debug/provoke-crash/DIRECT` Kernel crash log below Insufficient stack space to handle exception!/debug/provoke-crash/DIRECT Task stack: [0xff20000010a98000..0xff20000010a9c000] Overflow stack: [0xff600001f7d98370..0xff600001f7d99370] CPU: 1 PID: 205 Comm: bash Not tainted 6.1.0-rc2-00001-g328a1f96f7b9 #34 Hardware name: riscv-virtio,qemu (DT) epc : __memset+0x60/0xfc ra : recursive_loop+0x48/0xc6 [lkdtm] epc : ffffffff808de0e4 ra : ffffffff0163a752 sp : ff20000010a97e80 gp : ffffffff815c0330 tp : ff600000820ea280 t0 : ff20000010a97e88 t1 : 000000000000002e t2 : 3233206874706564 s0 : ff20000010a982b0 s1 : 0000000000000012 a0 : ff20000010a97e88 a1 : 0000000000000000 a2 : 0000000000000400 a3 : ff20000010a98288 a4 : 0000000000000000 a5 : 0000000000000000 a6 : fffffffffffe43f0 a7 : 00007fffffffffff s2 : ff20000010a97e88 s3 : ffffffff01644680 s4 : ff20000010a9be90 s5 : ff600000842ba6c0 s6 : 00aaaaaac29e42b0 s7 : 00fffffff0aa3684 s8 : 00aaaaaac2978040 s9 : 0000000000000065 s10: 00ffffff8a7cad10 s11: 00ffffff8a76a4e0 t3 : ffffffff815dbaf4 t4 : ffffffff815dbaf4 t5 : ffffffff815dbab8 t6 : ff20000010a9bb48 status: 0000000200000120 badaddr: ff20000010a97e88 cause: 000000000000000f Kernel panic - not syncing: Kernel stack overflow CPU: 1 PID: 205 Comm: bash Not tainted 6.1.0-rc2-00001-g328a1f96f7b9 #34 Hardware name: riscv-virtio,qemu (DT) Call Trace: [] dump_backtrace+0x30/0x38 [] show_stack+0x40/0x4c [] dump_stack_lvl+0x44/0x5c [] dump_stack+0x18/0x20 [] panic+0x126/0x2fe [] walk_stackframe+0x0/0xf0 [] recursive_loop+0x48/0xc6 [lkdtm] SMP: stopping secondary CPUs ---[ end Kernel panic - not syncing: Kernel stack overflow ]--- Cc: Guo Ren Cc: Jisheng Zhang Link: https://lore.kernel.org/linux-riscv/Y347B0x4VUNOd6V7@xhacker/T/#t Link: https://lore.kernel.org/lkml/20221124094845.1907443-1-debug@rivosinc.com/ Signed-off-by: Deepak Gupta Co-developed-by: Sami Tolvanen Signed-off-by: Sami Tolvanen Acked-by: Guo Ren Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/20230927224757.1154247-9-samitolvanen@google.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/asm/asm-prototypes.h | 1 - arch/riscv/include/asm/asm.h | 22 ++++++++ arch/riscv/include/asm/thread_info.h | 3 -- arch/riscv/kernel/asm-offsets.c | 1 + arch/riscv/kernel/entry.S | 70 ++++--------------------- arch/riscv/kernel/traps.c | 36 +------------ 6 files changed, 34 insertions(+), 99 deletions(-) diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index 61ba8ed43d8f..36b955c762ba 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -25,7 +25,6 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s); DECLARE_DO_ERROR_INFO(do_trap_ecall_m); DECLARE_DO_ERROR_INFO(do_trap_break); -asmlinkage unsigned long get_overflow_stack(void); asmlinkage void handle_bad_stack(struct pt_regs *regs); asmlinkage void do_page_fault(struct pt_regs *regs); asmlinkage void do_irq(struct pt_regs *regs); diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 114bbadaef41..bfb4c26f113c 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -82,6 +82,28 @@ .endr .endm +#ifdef CONFIG_SMP +#ifdef CONFIG_32BIT +#define PER_CPU_OFFSET_SHIFT 2 +#else +#define PER_CPU_OFFSET_SHIFT 3 +#endif + +.macro asm_per_cpu dst sym tmp + REG_L \tmp, TASK_TI_CPU_NUM(tp) + slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT + la \dst, __per_cpu_offset + add \dst, \dst, \tmp + REG_L \tmp, 0(\dst) + la \dst, \sym + add \dst, \dst, \tmp +.endm +#else /* CONFIG_SMP */ +.macro asm_per_cpu dst sym tmp + la \dst, \sym +.endm +#endif /* CONFIG_SMP */ + /* save all GPs except x1 ~ x5 */ .macro save_from_x6_to_x31 REG_S x6, PT_T1(sp) diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 1833beb00489..d18ce0113ca1 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -34,9 +34,6 @@ #ifndef __ASSEMBLY__ -extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)]; -extern unsigned long spin_shadow_stack; - #include #include diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index d6a75aac1d27..9f535d5de33f 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -39,6 +39,7 @@ void asm_offsets(void) OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); + OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu); OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]); OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]); OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 143a2bb3e697..3d11aa3af105 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -10,9 +10,11 @@ #include #include #include +#include #include #include #include +#include SYM_CODE_START(handle_exception) /* @@ -170,67 +172,15 @@ SYM_CODE_END(ret_from_exception) #ifdef CONFIG_VMAP_STACK SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) - /* - * Takes the psuedo-spinlock for the shadow stack, in case multiple - * harts are concurrently overflowing their kernel stacks. We could - * store any value here, but since we're overflowing the kernel stack - * already we only have SP to use as a scratch register. So we just - * swap in the address of the spinlock, as that's definately non-zero. - * - * Pairs with a store_release in handle_bad_stack(). - */ -1: la sp, spin_shadow_stack - REG_AMOSWAP_AQ sp, sp, (sp) - bnez sp, 1b - - la sp, shadow_stack - addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE - - //save caller register to shadow stack - addi sp, sp, -(PT_SIZE_ON_STACK) - REG_S x1, PT_RA(sp) - REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) - - la ra, restore_caller_reg - tail get_overflow_stack - -restore_caller_reg: - //save per-cpu overflow stack - REG_S a0, -8(sp) - //restore caller register from shadow_stack - REG_L x1, PT_RA(sp) - REG_L x5, PT_T0(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) + /* we reach here from kernel context, sscratch must be 0 */ + csrrw x31, CSR_SCRATCH, x31 + asm_per_cpu sp, overflow_stack, x31 + li x31, OVERFLOW_STACK_SIZE + add sp, sp, x31 + /* zero out x31 again and restore x31 */ + xor x31, x31, x31 + csrrw x31, CSR_SCRATCH, x31 - //load per-cpu overflow stack - REG_L sp, -8(sp) addi sp, sp, -(PT_SIZE_ON_STACK) //save context to overflow stack diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index fae8f610d867..67d0073fb624 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -410,48 +410,14 @@ int is_valid_bugaddr(unsigned long pc) #endif /* CONFIG_GENERIC_BUG */ #ifdef CONFIG_VMAP_STACK -/* - * Extra stack space that allows us to provide panic messages when the kernel - * has overflowed its stack. - */ -static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], +DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)__aligned(16); -/* - * A temporary stack for use by handle_kernel_stack_overflow. This is used so - * we can call into C code to get the per-hart overflow stack. Usage of this - * stack must be protected by spin_shadow_stack. - */ -long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16); - -/* - * A pseudo spinlock to protect the shadow stack from being used by multiple - * harts concurrently. This isn't a real spinlock because the lock side must - * be taken without a valid stack and only a single register, it's only taken - * while in the process of panicing anyway so the performance and error - * checking a proper spinlock gives us doesn't matter. - */ -unsigned long spin_shadow_stack; - -asmlinkage unsigned long get_overflow_stack(void) -{ - return (unsigned long)this_cpu_ptr(overflow_stack) + - OVERFLOW_STACK_SIZE; -} asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); - /* - * We're done with the shadow stack by this point, as we're on the - * overflow stack. Tell any other concurrent overflowing harts that - * they can proceed with panicing by releasing the pseudo-spinlock. - * - * This pairs with an amoswap.aq in handle_kernel_stack_overflow. - */ - smp_store_release(&spin_shadow_stack, 0); - console_verbose(); pr_emerg("Insufficient stack space to handle exception!\n"); From bef76b8544939518dafa3325bcd438b111136437 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 2 Nov 2023 20:26:13 +0100 Subject: [PATCH 0149/1397] i2c: dev: copy userspace array safely [ Upstream commit cc9c54232f04aef3a5d7f64a0ece7df00f1aaa3d ] i2c-dev.c utilizes memdup_user() to copy a userspace array. This is done without an overflow check. Use the new wrapper memdup_array_user() to copy the array more safely. Suggested-by: Dave Airlie Signed-off-by: Philipp Stanner Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/i2c-dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index a01b59e3599b..7d337380a05d 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -450,8 +450,8 @@ static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; - rdwr_pa = memdup_user(rdwr_arg.msgs, - rdwr_arg.nmsgs * sizeof(struct i2c_msg)); + rdwr_pa = memdup_array_user(rdwr_arg.msgs, + rdwr_arg.nmsgs, sizeof(struct i2c_msg)); if (IS_ERR(rdwr_pa)) return PTR_ERR(rdwr_pa); From dc0cbcba24a8c867bfa43ff11746019a4dc5b894 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 30 Oct 2023 07:23:38 +0200 Subject: [PATCH 0150/1397] ASoC: ti: omap-mcbsp: Fix runtime PM underflow warnings [ Upstream commit fbb74e56378d8306f214658e3d525a8b3f000c5a ] We need to check for an active device as otherwise we get warnings for some mcbsp instances for "Runtime PM usage count underflow!". Reported-by: Andreas Kemnade Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20231030052340.13415-1-tony@atomide.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/ti/omap-mcbsp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/ti/omap-mcbsp.c b/sound/soc/ti/omap-mcbsp.c index fdabed5133e8..b399d86f2277 100644 --- a/sound/soc/ti/omap-mcbsp.c +++ b/sound/soc/ti/omap-mcbsp.c @@ -74,14 +74,16 @@ static int omap2_mcbsp_set_clks_src(struct omap_mcbsp *mcbsp, u8 fck_src_id) return 0; } - pm_runtime_put_sync(mcbsp->dev); + if (mcbsp->active) + pm_runtime_put_sync(mcbsp->dev); r = clk_set_parent(mcbsp->fclk, fck_src); if (r) dev_err(mcbsp->dev, "CLKS: could not clk_set_parent() to %s\n", src); - pm_runtime_get_sync(mcbsp->dev); + if (mcbsp->active) + pm_runtime_get_sync(mcbsp->dev); clk_put(fck_src); From 01834d420e27f2e51aa5ee8ae62926371654c5db Mon Sep 17 00:00:00 2001 From: Zongmin Zhou Date: Tue, 1 Aug 2023 10:53:09 +0800 Subject: [PATCH 0151/1397] drm/qxl: prevent memory leak [ Upstream commit 0e8b9f258baed25f1c5672613699247c76b007b5 ] The allocated memory for qdev->dumb_heads should be released in qxl_destroy_monitors_object before qxl suspend. otherwise,qxl_create_monitors_object will be called to reallocate memory for qdev->dumb_heads after qxl resume, it will cause memory leak. Signed-off-by: Zongmin Zhou Link: https://lore.kernel.org/r/20230801025309.4049813-1-zhouzongmin@kylinos.cn Reviewed-by: Dave Airlie Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/gpu/drm/qxl/qxl_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 6492a70e3c39..404b0483bb7c 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -1229,6 +1229,9 @@ int qxl_destroy_monitors_object(struct qxl_device *qdev) if (!qdev->monitors_config_bo) return 0; + kfree(qdev->dumb_heads); + qdev->dumb_heads = NULL; + qdev->monitors_config = NULL; qdev->ram_header->monitors_config = 0; From 8d6c4d60f35286e806e6c11740019997cc68212f Mon Sep 17 00:00:00 2001 From: Alex Spataru Date: Sat, 4 Nov 2023 16:01:52 -0500 Subject: [PATCH 0152/1397] ALSA: hda/realtek: Add quirk for ASUS UX7602ZM [ Upstream commit 26fd31ef9c02a5e91cdb8eea127b056bd7cf0b3b ] Enables the SPI-connected CSC35L41 audio amplifier for this laptop model. As of BIOS version 303 it's still necessary to modify the ACPI table to add the related _DSD properties: https://github.com/alex-spataru/asus_zenbook_ux7602zm_sound/ Signed-off-by: Alex Spataru Link: https://lore.kernel.org/r/DS7PR07MB7621BB5BB14F5473D181624CE3A4A@DS7PR07MB7621.namprd07.prod.outlook.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3ed2cba5ee8a..76639738ae31 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9874,6 +9874,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), From de1c09598b8de52a0f0ee5fd9b1f22079f78330e Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Wed, 11 Oct 2023 19:31:48 -0400 Subject: [PATCH 0153/1397] drm/amdgpu: fix software pci_unplug on some chips [ Upstream commit 4638e0c29a3f2294d5de0d052a4b8c9f33ccb957 ] When software 'pci unplug' using IGT is executed we got a sysfs directory entry is NULL for differant ras blocks like hdp, umc, etc. Before call 'sysfs_remove_file_from_group' and 'sysfs_remove_group' check that 'sd' is not NULL. [ +0.000001] RIP: 0010:sysfs_remove_group+0x83/0x90 [ +0.000002] Code: 31 c0 31 d2 31 f6 31 ff e9 9a a8 b4 00 4c 89 e7 e8 f2 a2 ff ff eb c2 49 8b 55 00 48 8b 33 48 c7 c7 80 65 94 82 e8 cd 82 bb ff <0f> 0b eb cc 66 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90 90 90 90 [ +0.000001] RSP: 0018:ffffc90002067c90 EFLAGS: 00010246 [ +0.000002] RAX: 0000000000000000 RBX: ffffffff824ea180 RCX: 0000000000000000 [ +0.000001] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ +0.000001] RBP: ffffc90002067ca8 R08: 0000000000000000 R09: 0000000000000000 [ +0.000001] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ +0.000001] R13: ffff88810a395f48 R14: ffff888101aab0d0 R15: 0000000000000000 [ +0.000001] FS: 00007f5ddaa43a00(0000) GS:ffff88841e800000(0000) knlGS:0000000000000000 [ +0.000002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000001] CR2: 00007f8ffa61ba50 CR3: 0000000106432000 CR4: 0000000000350ef0 [ +0.000001] Call Trace: [ +0.000001] [ +0.000001] ? show_regs+0x72/0x90 [ +0.000002] ? sysfs_remove_group+0x83/0x90 [ +0.000002] ? __warn+0x8d/0x160 [ +0.000001] ? sysfs_remove_group+0x83/0x90 [ +0.000001] ? report_bug+0x1bb/0x1d0 [ +0.000003] ? handle_bug+0x46/0x90 [ +0.000001] ? exc_invalid_op+0x19/0x80 [ +0.000002] ? asm_exc_invalid_op+0x1b/0x20 [ +0.000003] ? sysfs_remove_group+0x83/0x90 [ +0.000001] dpm_sysfs_remove+0x61/0x70 [ +0.000002] device_del+0xa3/0x3d0 [ +0.000002] ? ktime_get_mono_fast_ns+0x46/0xb0 [ +0.000002] device_unregister+0x18/0x70 [ +0.000001] i2c_del_adapter+0x26d/0x330 [ +0.000002] arcturus_i2c_control_fini+0x25/0x50 [amdgpu] [ +0.000236] smu_sw_fini+0x38/0x260 [amdgpu] [ +0.000241] amdgpu_device_fini_sw+0x116/0x670 [amdgpu] [ +0.000186] ? mutex_lock+0x13/0x50 [ +0.000003] amdgpu_driver_release_kms+0x16/0x40 [amdgpu] [ +0.000192] drm_minor_release+0x4f/0x80 [drm] [ +0.000025] drm_release+0xfe/0x150 [drm] [ +0.000027] __fput+0x9f/0x290 [ +0.000002] ____fput+0xe/0x20 [ +0.000002] task_work_run+0x61/0xa0 [ +0.000002] exit_to_user_mode_prepare+0x150/0x170 [ +0.000002] syscall_exit_to_user_mode+0x2a/0x50 Cc: Hawking Zhang Cc: Luben Tuikov Cc: Alex Deucher Cc: Christian Koenig Signed-off-by: Vitaly Prosyak Reviewed-by: Luben Tuikov Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 163445baa4fc..6f6341f70278 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1373,7 +1373,8 @@ static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &con->badpages_attr.attr, RAS_FS_NAME); } @@ -1390,7 +1391,8 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) .attrs = attrs, }; - sysfs_remove_group(&adev->dev->kobj, &group); + if (adev->dev->kobj.sd) + sysfs_remove_group(&adev->dev->kobj, &group); return 0; } @@ -1437,7 +1439,8 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, if (!obj || !obj->attr_inuse) return -EINVAL; - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &obj->sysfs_attr.attr, RAS_FS_NAME); obj->attr_inuse = 0; From 45d0a298e05adee521f6fe605d6a88341ba07edd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Oct 2023 14:58:18 +0300 Subject: [PATCH 0154/1397] pwm: Fix double shift bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d27abbfd4888d79dd24baf50e774631046ac4732 ] These enums are passed to set/test_bit(). The set/test_bit() functions take a bit number instead of a shifted value. Passing a shifted value is a double shift bug like doing BIT(BIT(1)). The double shift bug doesn't cause a problem here because we are only checking 0 and 1 but if the value was 5 or above then it can lead to a buffer overflow. Signed-off-by: Dan Carpenter Reviewed-by: Uwe Kleine-König Reviewed-by: Sam Protsenko Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- include/linux/pwm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d2f9f690a9c1..fe0f38ce1bde 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -41,8 +41,8 @@ struct pwm_args { }; enum { - PWMF_REQUESTED = 1 << 0, - PWMF_EXPORTED = 1 << 1, + PWMF_REQUESTED = 0, + PWMF_EXPORTED = 1, }; /* From 1e0bb46893483215481bcb1b48ed14a7be819b83 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Mon, 21 Aug 2023 16:40:46 +0800 Subject: [PATCH 0155/1397] mtd: rawnand: tegra: add missing check for platform_get_irq() [ Upstream commit 0a1166c27d4e53186e6bf9147ea6db9cd1d65847 ] Add the missing check for platform_get_irq() and return error code if it fails. Fixes: d7d9f8ec77fe ("mtd: rawnand: add NVIDIA Tegra NAND Flash controller driver") Signed-off-by: Yi Yang Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230821084046.217025-1-yiyang13@huawei.com Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/tegra_nand.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c index eb0b9d16e8da..a553e3ac8ff4 100644 --- a/drivers/mtd/nand/raw/tegra_nand.c +++ b/drivers/mtd/nand/raw/tegra_nand.c @@ -1197,6 +1197,10 @@ static int tegra_nand_probe(struct platform_device *pdev) init_completion(&ctrl->dma_complete); ctrl->irq = platform_get_irq(pdev, 0); + if (ctrl->irq < 0) { + err = ctrl->irq; + goto err_put_pm; + } err = devm_request_irq(&pdev->dev, ctrl->irq, tegra_nand_irq, 0, dev_name(&pdev->dev), ctrl); if (err) { From a96ef1ffc60709d59527d5c9752ab8824e42ad47 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 13 Sep 2023 14:56:45 +0300 Subject: [PATCH 0156/1397] wifi: iwlwifi: Use FW rate for non-data frames [ Upstream commit 499d02790495958506a64f37ceda7e97345a50a8 ] Currently we are setting the rate in the tx cmd for mgmt frames (e.g. during connection establishment). This was problematic when sending mgmt frames in eSR mode, as we don't know what link this frame will be sent on (This is decided by the FW), so we don't know what is the lowest rate. Fix this by not setting the rate in tx cmd and rely on FW to choose the right one. Set rate only for injected frames with fixed rate, or when no sta is given. Also set for important frames (EAPOL etc.) the High Priority flag. Fixes: 055b22e770dd ("iwlwifi: mvm: Set Tx rate and flags when there is not station") Signed-off-by: Miri Korenblit Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230913145231.6c7e59620ee0.I6eaed3ccdd6dd62b9e664facc484081fc5275843@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 2ede69132fee..177a4628a913 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -536,16 +536,20 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, flags |= IWL_TX_FLAGS_ENCRYPT_DIS; /* - * For data packets rate info comes from the fw. Only - * set rate/antenna during connection establishment or in case - * no station is given. + * For data and mgmt packets rate info comes from the fw. Only + * set rate/antenna for injected frames with fixed rate, or + * when no sta is given. */ - if (!sta || !ieee80211_is_data(hdr->frame_control) || - mvmsta->sta_state < IEEE80211_STA_AUTHORIZED) { + if (unlikely(!sta || + info->control.flags & IEEE80211_TX_CTRL_RATE_INJECT)) { flags |= IWL_TX_FLAGS_CMD_RATE; rate_n_flags = iwl_mvm_get_tx_rate_n_flags(mvm, info, sta, hdr->frame_control); + } else if (!ieee80211_is_data(hdr->frame_control) || + mvmsta->sta_state < IEEE80211_STA_AUTHORIZED) { + /* These are important frames */ + flags |= IWL_TX_FLAGS_HIGH_PRI; } if (mvm->trans->trans_cfg->device_family >= From bc13958889403b31cc0e92bbf69391b2df19e3ae Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Fri, 15 Sep 2023 15:47:11 +1000 Subject: [PATCH 0157/1397] sched/core: Optimize in_task() and in_interrupt() a bit [ Upstream commit 87c3a5893e865739ce78aa7192d36011022e0af7 ] Except on x86, preempt_count is always accessed with READ_ONCE(). Repeated invocations in macros like irq_count() produce repeated loads. These redundant instructions appear in various fast paths. In the one shown below, for example, irq_count() is evaluated during kernel entry if !tick_nohz_full_cpu(smp_processor_id()). 0001ed0a : 1ed0a: 4e56 0000 linkw %fp,#0 1ed0e: 200f movel %sp,%d0 1ed10: 0280 ffff e000 andil #-8192,%d0 1ed16: 2040 moveal %d0,%a0 1ed18: 2028 0008 movel %a0@(8),%d0 1ed1c: 0680 0001 0000 addil #65536,%d0 1ed22: 2140 0008 movel %d0,%a0@(8) 1ed26: 082a 0001 000f btst #1,%a2@(15) 1ed2c: 670c beqs 1ed3a 1ed2e: 2028 0008 movel %a0@(8),%d0 1ed32: 2028 0008 movel %a0@(8),%d0 1ed36: 2028 0008 movel %a0@(8),%d0 1ed3a: 4e5e unlk %fp 1ed3c: 4e75 rts This patch doesn't prevent the pointless btst and beqs instructions above, but it does eliminate 2 of the 3 pointless move instructions here and elsewhere. On x86, preempt_count is per-cpu data and the problem does not arise presumably because the compiler is free to optimize more effectively. This patch was tested on m68k and x86. I was expecting no changes to object code for x86 and mostly that's what I saw. However, there were a few places where code generation was perturbed for some reason. The performance issue addressed here is minor on uniprocessor m68k. I got a 0.01% improvement from this patch for a simple "find /sys -false" benchmark. For architectures and workloads susceptible to cache line bounce the improvement is expected to be larger. The only SMP architecture I have is x86, and as x86 unaffected I have not done any further measurements. Fixes: 15115830c887 ("preempt: Cleanup the macro maze a bit") Signed-off-by: Finn Thain Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/0a403120a682a525e6db2d81d1a3ffcc137c3742.1694756831.git.fthain@linux-m68k.org Signed-off-by: Sasha Levin --- include/linux/preempt.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 1424670df161..9aa6358a1a16 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -99,14 +99,21 @@ static __always_inline unsigned char interrupt_context_level(void) return level; } +/* + * These macro definitions avoid redundant invocations of preempt_count() + * because such invocations would result in redundant loads given that + * preempt_count() is commonly implemented with READ_ONCE(). + */ + #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #ifdef CONFIG_PREEMPT_RT # define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) +# define irq_count() ((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | softirq_count()) #else # define softirq_count() (preempt_count() & SOFTIRQ_MASK) +# define irq_count() (preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK)) #endif -#define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* * Macros to retrieve the current execution context: @@ -119,7 +126,11 @@ static __always_inline unsigned char interrupt_context_level(void) #define in_nmi() (nmi_count()) #define in_hardirq() (hardirq_count()) #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) -#define in_task() (!(in_nmi() | in_hardirq() | in_serving_softirq())) +#ifdef CONFIG_PREEMPT_RT +# define in_task() (!((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | in_serving_softirq())) +#else +# define in_task() (!(preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) +#endif /* * The following macros are deprecated and should not be used in new code: From 948189f679ac9820c57c7801d592442b7d122972 Mon Sep 17 00:00:00 2001 From: Jinghao Jia Date: Sun, 17 Sep 2023 16:42:19 -0500 Subject: [PATCH 0158/1397] samples/bpf: syscall_tp_user: Rename num_progs into nr_tests [ Upstream commit 0ee352fe0d28015cab161b04d202fa3231c0ba3b ] The variable name num_progs causes confusion because that variable really controls the number of rounds the test should be executed. Rename num_progs into nr_tests for the sake of clarity. Signed-off-by: Jinghao Jia Signed-off-by: Ruowen Qin Signed-off-by: Jinghao Jia Link: https://lore.kernel.org/r/20230917214220.637721-3-jinghao7@illinois.edu Signed-off-by: Alexei Starovoitov Stable-dep-of: 9220c3ef6fef ("samples/bpf: syscall_tp_user: Fix array out-of-bound access") Signed-off-by: Sasha Levin --- samples/bpf/syscall_tp_user.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index 7a788bb837fc..18c94c7e8a40 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -17,9 +17,9 @@ static void usage(const char *cmd) { - printf("USAGE: %s [-i num_progs] [-h]\n", cmd); - printf(" -i num_progs # number of progs of the test\n"); - printf(" -h # help\n"); + printf("USAGE: %s [-i nr_tests] [-h]\n", cmd); + printf(" -i nr_tests # rounds of test to run\n"); + printf(" -h # help\n"); } static void verify_map(int map_id) @@ -45,14 +45,14 @@ static void verify_map(int map_id) } } -static int test(char *filename, int num_progs) +static int test(char *filename, int nr_tests) { - int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0; - struct bpf_link *links[num_progs * 4]; - struct bpf_object *objs[num_progs]; + int map0_fds[nr_tests], map1_fds[nr_tests], fd, i, j = 0; + struct bpf_link *links[nr_tests * 4]; + struct bpf_object *objs[nr_tests]; struct bpf_program *prog; - for (i = 0; i < num_progs; i++) { + for (i = 0; i < nr_tests; i++) { objs[i] = bpf_object__open_file(filename, NULL); if (libbpf_get_error(objs[i])) { fprintf(stderr, "opening BPF object file failed\n"); @@ -101,7 +101,7 @@ static int test(char *filename, int num_progs) close(fd); /* verify the map */ - for (i = 0; i < num_progs; i++) { + for (i = 0; i < nr_tests; i++) { verify_map(map0_fds[i]); verify_map(map1_fds[i]); } @@ -117,13 +117,13 @@ static int test(char *filename, int num_progs) int main(int argc, char **argv) { - int opt, num_progs = 1; + int opt, nr_tests = 1; char filename[256]; while ((opt = getopt(argc, argv, "i:h")) != -1) { switch (opt) { case 'i': - num_progs = atoi(optarg); + nr_tests = atoi(optarg); break; case 'h': default: @@ -134,5 +134,5 @@ int main(int argc, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - return test(filename, num_progs); + return test(filename, nr_tests); } From de4825a444560f8cb78b03dda3ba873fab88bc4f Mon Sep 17 00:00:00 2001 From: Jinghao Jia Date: Sun, 17 Sep 2023 16:42:20 -0500 Subject: [PATCH 0159/1397] samples/bpf: syscall_tp_user: Fix array out-of-bound access [ Upstream commit 9220c3ef6fefbf18f24aeedb1142a642b3de0596 ] Commit 06744f24696e ("samples/bpf: Add openat2() enter/exit tracepoint to syscall_tp sample") added two more eBPF programs to support the openat2() syscall. However, it did not increase the size of the array that holds the corresponding bpf_links. This leads to an out-of-bound access on that array in the bpf_object__for_each_program loop and could corrupt other variables on the stack. On our testing QEMU, it corrupts the map1_fds array and causes the sample to fail: # ./syscall_tp prog #0: map ids 4 5 verify map:4 val: 5 map_lookup failed: Bad file descriptor Dynamically allocate the array based on the number of programs reported by libbpf to prevent similar inconsistencies in the future Fixes: 06744f24696e ("samples/bpf: Add openat2() enter/exit tracepoint to syscall_tp sample") Signed-off-by: Jinghao Jia Signed-off-by: Ruowen Qin Signed-off-by: Jinghao Jia Link: https://lore.kernel.org/r/20230917214220.637721-4-jinghao7@illinois.edu Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- samples/bpf/syscall_tp_user.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index 18c94c7e8a40..7a09ac74fac0 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -48,7 +48,7 @@ static void verify_map(int map_id) static int test(char *filename, int nr_tests) { int map0_fds[nr_tests], map1_fds[nr_tests], fd, i, j = 0; - struct bpf_link *links[nr_tests * 4]; + struct bpf_link **links = NULL; struct bpf_object *objs[nr_tests]; struct bpf_program *prog; @@ -60,6 +60,19 @@ static int test(char *filename, int nr_tests) goto cleanup; } + /* One-time initialization */ + if (!links) { + int nr_progs = 0; + + bpf_object__for_each_program(prog, objs[i]) + nr_progs += 1; + + links = calloc(nr_progs * nr_tests, sizeof(struct bpf_link *)); + + if (!links) + goto cleanup; + } + /* load BPF program */ if (bpf_object__load(objs[i])) { fprintf(stderr, "loading BPF object file failed\n"); @@ -107,8 +120,12 @@ static int test(char *filename, int nr_tests) } cleanup: - for (j--; j >= 0; j--) - bpf_link__destroy(links[j]); + if (links) { + for (j--; j >= 0; j--) + bpf_link__destroy(links[j]); + + free(links); + } for (i--; i >= 0; i--) bpf_object__close(objs[i]); From 8159c843e4d2d3bd0d6d058a603610c40ce7c5df Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 5 Oct 2023 11:32:46 +0200 Subject: [PATCH 0160/1397] dt-bindings: serial: fix regex pattern for matching serial node children [ Upstream commit 42851dfd4dbe38e34724a00063a9fad5cfc48dcd ] The regular expression pattern for matching serial node children should accept only nodes starting and ending with the set of words: bluetooth, gnss, gps or mcu. Add missing brackets to enforce such matching. Fixes: 0c559bc8abfb ("dt-bindings: serial: restrict possible child node names") Reported-by: Andreas Kemnade Closes: https://lore.kernel.org/all/20231004170021.36b32465@aktux/ Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Link: https://lore.kernel.org/r/20231005093247.128166-1-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/serial/serial.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/serial/serial.yaml b/Documentation/devicetree/bindings/serial/serial.yaml index ea277560a596..5727bd549dec 100644 --- a/Documentation/devicetree/bindings/serial/serial.yaml +++ b/Documentation/devicetree/bindings/serial/serial.yaml @@ -96,7 +96,7 @@ then: rts-gpios: false patternProperties: - "^bluetooth|gnss|gps|mcu$": + "^(bluetooth|gnss|gps|mcu)$": if: type: object then: From d4f51690e6554c623b721552af09b14f4fa33b9f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Sep 2023 09:06:05 -0400 Subject: [PATCH 0161/1397] SUNRPC: ECONNRESET might require a rebind [ Upstream commit 4b09ca1508a60be30b2e3940264e93d7aeb5c97e ] If connect() is returning ECONNRESET, it usually means that nothing is listening on that port. If so, a rebind might be required in order to obtain the new port on which the RPC service is listening. Fixes: fd01b2597941 ("SUNRPC: ECONNREFUSED should cause a rebind.") Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- net/sunrpc/clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9c210273d06b..f6074a4b9eab 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2171,6 +2171,7 @@ call_connect_status(struct rpc_task *task) task->tk_status = 0; switch (status) { case -ECONNREFUSED: + case -ECONNRESET: /* A positive refusal suggests a rebind is needed. */ if (RPC_IS_SOFTCONN(task)) break; @@ -2179,7 +2180,6 @@ call_connect_status(struct rpc_task *task) goto out_retry; } fallthrough; - case -ECONNRESET: case -ECONNABORTED: case -ENETDOWN: case -ENETUNREACH: From 0338bb4363a640bf2797f140f6e9c97dda2f7c59 Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Thu, 19 Oct 2023 06:55:37 +0000 Subject: [PATCH 0162/1397] mtd: rawnand: intel: check return value of devm_kasprintf() [ Upstream commit 74ac5b5e2375f1e8ef797ac7770887e9969f2516 ] devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Fixes: 0b1039f016e8 ("mtd: rawnand: Add NAND controller support on Intel LGM SoC") Signed-off-by: Yi Yang Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20231019065537.318391-1-yiyang13@huawei.com Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/intel-nand-controller.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/mtd/nand/raw/intel-nand-controller.c b/drivers/mtd/nand/raw/intel-nand-controller.c index cb5d88f42297..f0ad2308f6d5 100644 --- a/drivers/mtd/nand/raw/intel-nand-controller.c +++ b/drivers/mtd/nand/raw/intel-nand-controller.c @@ -619,6 +619,11 @@ static int ebu_nand_probe(struct platform_device *pdev) ebu_host->cs_num = cs; resname = devm_kasprintf(dev, GFP_KERNEL, "nand_cs%d", cs); + if (!resname) { + ret = -ENOMEM; + goto err_of_node_put; + } + ebu_host->cs[cs].chipaddr = devm_platform_ioremap_resource_byname(pdev, resname); if (IS_ERR(ebu_host->cs[cs].chipaddr)) { @@ -649,6 +654,11 @@ static int ebu_nand_probe(struct platform_device *pdev) } resname = devm_kasprintf(dev, GFP_KERNEL, "addr_sel%d", cs); + if (!resname) { + ret = -ENOMEM; + goto err_cleanup_dma; + } + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, resname); if (!res) { ret = -EINVAL; From 4cdc83be3ab8513271fe2b62c448da458c4a222f Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Thu, 19 Oct 2023 06:55:48 +0000 Subject: [PATCH 0163/1397] mtd: rawnand: meson: check return value of devm_kasprintf() [ Upstream commit 5a985960a4dd041c21dbe9956958c1633d2da706 ] devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Fixes: 1e4d3ba66888 ("mtd: rawnand: meson: fix the clock") Signed-off-by: Yi Yang Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20231019065548.318443-1-yiyang13@huawei.com Signed-off-by: Sasha Levin --- drivers/mtd/nand/raw/meson_nand.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 25e3c1cb605e..a506e658d462 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -1134,6 +1134,9 @@ static int meson_nfc_clk_init(struct meson_nfc *nfc) init.name = devm_kasprintf(nfc->dev, GFP_KERNEL, "%s#div", dev_name(nfc->dev)); + if (!init.name) + return -ENOMEM; + init.ops = &clk_divider_ops; nfc_divider_parent_data[0].fw_name = "device"; init.parent_data = nfc_divider_parent_data; From 9b0cebc70d47ff3f13c2ec4948f4a198aa27ef0f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Oct 2023 22:10:04 +0200 Subject: [PATCH 0164/1397] drm/i915/mtl: avoid stringop-overflow warning [ Upstream commit 390001d648ffa027b750b7dceb5d43f4c1d1a39e ] The newly added memset() causes a warning for some reason I could not figure out: In file included from arch/x86/include/asm/string.h:3, from drivers/gpu/drm/i915/gt/intel_rc6.c:6: In function 'rc6_res_reg_init', inlined from 'intel_rc6_init' at drivers/gpu/drm/i915/gt/intel_rc6.c:610:2: arch/x86/include/asm/string_32.h:195:29: error: '__builtin_memset' writing 16 bytes into a region of size 0 overflows the destination [-Werror=stringop-overflow=] 195 | #define memset(s, c, count) __builtin_memset(s, c, count) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gt/intel_rc6.c:584:9: note: in expansion of macro 'memset' 584 | memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg)); | ^~~~~~ In function 'intel_rc6_init': Change it to an normal initializer and an added memcpy() that does not have this problem. Fixes: 4bb9ca7ee074 ("drm/i915/mtl: C6 residency and C state type for MTL SAMedia") Signed-off-by: Arnd Bergmann Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231016201012.1022812-1-arnd@kernel.org (cherry picked from commit 0520b30b219053cd789909bca45b3c486ef3ee09) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_rc6.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 58bb1c55294c..ccdc1afbf11b 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -584,19 +584,23 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) static void rc6_res_reg_init(struct intel_rc6 *rc6) { - memset(rc6->res_reg, INVALID_MMIO_REG.reg, sizeof(rc6->res_reg)); + i915_reg_t res_reg[INTEL_RC6_RES_MAX] = { + [0 ... INTEL_RC6_RES_MAX - 1] = INVALID_MMIO_REG, + }; switch (rc6_to_gt(rc6)->type) { case GT_MEDIA: - rc6->res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; + res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6; break; default: - rc6->res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; - rc6->res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; - rc6->res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; - rc6->res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; + res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED; + res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6; + res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p; + res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp; break; } + + memcpy(rc6->res_reg, res_reg, sizeof(res_reg)); } void intel_rc6_init(struct intel_rc6 *rc6) From ec809219077d5e7df5c152c7a73b444aef474a56 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 15 Sep 2023 15:21:16 -0400 Subject: [PATCH 0165/1397] NFSv4.1: fix handling NFS4ERR_DELAY when testing for session trunking [ Upstream commit 6bd1a77dc72dea0b0d8b6014f231143984d18f6d ] Currently when client sends an EXCHANGE_ID for a possible trunked connection, for any error that happened, the trunk will be thrown out. However, an NFS4ERR_DELAY is a transient error that should be retried instead. Fixes: e818bd085baf ("NFSv4.1 remove xprt from xprt_switch if session trunking test fails") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs4proc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5ee283eb9660..c3ff98eb6177 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8934,6 +8934,7 @@ void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, sp4_how = (adata->clp->cl_sp4_flags == 0 ? SP4_NONE : SP4_MACH_CRED); +try_again: /* Test connection for session trunking. Async exchange_id call */ task = nfs4_run_exchange_id(adata->clp, adata->cred, sp4_how, xprt); if (IS_ERR(task)) @@ -8946,11 +8947,15 @@ void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, if (status == 0) rpc_clnt_xprt_switch_add_xprt(clnt, xprt); - else if (rpc_clnt_xprt_switch_has_addr(clnt, + else if (status != -NFS4ERR_DELAY && rpc_clnt_xprt_switch_has_addr(clnt, (struct sockaddr *)&xprt->addr)) rpc_clnt_xprt_switch_remove_xprt(clnt, xprt); rpc_put_task(task); + if (status == -NFS4ERR_DELAY) { + ssleep(1); + goto try_again; + } } EXPORT_SYMBOL_GPL(nfs4_test_session_trunk); From 4069da49f8ae9837ca0f47aff4b6a0d89f4d22e1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Oct 2023 11:00:22 +0300 Subject: [PATCH 0166/1397] SUNRPC: Add an IS_ERR() check back to where it was [ Upstream commit 4f3ed837186fc0d2722ba8d2457a594322e9c2ef ] This IS_ERR() check was deleted during in a cleanup because, at the time, the rpcb_call_async() function could not return an error pointer. That changed in commit 25cf32ad5dba ("SUNRPC: Handle allocation failure in rpc_new_task()") and now it can return an error pointer. Put the check back. A related revert was done in commit 13bd90141804 ("Revert "SUNRPC: Remove unreachable error condition""). Fixes: 037e910b52b0 ("SUNRPC: Remove unreachable error condition in rpcb_getport_async()") Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- net/sunrpc/rpcb_clnt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 5988a5c5ff3f..102c3818bc54 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -769,6 +769,10 @@ void rpcb_getport_async(struct rpc_task *task) child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); + if (IS_ERR(child)) { + /* rpcb_map_release() has freed the arguments */ + return; + } xprt->stat.bind_count++; rpc_put_task(child); From dfdd2663247e330210e58c32dd2fced421cc8bcf Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 13 Oct 2023 11:04:10 -0400 Subject: [PATCH 0167/1397] NFSv4.1: fix SP4_MACH_CRED protection for pnfs IO [ Upstream commit 5cc7688bae7f0757c39c1d3dfdd827b724061067 ] If the client is doing pnfs IO and Kerberos is configured and EXCHANGEID successfully negotiated SP4_MACH_CRED and WRITE/COMMIT are on the list of state protected operations, then we need to make sure to choose the DS's rpc_client structure instead of the MDS's one. Fixes: fb91fb0ee7b2 ("NFS: Move call to nfs4_state_protect_write() to nfs4_write_setup()") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c3ff98eb6177..0ff913b4e9e0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5622,7 +5622,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); - nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr); + nfs4_state_protect_write(hdr->ds_clp ? hdr->ds_clp : server->nfs_client, clnt, msg, hdr); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) @@ -5663,7 +5663,8 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); - nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg); + nfs4_state_protect(data->ds_clp ? data->ds_clp : server->nfs_client, + NFS_SP4_MACH_CRED_COMMIT, clnt, msg); } static int _nfs4_proc_commit(struct file *dst, struct nfs_commitargs *args, From cc2e7ebbeb1d0601f7f3c8d93b78fcc03a95e44a Mon Sep 17 00:00:00 2001 From: felix Date: Mon, 23 Oct 2023 09:40:19 +0800 Subject: [PATCH 0168/1397] SUNRPC: Fix RPC client cleaned up the freed pipefs dentries [ Upstream commit bfca5fb4e97c46503ddfc582335917b0cc228264 ] RPC client pipefs dentries cleanup is in separated rpc_remove_pipedir() workqueue,which takes care about pipefs superblock locking. In some special scenarios, when kernel frees the pipefs sb of the current client and immediately alloctes a new pipefs sb, rpc_remove_pipedir function would misjudge the existence of pipefs sb which is not the one it used to hold. As a result, the rpc_remove_pipedir would clean the released freed pipefs dentries. To fix this issue, rpc_remove_pipedir should check whether the current pipefs sb is consistent with the original pipefs sb. This error can be catched by KASAN: ========================================================= [ 250.497700] BUG: KASAN: slab-use-after-free in dget_parent+0x195/0x200 [ 250.498315] Read of size 4 at addr ffff88800a2ab804 by task kworker/0:18/106503 [ 250.500549] Workqueue: events rpc_free_client_work [ 250.501001] Call Trace: [ 250.502880] kasan_report+0xb6/0xf0 [ 250.503209] ? dget_parent+0x195/0x200 [ 250.503561] dget_parent+0x195/0x200 [ 250.503897] ? __pfx_rpc_clntdir_depopulate+0x10/0x10 [ 250.504384] rpc_rmdir_depopulate+0x1b/0x90 [ 250.504781] rpc_remove_client_dir+0xf5/0x150 [ 250.505195] rpc_free_client_work+0xe4/0x230 [ 250.505598] process_one_work+0x8ee/0x13b0 ... [ 22.039056] Allocated by task 244: [ 22.039390] kasan_save_stack+0x22/0x50 [ 22.039758] kasan_set_track+0x25/0x30 [ 22.040109] __kasan_slab_alloc+0x59/0x70 [ 22.040487] kmem_cache_alloc_lru+0xf0/0x240 [ 22.040889] __d_alloc+0x31/0x8e0 [ 22.041207] d_alloc+0x44/0x1f0 [ 22.041514] __rpc_lookup_create_exclusive+0x11c/0x140 [ 22.041987] rpc_mkdir_populate.constprop.0+0x5f/0x110 [ 22.042459] rpc_create_client_dir+0x34/0x150 [ 22.042874] rpc_setup_pipedir_sb+0x102/0x1c0 [ 22.043284] rpc_client_register+0x136/0x4e0 [ 22.043689] rpc_new_client+0x911/0x1020 [ 22.044057] rpc_create_xprt+0xcb/0x370 [ 22.044417] rpc_create+0x36b/0x6c0 ... [ 22.049524] Freed by task 0: [ 22.049803] kasan_save_stack+0x22/0x50 [ 22.050165] kasan_set_track+0x25/0x30 [ 22.050520] kasan_save_free_info+0x2b/0x50 [ 22.050921] __kasan_slab_free+0x10e/0x1a0 [ 22.051306] kmem_cache_free+0xa5/0x390 [ 22.051667] rcu_core+0x62c/0x1930 [ 22.051995] __do_softirq+0x165/0x52a [ 22.052347] [ 22.052503] Last potentially related work creation: [ 22.052952] kasan_save_stack+0x22/0x50 [ 22.053313] __kasan_record_aux_stack+0x8e/0xa0 [ 22.053739] __call_rcu_common.constprop.0+0x6b/0x8b0 [ 22.054209] dentry_free+0xb2/0x140 [ 22.054540] __dentry_kill+0x3be/0x540 [ 22.054900] shrink_dentry_list+0x199/0x510 [ 22.055293] shrink_dcache_parent+0x190/0x240 [ 22.055703] do_one_tree+0x11/0x40 [ 22.056028] shrink_dcache_for_umount+0x61/0x140 [ 22.056461] generic_shutdown_super+0x70/0x590 [ 22.056879] kill_anon_super+0x3a/0x60 [ 22.057234] rpc_kill_sb+0x121/0x200 Fixes: 0157d021d23a ("SUNRPC: handle RPC client pipefs dentries by network namespace aware routines") Signed-off-by: felix Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index af7358277f1c..e9d4377d03c6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -92,6 +92,7 @@ struct rpc_clnt { }; const struct cred *cl_cred; unsigned int cl_max_connect; /* max number of transports not to the same IP */ + struct super_block *pipefs_sb; }; /* diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f6074a4b9eab..339dfc5b9224 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -111,7 +111,8 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt) pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { - __rpc_clnt_remove_pipedir(clnt); + if (pipefs_sb == clnt->pipefs_sb) + __rpc_clnt_remove_pipedir(clnt); rpc_put_sb_net(net); } } @@ -151,6 +152,8 @@ rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt) { struct dentry *dentry; + clnt->pipefs_sb = pipefs_sb; + if (clnt->cl_program->pipe_dir_name != NULL) { dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt); if (IS_ERR(dentry)) From 81e25896ccf56f89be0a0e83a5f5def6873a014b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 10 Oct 2023 18:51:02 +0200 Subject: [PATCH 0169/1397] RISC-V: hwprobe: Fix vDSO SIGSEGV [ Upstream commit e1c05b3bf80f829ced464bdca90f1dfa96e8d251 ] A hwprobe pair key is signed, but the hwprobe vDSO function was only checking that the upper bound was valid. In order to help avoid this type of problem in the future, and in anticipation of this check becoming more complicated with sparse keys, introduce and use a "key is valid" predicate function for the check. Fixes: aa5af0aa90ba ("RISC-V: Add hwprobe vDSO function and data") Signed-off-by: Andrew Jones Reviewed-by: Evan Green Link: https://lore.kernel.org/r/20231010165101.14942-2-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/asm/hwprobe.h | 5 +++++ arch/riscv/kernel/vdso/hwprobe.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 78936f4ff513..7cad513538d8 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -10,4 +10,9 @@ #define RISCV_HWPROBE_MAX_KEY 5 +static inline bool riscv_hwprobe_key_is_valid(__s64 key) +{ + return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY; +} + #endif diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index d40bec6ac078..cadf725ef798 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -37,7 +37,7 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, /* This is something we can handle, fill out the pairs. */ while (p < end) { - if (p->key <= RISCV_HWPROBE_MAX_KEY) { + if (riscv_hwprobe_key_is_valid(p->key)) { p->value = avd->all_cpu_hwprobe_values[p->key]; } else { From f9e1d36a66d539041f93065ae6851b9801305872 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 29 Aug 2023 10:36:15 +0200 Subject: [PATCH 0170/1397] riscv: provide riscv-specific is_trap_insn() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b701f9e726f0a30a94ea6af596b74c1f07b95b6b ] uprobes expects is_trap_insn() to return true for any trap instructions, not just the one used for installing uprobe. The current default implementation only returns true for 16-bit c.ebreak if C extension is enabled. This can confuse uprobes if a 32-bit ebreak generates a trap exception from userspace: uprobes asks is_trap_insn() who says there is no trap, so uprobes assume a probe was there before but has been removed, and return to the trap instruction. This causes an infinite loop of entering and exiting trap handler. Instead of using the default implementation, implement this function speficially for riscv with checks for both ebreak and c.ebreak. Fixes: 74784081aac8 ("riscv: Add uprobes supported") Signed-off-by: Nam Cao Tested-by: Björn Töpel Reviewed-by: Guo Ren Link: https://lore.kernel.org/r/20230829083614.117748-1-namcaov@gmail.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/probes/uprobes.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 194f166b2cc4..4b3dc8beaf77 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "decode-insn.h" @@ -17,6 +18,11 @@ bool is_swbp_insn(uprobe_opcode_t *insn) #endif } +bool is_trap_insn(uprobe_opcode_t *insn) +{ + return riscv_insn_is_ebreak(*insn) || riscv_insn_is_c_ebreak(*insn); +} + unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) { return instruction_pointer(regs); From c921e1ee70a5f6837df07c830b8e9285916d3a9a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 30 Oct 2023 22:06:05 +0100 Subject: [PATCH 0171/1397] gfs2: Silence "suspicious RCU usage in gfs2_permission" warning [ Upstream commit 074d7306a4fe22fcac0b53f699f92757ab1cee99 ] Commit 0abd1557e21c added rcu_dereference() for dereferencing ip->i_gl in gfs2_permission. This now causes lockdep to complain when gfs2_permission is called in non-RCU context: WARNING: suspicious RCU usage in gfs2_permission Switch to rcu_dereference_check() and check for the MAY_NOT_BLOCK flag to shut up lockdep when we know that dereferencing ip->i_gl is safe. Fixes: 0abd1557e21c ("gfs2: fix an oops in gfs2_permission") Reported-by: syzbot+3e5130844b0c0e2b4948@syzkaller.appspotmail.com Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/inode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index eb4bbe1728c0..4e63fbb63151 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1866,6 +1866,7 @@ static const char *gfs2_get_link(struct dentry *dentry, int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { + int may_not_block = mask & MAY_NOT_BLOCK; struct gfs2_inode *ip; struct gfs2_holder i_gh; struct gfs2_glock *gl; @@ -1873,14 +1874,14 @@ int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, gfs2_holder_mark_uninitialized(&i_gh); ip = GFS2_I(inode); - gl = rcu_dereference(ip->i_gl); + gl = rcu_dereference_check(ip->i_gl, !may_not_block); if (unlikely(!gl)) { /* inode is getting torn down, must be RCU mode */ - WARN_ON_ONCE(!(mask & MAY_NOT_BLOCK)); + WARN_ON_ONCE(!may_not_block); return -ECHILD; } if (gfs2_glock_is_locked_by_me(gl) == NULL) { - if (mask & MAY_NOT_BLOCK) + if (may_not_block) return -ECHILD; error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) From 8ebe2d452fd0c7e6ac2583e3f41022f60e0f1fd1 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Thu, 26 Oct 2023 14:56:36 +0200 Subject: [PATCH 0172/1397] drm/i915/tc: Fix -Wformat-truncation in intel_tc_port_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9506fba463fcbdf8c8b7af3ec9ee34360df843fe ] Fix below compiler warning: intel_tc.c:1879:11: error: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size 3 [-Werror=format-truncation=] "%c/TC#%d", port_name(port), tc_port + 1); ^~ intel_tc.c:1878:2: note: ‘snprintf’ output between 7 and 17 bytes into a destination of size 8 snprintf(tc->port_name, sizeof(tc->port_name), ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "%c/TC#%d", port_name(port), tc_port + 1); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ v2: use kasprintf(Imre) v3: use const for port_name, and fix tc mem leak(Imre) Fixes: 3eafcddf766b ("drm/i915/tc: Move TC port fields to a new intel_tc_port struct") Cc: Mika Kahola Cc: Imre Deak Cc: Jani Nikula Signed-off-by: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Imre Deak Reviewed-by: Mika Kahola Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231026125636.5080-1-nirmoy.das@intel.com (cherry picked from commit 70a3cbbe620ee66afb0c066624196077767e61b2) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/display/intel_tc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 3ebf41859043..cdf2455440be 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -58,7 +58,7 @@ struct intel_tc_port { struct delayed_work link_reset_work; int link_refcount; bool legacy_port:1; - char port_name[8]; + const char *port_name; enum tc_port_mode mode; enum tc_port_mode init_mode; enum phy_fia phy_fia; @@ -1841,8 +1841,12 @@ int intel_tc_port_init(struct intel_digital_port *dig_port, bool is_legacy) else tc->phy_ops = &icl_tc_phy_ops; - snprintf(tc->port_name, sizeof(tc->port_name), - "%c/TC#%d", port_name(port), tc_port + 1); + tc->port_name = kasprintf(GFP_KERNEL, "%c/TC#%d", port_name(port), + tc_port + 1); + if (!tc->port_name) { + kfree(tc); + return -ENOMEM; + } mutex_init(&tc->lock); /* TODO: Combine the two works */ @@ -1863,6 +1867,7 @@ void intel_tc_port_cleanup(struct intel_digital_port *dig_port) { intel_tc_port_suspend(dig_port); + kfree(dig_port->tc->port_name); kfree(dig_port->tc); dig_port->tc = NULL; } From b4ad5617278afd485bcfe3f402539b19d8b42000 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 28 Oct 2023 17:51:01 +0200 Subject: [PATCH 0173/1397] riscv: split cache ops out of dma-noncoherent.c [ Upstream commit 946bb33d330251966223f770f64885c79448b1a1 ] The cache ops are also used by the pmem code which is unconditionally built into the kernel. Move them into a separate file that is built based on the correct config option. Fixes: fd962781270e ("riscv: RISCV_NONSTANDARD_CACHE_OPS shouldn't depend on RISCV_DMA_NONCOHERENT") Reported-by: kernel test robot Signed-off-by: Christoph Hellwig Reviewed-by: Conor Dooley Tested-by: Conor Dooley Reviewed-by: Lad Prabhakar Tested-by: Lad Prabhakar # Link: https://lore.kernel.org/r/20231028155101.1039049-1-hch@lst.de Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/mm/Makefile | 1 + arch/riscv/mm/cache-ops.c | 17 +++++++++++++++++ arch/riscv/mm/dma-noncoherent.c | 15 --------------- 3 files changed, 18 insertions(+), 15 deletions(-) create mode 100644 arch/riscv/mm/cache-ops.c diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 9c454f90fd3d..3a4dfc8babcf 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -36,3 +36,4 @@ endif obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_RISCV_DMA_NONCOHERENT) += dma-noncoherent.o +obj-$(CONFIG_RISCV_NONSTANDARD_CACHE_OPS) += cache-ops.o diff --git a/arch/riscv/mm/cache-ops.c b/arch/riscv/mm/cache-ops.c new file mode 100644 index 000000000000..a993ad11d0ec --- /dev/null +++ b/arch/riscv/mm/cache-ops.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + */ + +#include + +struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init; + +void +riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops) +{ + if (!ops) + return; + noncoherent_cache_ops = *ops; +} +EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops); diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index b76e7e192eb1..341bd6706b4c 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -15,12 +15,6 @@ static bool noncoherent_supported __ro_after_init; int dma_cache_alignment __ro_after_init = ARCH_DMA_MINALIGN; EXPORT_SYMBOL_GPL(dma_cache_alignment); -struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init = { - .wback = NULL, - .inv = NULL, - .wback_inv = NULL, -}; - static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size) { void *vaddr = phys_to_virt(paddr); @@ -162,12 +156,3 @@ void __init riscv_set_dma_cache_alignment(void) if (!noncoherent_supported) dma_cache_alignment = 1; } - -void riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops) -{ - if (!ops) - return; - - noncoherent_cache_ops = *ops; -} -EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops); From 941fcbb0f06c2df81f65e1c79c44a2667247ae09 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 31 Oct 2023 15:43:39 +0100 Subject: [PATCH 0174/1397] vdpa_sim_blk: allocate the buffer zeroed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0d82410252ea324f0064e75b9865bb74cccc1dda ] Deleting and recreating a device can lead to having the same content as the old device, so let's always allocate buffers completely zeroed out. Fixes: abebb16254b3 ("vdpa_sim_blk: support shared backend") Suggested-by: Qing Wang Signed-off-by: Stefano Garzarella Message-Id: <20231031144339.121453-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Eugenio Pérez Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c index b3a3cb165795..b137f3679343 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -437,7 +437,7 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, if (blk->shared_backend) { blk->buffer = shared_buffer; } else { - blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, + blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, GFP_KERNEL); if (!blk->buffer) { ret = -ENOMEM; @@ -495,7 +495,7 @@ static int __init vdpasim_blk_init(void) goto parent_err; if (shared_backend) { - shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, + shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, GFP_KERNEL); if (!shared_buffer) { ret = -ENOMEM; From bf04132cd64ccde4e9e9765d489c83fe83c09b7f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 Oct 2023 15:12:54 +0300 Subject: [PATCH 0175/1397] vhost-vdpa: fix use after free in vhost_vdpa_probe() [ Upstream commit e07754e0a1ea2d63fb29574253d1fd7405607343 ] The put_device() calls vhost_vdpa_release_dev() which calls ida_simple_remove() and frees "v". So this call to ida_simple_remove() is a use after free and a double free. Fixes: ebe6a354fa7e ("vhost-vdpa: Call ida_simple_remove() when failed") Signed-off-by: Dan Carpenter Message-Id: Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/vhost/vdpa.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 78379ffd2336..fb590e346e43 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1511,7 +1511,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) err: put_device(&v->dev); - ida_simple_remove(&vhost_vdpa_ida, v->minor); return r; } From 2e8b4e0992e16d7967095773040cd9919171d8db Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 4 Nov 2023 13:43:37 -0700 Subject: [PATCH 0176/1397] gcc-plugins: randstruct: Only warn about true flexible arrays [ Upstream commit 1ee60356c2dca938362528404af95b8ef3e49b6a ] The randstruct GCC plugin tried to discover "fake" flexible arrays to issue warnings about them in randomized structs. In the future LSM overhead reduction series, it would be legal to have a randomized struct with a 1-element array, and this should _not_ be treated as a flexible array, especially since commit df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3"). Disable the 0-sized and 1-element array discovery logic in the plugin, but keep the "true" flexible array check. Cc: KP Singh Cc: linux-hardening@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311021532.iBwuZUZ0-lkp@intel.com/ Fixes: df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") Reviewed-by: Bill Wendling Acked-by: "Gustavo A. R. Silva" Link: https://lore.kernel.org/r/20231104204334.work.160-kees@kernel.org Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- scripts/gcc-plugins/randomize_layout_plugin.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 951b74ba1b24..5e5744b65f8a 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -273,8 +273,6 @@ static bool is_flexible_array(const_tree field) { const_tree fieldtype; const_tree typesize; - const_tree elemtype; - const_tree elemsize; fieldtype = TREE_TYPE(field); typesize = TYPE_SIZE(fieldtype); @@ -282,20 +280,12 @@ static bool is_flexible_array(const_tree field) if (TREE_CODE(fieldtype) != ARRAY_TYPE) return false; - elemtype = TREE_TYPE(fieldtype); - elemsize = TYPE_SIZE(elemtype); - /* size of type is represented in bits */ if (typesize == NULL_TREE && TYPE_DOMAIN(fieldtype) != NULL_TREE && TYPE_MAX_VALUE(TYPE_DOMAIN(fieldtype)) == NULL_TREE) return true; - if (typesize != NULL_TREE && - (TREE_CONSTANT(typesize) && (!tree_to_uhwi(typesize) || - tree_to_uhwi(typesize) == tree_to_uhwi(elemsize)))) - return true; - return false; } From 3c49b49d794b64000ef5f8d7364af19cb2876adf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Nov 2023 16:26:36 -0800 Subject: [PATCH 0177/1397] bpf: handle ldimm64 properly in check_cfg() [ Upstream commit 3feb263bb516ee7e1da0acd22b15afbb9a7daa19 ] ldimm64 instructions are 16-byte long, and so have to be handled appropriately in check_cfg(), just like the rest of BPF verifier does. This has implications in three places: - when determining next instruction for non-jump instructions; - when determining next instruction for callback address ldimm64 instructions (in visit_func_call_insn()); - when checking for unreachable instructions, where second half of ldimm64 is expected to be unreachable; We take this also as an opportunity to report jump into the middle of ldimm64. And adjust few test_verifier tests accordingly. Acked-by: Eduard Zingerman Reported-by: Hao Sun Fixes: 475fb78fbf48 ("bpf: verifier (add branch/goto checks)") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231110002638.4168352-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- include/linux/bpf.h | 8 ++++-- kernel/bpf/verifier.c | 27 ++++++++++++++----- .../testing/selftests/bpf/verifier/ld_imm64.c | 8 +++--- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 76055186d624..392f581af2ce 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -903,10 +903,14 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +static bool bpf_is_ldimm64(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + static inline bool bpf_pseudo_func(const struct bpf_insn *insn) { - return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && - insn->src_reg == BPF_PSEUDO_FUNC; + return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; } struct bpf_prog_ops { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6542685d6772..0b3a2534196e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14787,15 +14787,16 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, struct bpf_verifier_env *env, bool visit_callee) { - int ret; + int ret, insn_sz; - ret = push_insn(t, t + 1, FALLTHROUGH, env, false); + insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1; + ret = push_insn(t, t + insn_sz, FALLTHROUGH, env, false); if (ret) return ret; - mark_prune_point(env, t + 1); + mark_prune_point(env, t + insn_sz); /* when we exit from subprog, we need to record non-linear history */ - mark_jmp_point(env, t + 1); + mark_jmp_point(env, t + insn_sz); if (visit_callee) { mark_prune_point(env, t); @@ -14817,15 +14818,17 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, static int visit_insn(int t, struct bpf_verifier_env *env) { struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t]; - int ret, off; + int ret, off, insn_sz; if (bpf_pseudo_func(insn)) return visit_func_call_insn(t, insns, env, true); /* All non-branch instructions have a single fall-through edge. */ if (BPF_CLASS(insn->code) != BPF_JMP && - BPF_CLASS(insn->code) != BPF_JMP32) - return push_insn(t, t + 1, FALLTHROUGH, env, false); + BPF_CLASS(insn->code) != BPF_JMP32) { + insn_sz = bpf_is_ldimm64(insn) ? 2 : 1; + return push_insn(t, t + insn_sz, FALLTHROUGH, env, false); + } switch (BPF_OP(insn->code)) { case BPF_EXIT: @@ -14944,11 +14947,21 @@ static int check_cfg(struct bpf_verifier_env *env) } for (i = 0; i < insn_cnt; i++) { + struct bpf_insn *insn = &env->prog->insnsi[i]; + if (insn_state[i] != EXPLORED) { verbose(env, "unreachable insn %d\n", i); ret = -EINVAL; goto err_free; } + if (bpf_is_ldimm64(insn)) { + if (insn_state[i + 1] != 0) { + verbose(env, "jump into the middle of ldimm64 insn %d\n", i); + ret = -EINVAL; + goto err_free; + } + i++; /* skip second half of ldimm64 */ + } } ret = 0; /* cfg looks good */ diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c index f9297900cea6..78f19c255f20 100644 --- a/tools/testing/selftests/bpf/verifier/ld_imm64.c +++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c @@ -9,8 +9,8 @@ BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", + .errstr = "jump into the middle of ldimm64 insn 1", + .errstr_unpriv = "jump into the middle of ldimm64 insn 1", .result = REJECT, }, { @@ -23,8 +23,8 @@ BPF_LD_IMM64(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", + .errstr = "jump into the middle of ldimm64 insn 1", + .errstr_unpriv = "jump into the middle of ldimm64 insn 1", .result = REJECT, }, { From 2595f4eb3461fc72be1df129e4f71addc92f785f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Nov 2023 16:26:37 -0800 Subject: [PATCH 0178/1397] bpf: fix precision backtracking instruction iteration [ Upstream commit 4bb7ea946a370707315ab774432963ce47291946 ] Fix an edge case in __mark_chain_precision() which prematurely stops backtracking instructions in a state if it happens that state's first and last instruction indexes are the same. This situations doesn't necessarily mean that there were no instructions simulated in a state, but rather that we starting from the instruction, jumped around a bit, and then ended up at the same instruction before checkpointing or marking precision. To distinguish between these two possible situations, we need to consult jump history. If it's empty or contain a single record "bridging" parent state and first instruction of processed state, then we indeed backtracked all instructions in this state. But if history is not empty, we are definitely not done yet. Move this logic inside get_prev_insn_idx() to contain it more nicely. Use -ENOENT return code to denote "we are out of instructions" situation. This bug was exposed by verifier_loop1.c's bounded_recursion subtest, once the next fix in this patch set is applied. Acked-by: Eduard Zingerman Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231110002638.4168352-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0b3a2534196e..a35e9b52280b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3201,12 +3201,29 @@ static int push_jmp_history(struct bpf_verifier_env *env, /* Backtrack one insn at a time. If idx is not at the top of recorded * history then previous instruction came from straight line execution. + * Return -ENOENT if we exhausted all instructions within given state. + * + * It's legal to have a bit of a looping with the same starting and ending + * insn index within the same state, e.g.: 3->4->5->3, so just because current + * instruction index is the same as state's first_idx doesn't mean we are + * done. If there is still some jump history left, we should keep going. We + * need to take into account that we might have a jump history between given + * state's parent and itself, due to checkpointing. In this case, we'll have + * history entry recording a jump from last instruction of parent state and + * first instruction of given state. */ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, u32 *history) { u32 cnt = *history; + if (i == st->first_insn_idx) { + if (cnt == 0) + return -ENOENT; + if (cnt == 1 && st->jmp_history[0].idx == i) + return -ENOENT; + } + if (cnt && st->jmp_history[cnt - 1].idx == i) { i = st->jmp_history[cnt - 1].prev_idx; (*history)--; @@ -4081,10 +4098,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) * Nothing to be tracked further in the parent state. */ return 0; - if (i == first_idx) - break; subseq_idx = i; i = get_prev_insn_idx(st, i, &history); + if (i == -ENOENT) + break; if (i >= env->prog->len) { /* This can happen if backtracking reached insn 0 * and there are still reg_mask or stack_mask From 9549f53abc1fa1048f31fc279e109e430ef3324c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 9 Nov 2023 22:14:10 -0800 Subject: [PATCH 0179/1397] bpf: fix control-flow graph checking in privileged mode [ Upstream commit 10e14e9652bf9e8104151bfd9200433083deae3d ] When BPF program is verified in privileged mode, BPF verifier allows bounded loops. This means that from CFG point of view there are definitely some back-edges. Original commit adjusted check_cfg() logic to not detect back-edges in control flow graph if they are resulting from conditional jumps, which the idea that subsequent full BPF verification process will determine whether such loops are bounded or not, and either accept or reject the BPF program. At least that's my reading of the intent. Unfortunately, the implementation of this idea doesn't work correctly in all possible situations. Conditional jump might not result in immediate back-edge, but just a few unconditional instructions later we can arrive at back-edge. In such situations check_cfg() would reject BPF program even in privileged mode, despite it might be bounded loop. Next patch adds one simple program demonstrating such scenario. To keep things simple, instead of trying to detect back edges in privileged mode, just assume every back edge is valid and let subsequent BPF verification prove or reject bounded loops. Note a few test changes. For unknown reason, we have a few tests that are specified to detect a back-edge in a privileged mode, but looking at their code it seems like the right outcome is passing check_cfg() and letting subsequent verification to make a decision about bounded or not bounded looping. Bounded recursion case is also interesting. The example should pass, as recursion is limited to just a few levels and so we never reach maximum number of nested frames and never exhaust maximum stack depth. But the way that max stack depth logic works today it falsely detects this as exceeding max nested frame count. This patch series doesn't attempt to fix this orthogonal problem, so we just adjust expected verifier failure. Suggested-by: Alexei Starovoitov Fixes: 2589726d12a1 ("bpf: introduce bounded loops") Reported-by: Hao Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231110061412.2995786-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 23 +++++++------------ .../selftests/bpf/progs/verifier_loops1.c | 9 +++++--- tools/testing/selftests/bpf/verifier/calls.c | 6 ++--- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a35e9b52280b..3e6bc21d6b17 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14751,8 +14751,7 @@ enum { * w - next instruction * e - edge */ -static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, - bool loop_ok) +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) { int *insn_stack = env->cfg.insn_stack; int *insn_state = env->cfg.insn_state; @@ -14784,7 +14783,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, insn_stack[env->cfg.cur_stack++] = w; return KEEP_EXPLORING; } else if ((insn_state[w] & 0xF0) == DISCOVERED) { - if (loop_ok && env->bpf_capable) + if (env->bpf_capable) return DONE_EXPLORING; verbose_linfo(env, t, "%d: ", t); verbose_linfo(env, w, "%d: ", w); @@ -14807,7 +14806,7 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, int ret, insn_sz; insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1; - ret = push_insn(t, t + insn_sz, FALLTHROUGH, env, false); + ret = push_insn(t, t + insn_sz, FALLTHROUGH, env); if (ret) return ret; @@ -14817,12 +14816,7 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, if (visit_callee) { mark_prune_point(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env, - /* It's ok to allow recursion from CFG point of - * view. __check_func_call() will do the actual - * check. - */ - bpf_pseudo_func(insns + t)); + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); } return ret; } @@ -14844,7 +14838,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env) if (BPF_CLASS(insn->code) != BPF_JMP && BPF_CLASS(insn->code) != BPF_JMP32) { insn_sz = bpf_is_ldimm64(insn) ? 2 : 1; - return push_insn(t, t + insn_sz, FALLTHROUGH, env, false); + return push_insn(t, t + insn_sz, FALLTHROUGH, env); } switch (BPF_OP(insn->code)) { @@ -14891,8 +14885,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env) off = insn->imm; /* unconditional jump with single edge */ - ret = push_insn(t, t + off + 1, FALLTHROUGH, env, - true); + ret = push_insn(t, t + off + 1, FALLTHROUGH, env); if (ret) return ret; @@ -14905,11 +14898,11 @@ static int visit_insn(int t, struct bpf_verifier_env *env) /* conditional jump with two edges */ mark_prune_point(env, t); - ret = push_insn(t, t + 1, FALLTHROUGH, env, true); + ret = push_insn(t, t + 1, FALLTHROUGH, env); if (ret) return ret; - return push_insn(t, t + insn->off + 1, BRANCH, env, true); + return push_insn(t, t + insn->off + 1, BRANCH, env); } } diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c index 5bc86af80a9a..71735dbf33d4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_loops1.c +++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c @@ -75,9 +75,10 @@ l0_%=: r0 += 1; \ " ::: __clobber_all); } -SEC("tracepoint") +SEC("socket") __description("bounded loop, start in the middle") -__failure __msg("back-edge") +__success +__failure_unpriv __msg_unpriv("back-edge") __naked void loop_start_in_the_middle(void) { asm volatile (" \ @@ -136,7 +137,9 @@ l0_%=: exit; \ SEC("tracepoint") __description("bounded recursion") -__failure __msg("back-edge") +__failure +/* verifier limitation in detecting max stack depth */ +__msg("the call stack of 8 frames is too deep !") __naked void bounded_recursion(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 1bdf2b43e49e..3d5cd51071f0 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -442,7 +442,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge from insn 0 to 0", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { @@ -799,7 +799,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { @@ -811,7 +811,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { From cf0453f3b42a966394738e285707cba80d3741ce Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 8 Nov 2023 13:13:25 -0800 Subject: [PATCH 0180/1397] net: set SOCK_RCU_FREE before inserting socket into hashtable [ Upstream commit 871019b22d1bcc9fab2d1feba1b9a564acbb6e99 ] We've started to see the following kernel traces: WARNING: CPU: 83 PID: 0 at net/core/filter.c:6641 sk_lookup+0x1bd/0x1d0 Call Trace: __bpf_skc_lookup+0x10d/0x120 bpf_sk_lookup+0x48/0xd0 bpf_sk_lookup_tcp+0x19/0x20 bpf_prog_+0x37c/0x16a3 cls_bpf_classify+0x205/0x2e0 tcf_classify+0x92/0x160 __netif_receive_skb_core+0xe52/0xf10 __netif_receive_skb_list_core+0x96/0x2b0 napi_complete_done+0x7b5/0xb70 _poll+0x94/0xb0 net_rx_action+0x163/0x1d70 __do_softirq+0xdc/0x32e asm_call_irq_on_stack+0x12/0x20 do_softirq_own_stack+0x36/0x50 do_softirq+0x44/0x70 __inet_hash can race with lockless (rcu) readers on the other cpus: __inet_hash __sk_nulls_add_node_rcu <- (bpf triggers here) sock_set_flag(SOCK_RCU_FREE) Let's move the SOCK_RCU_FREE part up a bit, before we are inserting the socket into hashtables. Note, that the race is really harmless; the bpf callers are handling this situation (where listener socket doesn't have SOCK_RCU_FREE set) correctly, so the only annoyance is a WARN_ONCE. More details from Eric regarding SOCK_RCU_FREE timeline: Commit 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") added SOCK_RCU_FREE. At that time, the precise location of sock_set_flag(sk, SOCK_RCU_FREE) did not matter, because the thread calling __inet_hash() owns a reference on sk. SOCK_RCU_FREE was only tested at dismantle time. Commit 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF") started checking SOCK_RCU_FREE _after_ the lookup to infer whether the refcount has been taken care of. Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF") Reviewed-by: Eric Dumazet Signed-off-by: Stanislav Fomichev Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/inet_hashtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 598c1b114d2c..a532f749e477 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -751,12 +751,12 @@ int __inet_hash(struct sock *sk, struct sock *osk) if (err) goto unlock; } + sock_set_flag(sk, SOCK_RCU_FREE); if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) __sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head); else __sk_nulls_add_node_rcu(sk, &ilb2->nulls_head); - sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: spin_unlock(&ilb2->lock); From 03cddc4df8c6be47fd27c8f8b87e5f9a989e1458 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Nov 2023 15:22:41 +0000 Subject: [PATCH 0181/1397] ipvlan: add ipvlan_route_v6_outbound() helper [ Upstream commit 18f039428c7df183b09c69ebf10ffd4e521035d2 ] Inspired by syzbot reports using a stack of multiple ipvlan devices. Reduce stack size needed in ipvlan_process_v6_outbound() by moving the flowi6 struct used for the route lookup in an non inlined helper. ipvlan_route_v6_outbound() needs 120 bytes on the stack, immediately reclaimed. Also make sure ipvlan_process_v4_outbound() is not inlined. We might also have to lower MAX_NEST_DEV, because only syzbot uses setups with more than four stacked devices. BUG: TASK stack guard page was hit at ffffc9000e803ff8 (stack is ffffc9000e804000..ffffc9000e808000) stack guard page: 0000 [#1] SMP KASAN CPU: 0 PID: 13442 Comm: syz-executor.4 Not tainted 6.1.52-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 RIP: 0010:kasan_check_range+0x4/0x2a0 mm/kasan/generic.c:188 Code: 48 01 c6 48 89 c7 e8 db 4e c1 03 31 c0 5d c3 cc 0f 0b eb 02 0f 0b b8 ea ff ff ff 5d c3 cc 00 00 cc cc 00 00 cc cc 55 48 89 e5 <41> 57 41 56 41 55 41 54 53 b0 01 48 85 f6 0f 84 a4 01 00 00 48 89 RSP: 0018:ffffc9000e804000 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff817e5bf2 RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffffffff887c6568 RBP: ffffc9000e804000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: dffffc0000000001 R12: 1ffff92001d0080c R13: dffffc0000000000 R14: ffffffff87e6b100 R15: 0000000000000000 FS: 00007fd0c55826c0(0000) GS:ffff8881f6800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffc9000e803ff8 CR3: 0000000170ef7000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <#DF> [] __kasan_check_read+0x11/0x20 mm/kasan/shadow.c:31 [] instrument_atomic_read include/linux/instrumented.h:72 [inline] [] _test_bit include/asm-generic/bitops/instrumented-non-atomic.h:141 [inline] [] cpumask_test_cpu include/linux/cpumask.h:506 [inline] [] cpu_online include/linux/cpumask.h:1092 [inline] [] trace_lock_acquire include/trace/events/lock.h:24 [inline] [] lock_acquire+0xe2/0x590 kernel/locking/lockdep.c:5632 [] rcu_lock_acquire+0x2e/0x40 include/linux/rcupdate.h:306 [] rcu_read_lock include/linux/rcupdate.h:747 [inline] [] ip6_pol_route+0x15d/0x1440 net/ipv6/route.c:2221 [] ip6_pol_route_output+0x50/0x80 net/ipv6/route.c:2606 [] pol_lookup_func include/net/ip6_fib.h:584 [inline] [] fib6_rule_lookup+0x265/0x620 net/ipv6/fib6_rules.c:116 [] ip6_route_output_flags_noref+0x2d9/0x3a0 net/ipv6/route.c:2638 [] ip6_route_output_flags+0xca/0x340 net/ipv6/route.c:2651 [] ip6_route_output include/net/ip6_route.h:100 [inline] [] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:473 [inline] [] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline] [] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] [] ipvlan_queue_xmit+0xc33/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677 [] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229 [] netdev_start_xmit include/linux/netdevice.h:4966 [inline] [] xmit_one net/core/dev.c:3644 [inline] [] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660 [] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324 [] dev_queue_xmit include/linux/netdevice.h:3067 [inline] [] neigh_hh_output include/net/neighbour.h:529 [inline] [] neigh_output include/net/neighbour.h:543 [inline] [] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139 [] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline] [] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211 [] NF_HOOK_COND include/linux/netfilter.h:298 [inline] [] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232 [] dst_output include/net/dst.h:444 [inline] [] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161 [] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline] [] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline] [] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] [] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677 [] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229 [] netdev_start_xmit include/linux/netdevice.h:4966 [inline] [] xmit_one net/core/dev.c:3644 [inline] [] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660 [] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324 [] dev_queue_xmit include/linux/netdevice.h:3067 [inline] [] neigh_hh_output include/net/neighbour.h:529 [inline] [] neigh_output include/net/neighbour.h:543 [inline] [] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139 [] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline] [] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211 [] NF_HOOK_COND include/linux/netfilter.h:298 [inline] [] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232 [] dst_output include/net/dst.h:444 [inline] [] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161 [] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline] [] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline] [] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] [] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677 [] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229 [] netdev_start_xmit include/linux/netdevice.h:4966 [inline] [] xmit_one net/core/dev.c:3644 [inline] [] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660 [] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324 [] dev_queue_xmit include/linux/netdevice.h:3067 [inline] [] neigh_hh_output include/net/neighbour.h:529 [inline] [] neigh_output include/net/neighbour.h:543 [inline] [] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139 [] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline] [] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211 [] NF_HOOK_COND include/linux/netfilter.h:298 [inline] [] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232 [] dst_output include/net/dst.h:444 [inline] [] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161 [] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline] [] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline] [] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] [] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677 [] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229 [] netdev_start_xmit include/linux/netdevice.h:4966 [inline] [] xmit_one net/core/dev.c:3644 [inline] [] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660 [] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324 [] dev_queue_xmit include/linux/netdevice.h:3067 [inline] [] neigh_hh_output include/net/neighbour.h:529 [inline] [] neigh_output include/net/neighbour.h:543 [inline] [] ip6_finish_output2+0x160d/0x1ae0 net/ipv6/ip6_output.c:139 [] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline] [] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211 [] NF_HOOK_COND include/linux/netfilter.h:298 [inline] [] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232 [] dst_output include/net/dst.h:444 [inline] [] ip6_local_out+0x10f/0x140 net/ipv6/output_core.c:161 [] ipvlan_process_v6_outbound drivers/net/ipvlan/ipvlan_core.c:483 [inline] [] ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:529 [inline] [] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] [] ipvlan_queue_xmit+0x1174/0x1be0 drivers/net/ipvlan/ipvlan_core.c:677 [] ipvlan_start_xmit+0x49/0x100 drivers/net/ipvlan/ipvlan_main.c:229 [] netdev_start_xmit include/linux/netdevice.h:4966 [inline] [] xmit_one net/core/dev.c:3644 [inline] [] dev_hard_start_xmit+0x320/0x980 net/core/dev.c:3660 [] __dev_queue_xmit+0x16b2/0x3370 net/core/dev.c:4324 [] dev_queue_xmit include/linux/netdevice.h:3067 [inline] [] neigh_resolve_output+0x64e/0x750 net/core/neighbour.c:1560 [] neigh_output include/net/neighbour.h:545 [inline] [] ip6_finish_output2+0x1643/0x1ae0 net/ipv6/ip6_output.c:139 [] __ip6_finish_output net/ipv6/ip6_output.c:200 [inline] [] ip6_finish_output+0x6c6/0xb10 net/ipv6/ip6_output.c:211 [] NF_HOOK_COND include/linux/netfilter.h:298 [inline] [] ip6_output+0x2bc/0x3d0 net/ipv6/ip6_output.c:232 [] dst_output include/net/dst.h:444 [inline] [] NF_HOOK include/linux/netfilter.h:309 [inline] [] ip6_xmit+0x11a4/0x1b20 net/ipv6/ip6_output.c:352 [] sctp_v6_xmit+0x9ae/0x1230 net/sctp/ipv6.c:250 [] sctp_packet_transmit+0x25de/0x2bc0 net/sctp/output.c:653 [] sctp_packet_singleton+0x202/0x310 net/sctp/outqueue.c:783 [] sctp_outq_flush_ctrl net/sctp/outqueue.c:914 [inline] [] sctp_outq_flush+0x661/0x3d40 net/sctp/outqueue.c:1212 [] sctp_outq_uncork+0x79/0xb0 net/sctp/outqueue.c:764 [] sctp_side_effects net/sctp/sm_sideeffect.c:1199 [inline] [] sctp_do_sm+0x55c0/0x5c30 net/sctp/sm_sideeffect.c:1170 [] sctp_primitive_ASSOCIATE+0x97/0xc0 net/sctp/primitive.c:73 [] sctp_sendmsg_to_asoc+0xf62/0x17b0 net/sctp/socket.c:1839 [] sctp_sendmsg+0x212e/0x33b0 net/sctp/socket.c:2029 [] inet_sendmsg+0x149/0x310 net/ipv4/af_inet.c:849 [] sock_sendmsg_nosec net/socket.c:716 [inline] [] sock_sendmsg net/socket.c:736 [inline] [] ____sys_sendmsg+0x572/0x8c0 net/socket.c:2504 [] ___sys_sendmsg net/socket.c:2558 [inline] [] __sys_sendmsg+0x271/0x360 net/socket.c:2587 [] __do_sys_sendmsg net/socket.c:2596 [inline] [] __se_sys_sendmsg net/socket.c:2594 [inline] [] __x64_sys_sendmsg+0x7f/0x90 net/socket.c:2594 [] do_syscall_x64 arch/x86/entry/common.c:51 [inline] [] do_syscall_64+0x53/0x80 arch/x86/entry/common.c:84 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Mahesh Bandewar Cc: Willem de Bruijn Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ipvlan/ipvlan_core.c | 41 +++++++++++++++++++------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 21e9cac73121..2d5b021b4ea6 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -411,7 +411,7 @@ struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, return addr; } -static int ipvlan_process_v4_outbound(struct sk_buff *skb) +static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) { const struct iphdr *ip4h = ip_hdr(skb); struct net_device *dev = skb->dev; @@ -453,13 +453,11 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb) } #if IS_ENABLED(CONFIG_IPV6) -static int ipvlan_process_v6_outbound(struct sk_buff *skb) + +static noinline_for_stack int +ipvlan_route_v6_outbound(struct net_device *dev, struct sk_buff *skb) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); - struct net_device *dev = skb->dev; - struct net *net = dev_net(dev); - struct dst_entry *dst; - int err, ret = NET_XMIT_DROP; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, .daddr = ip6h->daddr, @@ -469,27 +467,38 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) .flowi6_mark = skb->mark, .flowi6_proto = ip6h->nexthdr, }; + struct dst_entry *dst; + int err; - dst = ip6_route_output(net, NULL, &fl6); - if (dst->error) { - ret = dst->error; + dst = ip6_route_output(dev_net(dev), NULL, &fl6); + err = dst->error; + if (err) { dst_release(dst); - goto err; + return err; } skb_dst_set(skb, dst); + return 0; +} + +static int ipvlan_process_v6_outbound(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + int err, ret = NET_XMIT_DROP; + + err = ipvlan_route_v6_outbound(dev, skb); + if (unlikely(err)) { + DEV_STATS_INC(dev, tx_errors); + kfree_skb(skb); + return err; + } memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - err = ip6_local_out(net, skb->sk, skb); + err = ip6_local_out(dev_net(dev), skb->sk, skb); if (unlikely(net_xmit_eval(err))) DEV_STATS_INC(dev, tx_errors); else ret = NET_XMIT_SUCCESS; - goto out; -err: - DEV_STATS_INC(dev, tx_errors); - kfree_skb(skb); -out: return ret; } #else From 30007a74cb41d75ce926fcef52bbbd074e116336 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Thu, 9 Nov 2023 00:44:20 +0900 Subject: [PATCH 0182/1397] tty: Fix uninit-value access in ppp_sync_receive() [ Upstream commit 719639853d88071dfdfd8d9971eca9c283ff314c ] KMSAN reported the following uninit-value access issue: ===================================================== BUG: KMSAN: uninit-value in ppp_sync_input drivers/net/ppp/ppp_synctty.c:690 [inline] BUG: KMSAN: uninit-value in ppp_sync_receive+0xdc9/0xe70 drivers/net/ppp/ppp_synctty.c:334 ppp_sync_input drivers/net/ppp/ppp_synctty.c:690 [inline] ppp_sync_receive+0xdc9/0xe70 drivers/net/ppp/ppp_synctty.c:334 tiocsti+0x328/0x450 drivers/tty/tty_io.c:2295 tty_ioctl+0x808/0x1920 drivers/tty/tty_io.c:2694 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0x211/0x400 fs/ioctl.c:857 __x64_sys_ioctl+0x97/0xe0 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: __alloc_pages+0x75d/0xe80 mm/page_alloc.c:4591 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] __page_frag_cache_refill+0x9a/0x2c0 mm/page_alloc.c:4691 page_frag_alloc_align+0x91/0x5d0 mm/page_alloc.c:4722 page_frag_alloc include/linux/gfp.h:322 [inline] __netdev_alloc_skb+0x215/0x6d0 net/core/skbuff.c:728 netdev_alloc_skb include/linux/skbuff.h:3225 [inline] dev_alloc_skb include/linux/skbuff.h:3238 [inline] ppp_sync_input drivers/net/ppp/ppp_synctty.c:669 [inline] ppp_sync_receive+0x237/0xe70 drivers/net/ppp/ppp_synctty.c:334 tiocsti+0x328/0x450 drivers/tty/tty_io.c:2295 tty_ioctl+0x808/0x1920 drivers/tty/tty_io.c:2694 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0x211/0x400 fs/ioctl.c:857 __x64_sys_ioctl+0x97/0xe0 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b CPU: 0 PID: 12950 Comm: syz-executor.1 Not tainted 6.6.0-14500-g1c41041124bd #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 ===================================================== ppp_sync_input() checks the first 2 bytes of the data are PPP_ALLSTATIONS and PPP_UI. However, if the data length is 1 and the first byte is PPP_ALLSTATIONS, an access to an uninitialized value occurs when checking PPP_UI. This patch resolves this issue by checking the data length. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Shigeru Yoshida Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ppp/ppp_synctty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index ebcdffdf4f0e..ea261a628786 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -687,7 +687,7 @@ ppp_sync_input(struct syncppp *ap, const u8 *buf, const u8 *flags, int count) /* strip address/control field if present */ p = skb->data; - if (p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) { + if (skb->len >= 2 && p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) { /* chop off address/control */ if (skb->len < 3) goto err; From 70151949c97e0e5cbf8d1327df3a71ef47b2d0ac Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 10 Nov 2023 17:13:02 +0100 Subject: [PATCH 0183/1397] net: ti: icssg-prueth: Add missing icss_iep_put to error path [ Upstream commit e409d7346648c9acff84c3cc8d291767ee2d5326 ] Analogously to prueth_remove, just also taking care for NULL'ing the iep pointers. Fixes: 186734c15886 ("net: ti: icssg-prueth: add packet timestamping and ptp support") Fixes: 443a2367ba3c ("net: ti: icssg-prueth: am65x SR2.0 add 10M full duplex support") Signed-off-by: Jan Kiszka Reviewed-by: Wojciech Drewek Reviewed-by: MD Danish Anwar Reviewed-by: Roger Quadros Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 4914d0ef58e9..71d3001ec1ef 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -2092,10 +2092,7 @@ static int prueth_probe(struct platform_device *pdev) prueth->iep1 = icss_iep_get_idx(np, 1); if (IS_ERR(prueth->iep1)) { ret = dev_err_probe(dev, PTR_ERR(prueth->iep1), "iep1 get failed\n"); - icss_iep_put(prueth->iep0); - prueth->iep0 = NULL; - prueth->iep1 = NULL; - goto free_pool; + goto put_iep0; } if (prueth->pdata.quirk_10m_link_issue) { @@ -2185,6 +2182,12 @@ static int prueth_probe(struct platform_device *pdev) exit_iep: if (prueth->pdata.quirk_10m_link_issue) icss_iep_exit_fw(prueth->iep1); + icss_iep_put(prueth->iep1); + +put_iep0: + icss_iep_put(prueth->iep0); + prueth->iep0 = NULL; + prueth->iep1 = NULL; free_pool: gen_pool_free(prueth->sram_pool, From 920114bf3d63de03d46dc7545257f49bf883c5e7 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 10 Nov 2023 17:13:08 +0100 Subject: [PATCH 0184/1397] net: ti: icssg-prueth: Fix error cleanup on failing pruss_request_mem_region [ Upstream commit 2bd5b559a1f391f05927bbb0b31381fa71c61e26 ] We were just continuing in this case, surely not desired. Fixes: 128d5874c082 ("net: ti: icssg-prueth: Add ICSSG ethernet driver") Signed-off-by: Jan Kiszka Reviewed-by: Wojciech Drewek Reviewed-by: Roger Quadros Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 71d3001ec1ef..c09ecb3da772 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -2050,7 +2050,7 @@ static int prueth_probe(struct platform_device *pdev) &prueth->shram); if (ret) { dev_err(dev, "unable to get PRUSS SHRD RAM2: %d\n", ret); - pruss_put(prueth->pruss); + goto put_pruss; } prueth->sram_pool = of_gen_pool_get(np, "sram", 0); @@ -2195,6 +2195,8 @@ static int prueth_probe(struct platform_device *pdev) put_mem: pruss_release_mem_region(prueth->pruss, &prueth->shram); + +put_pruss: pruss_put(prueth->pruss); put_cores: From 1be00f836478b6f962605cc03006c3d0140541b0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 24 Oct 2023 13:51:36 +0200 Subject: [PATCH 0185/1397] xen/events: avoid using info_for_irq() in xen_send_IPI_one() [ Upstream commit e64e7c74b99ec9e439abca75f522f4b98f220bd1 ] xen_send_IPI_one() is being used by cpuhp_report_idle_dead() after it calls rcu_report_dead(), meaning that any RCU usage by xen_send_IPI_one() is a bad idea. Unfortunately xen_send_IPI_one() is using notify_remote_via_irq() today, which is using irq_get_chip_data() via info_for_irq(). And irq_get_chip_data() in turn is using a maple-tree lookup requiring RCU. Avoid this problem by caching the ipi event channels in another percpu variable, allowing the use notify_remote_via_evtchn() in xen_send_IPI_one(). Fixes: 721255b9826b ("genirq: Use a maple tree for interrupt descriptor management") Reported-by: David Woodhouse Signed-off-by: Juergen Gross Tested-by: David Woodhouse Acked-by: Stefano Stabellini Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/events/events_base.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 1b2136fe0fa5..2cf0c2b69386 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -164,6 +164,8 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; /* IRQ <-> IPI mapping */ static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; +/* Cache for IPI event channels - needed for hot cpu unplug (avoid RCU usage). */ +static DEFINE_PER_CPU(evtchn_port_t [XEN_NR_IPIS], ipi_to_evtchn) = {[0 ... XEN_NR_IPIS-1] = 0}; /* Event channel distribution data */ static atomic_t channels_on_cpu[NR_CPUS]; @@ -366,6 +368,7 @@ static int xen_irq_info_ipi_setup(unsigned cpu, info->u.ipi = ipi; per_cpu(ipi_to_irq, cpu)[ipi] = irq; + per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn; return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0); } @@ -981,6 +984,7 @@ static void __unbind_from_irq(unsigned int irq) break; case IRQT_IPI: per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1; + per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(irq)] = 0; break; case IRQT_EVTCHN: dev = info->u.interdomain; @@ -1631,7 +1635,7 @@ EXPORT_SYMBOL_GPL(evtchn_put); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) { - int irq; + evtchn_port_t evtchn; #ifdef CONFIG_X86 if (unlikely(vector == XEN_NMI_VECTOR)) { @@ -1642,9 +1646,9 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) return; } #endif - irq = per_cpu(ipi_to_irq, cpu)[vector]; - BUG_ON(irq < 0); - notify_remote_via_irq(irq); + evtchn = per_cpu(ipi_to_evtchn, cpu)[vector]; + BUG_ON(evtchn == 0); + notify_remote_via_evtchn(evtchn); } struct evtchn_loop_ctrl { From 914424419264c667cbebf68eb00cc7e4598894c4 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Fri, 10 Nov 2023 17:37:07 +0800 Subject: [PATCH 0186/1397] net: hns3: fix add VLAN fail issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 472a2ff63efb30234cbf6b2cdaf8117f21b4f8bc ] The hclge_sync_vlan_filter is called in periodic task, trying to remove VLAN from vlan_del_fail_bmap. It can be concurrence with VLAN adding operation from user. So once user failed to delete a VLAN id, and add it again soon, it may be removed by the periodic task, which may cause the software configuration being inconsistent with hardware. So add mutex handling to avoid this. user hns3 driver periodic task │ add vlan 10 ───── hns3_vlan_rx_add_vid │ │ (suppose success) │ │ │ del vlan 10 ───── hns3_vlan_rx_kill_vid │ │ (suppose fail,add to │ │ vlan_del_fail_bmap) │ │ │ add vlan 10 ───── hns3_vlan_rx_add_vid │ (suppose success) │ foreach vlan_del_fail_bmp del vlan 10 Fixes: fe4144d47eef ("net: hns3: sync VLAN filter entries when kill VLAN ID failed") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../hisilicon/hns3/hns3pf/hclge_main.c | 28 +++++++++++++------ .../hisilicon/hns3/hns3vf/hclgevf_main.c | 11 ++++++-- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c42574e29747..a884e8f7e947 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10026,8 +10026,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, struct hclge_vport_vlan_cfg *vlan, *tmp; struct hclge_dev *hdev = vport->back; - mutex_lock(&hdev->vport_lock); - list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { if (vlan->vlan_id == vlan_id) { if (is_write_tbl && vlan->hd_tbl_status) @@ -10042,8 +10040,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, break; } } - - mutex_unlock(&hdev->vport_lock); } void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list) @@ -10452,11 +10448,16 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, * handle mailbox. Just record the vlan id, and remove it after * reset finished. */ + mutex_lock(&hdev->vport_lock); if ((test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) && is_kill) { set_bit(vlan_id, vport->vlan_del_fail_bmap); + mutex_unlock(&hdev->vport_lock); return -EBUSY; + } else if (!is_kill && test_bit(vlan_id, vport->vlan_del_fail_bmap)) { + clear_bit(vlan_id, vport->vlan_del_fail_bmap); } + mutex_unlock(&hdev->vport_lock); /* when port base vlan enabled, we use port base vlan as the vlan * filter entry. In this case, we don't update vlan filter table @@ -10471,17 +10472,22 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, } if (!ret) { - if (!is_kill) + if (!is_kill) { hclge_add_vport_vlan_table(vport, vlan_id, writen_to_tbl); - else if (is_kill && vlan_id != 0) + } else if (is_kill && vlan_id != 0) { + mutex_lock(&hdev->vport_lock); hclge_rm_vport_vlan_table(vport, vlan_id, false); + mutex_unlock(&hdev->vport_lock); + } } else if (is_kill) { /* when remove hw vlan filter failed, record the vlan id, * and try to remove it from hw later, to be consistence * with stack */ + mutex_lock(&hdev->vport_lock); set_bit(vlan_id, vport->vlan_del_fail_bmap); + mutex_unlock(&hdev->vport_lock); } hclge_set_vport_vlan_fltr_change(vport); @@ -10521,6 +10527,7 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev) int i, ret, sync_cnt = 0; u16 vlan_id; + mutex_lock(&hdev->vport_lock); /* start from vport 1 for PF is always alive */ for (i = 0; i < hdev->num_alloc_vport; i++) { struct hclge_vport *vport = &hdev->vport[i]; @@ -10531,21 +10538,26 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev) ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), vport->vport_id, vlan_id, true); - if (ret && ret != -EINVAL) + if (ret && ret != -EINVAL) { + mutex_unlock(&hdev->vport_lock); return; + } clear_bit(vlan_id, vport->vlan_del_fail_bmap); hclge_rm_vport_vlan_table(vport, vlan_id, false); hclge_set_vport_vlan_fltr_change(vport); sync_cnt++; - if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) + if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) { + mutex_unlock(&hdev->vport_lock); return; + } vlan_id = find_first_bit(vport->vlan_del_fail_bmap, VLAN_N_VID); } } + mutex_unlock(&hdev->vport_lock); hclge_sync_vlan_fltr_state(hdev); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index a4d68fb216fb..1c62e58ff6d8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1206,6 +1206,8 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle, test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) && is_kill) { set_bit(vlan_id, hdev->vlan_del_fail_bmap); return -EBUSY; + } else if (!is_kill && test_bit(vlan_id, hdev->vlan_del_fail_bmap)) { + clear_bit(vlan_id, hdev->vlan_del_fail_bmap); } hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, @@ -1233,20 +1235,25 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev) int ret, sync_cnt = 0; u16 vlan_id; + if (bitmap_empty(hdev->vlan_del_fail_bmap, VLAN_N_VID)) + return; + + rtnl_lock(); vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID); while (vlan_id != VLAN_N_VID) { ret = hclgevf_set_vlan_filter(handle, htons(ETH_P_8021Q), vlan_id, true); if (ret) - return; + break; clear_bit(vlan_id, hdev->vlan_del_fail_bmap); sync_cnt++; if (sync_cnt >= HCLGEVF_MAX_SYNC_COUNT) - return; + break; vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID); } + rtnl_unlock(); } static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) From fc6df8e99b38154ebb87a014286acd771ef6bcfe Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Fri, 10 Nov 2023 17:37:08 +0800 Subject: [PATCH 0187/1397] net: hns3: add barrier in vf mailbox reply process [ Upstream commit ac92c0a9a0603fb448e60f38e63302e4eebb8035 ] In hclgevf_mbx_handler() and hclgevf_get_mbx_resp() functions, there is a typical store-store and load-load scenario between received_resp and additional_info. This patch adds barrier to fix the problem. Fixes: 4671042f1ef0 ("net: hns3: add match_id to check mailbox response from PF to VF") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index bbf7b14079de..85c2a634c8f9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -63,6 +63,9 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1, i++; } + /* ensure additional_info will be seen after received_resp */ + smp_rmb(); + if (i >= HCLGEVF_MAX_TRY_TIMES) { dev_err(&hdev->pdev->dev, "VF could not get mbx(%u,%u) resp(=%d) from PF in %d tries\n", @@ -178,6 +181,10 @@ static void hclgevf_handle_mbx_response(struct hclgevf_dev *hdev, resp->resp_status = hclgevf_resp_to_errno(resp_status); memcpy(resp->additional_info, req->msg.resp_data, HCLGE_MBX_MAX_RESP_DATA_SIZE * sizeof(u8)); + + /* ensure additional_info will be seen before setting received_resp */ + smp_wmb(); + if (match_id) { /* If match_id is not zero, it means PF support match_id. * if the match_id is right, VF get the right response, or From ab31012867f28f9fb3fe6646b0943099b31163bd Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Fri, 10 Nov 2023 17:37:09 +0800 Subject: [PATCH 0188/1397] net: hns3: fix incorrect capability bit display for copper port [ Upstream commit 75b247b57d8b71bcb679e4cb37d0db104848806c ] Currently, the FEC capability bit is default set for device version V2. It's incorrect for the copper port. Eventhough it doesn't make the nic work abnormal, but the capability information display in debugfs may confuse user. So clear it when driver get the port type inforamtion. Fixes: 433ccce83504 ("net: hns3: use FEC capability queried from firmware") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index a884e8f7e947..af8a3e6429a7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11664,6 +11664,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_msi_irq_uninit; if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) { + clear_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps); if (hnae3_dev_phy_imp_supported(hdev)) ret = hclge_update_tp_port_info(hdev); else From f79d985c69060047426be68b7e4c1663d5d731b4 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Fri, 10 Nov 2023 17:37:10 +0800 Subject: [PATCH 0189/1397] net: hns3: fix out-of-bounds access may occur when coalesce info is read via debugfs [ Upstream commit 53aba458f23846112c0d44239580ff59bc5c36c3 ] The hns3 driver define an array of string to show the coalesce info, but if the kernel adds a new mode or a new state, out-of-bounds access may occur when coalesce info is read via debugfs, this patch fix the problem. Fixes: c99fead7cb07 ("net: hns3: add debugfs support for interrupt coalesce") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index b8508533878b..4f385a18d288 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -500,11 +500,14 @@ static void hns3_get_coal_info(struct hns3_enet_tqp_vector *tqp_vector, } sprintf(result[j++], "%d", i); - sprintf(result[j++], "%s", dim_state_str[dim->state]); + sprintf(result[j++], "%s", dim->state < ARRAY_SIZE(dim_state_str) ? + dim_state_str[dim->state] : "unknown"); sprintf(result[j++], "%u", dim->profile_ix); - sprintf(result[j++], "%s", dim_cqe_mode_str[dim->mode]); + sprintf(result[j++], "%s", dim->mode < ARRAY_SIZE(dim_cqe_mode_str) ? + dim_cqe_mode_str[dim->mode] : "unknown"); sprintf(result[j++], "%s", - dim_tune_stat_str[dim->tune_state]); + dim->tune_state < ARRAY_SIZE(dim_tune_stat_str) ? + dim_tune_stat_str[dim->tune_state] : "unknown"); sprintf(result[j++], "%u", dim->steps_left); sprintf(result[j++], "%u", dim->steps_right); sprintf(result[j++], "%u", dim->tired); From 43d85bdeac5acb831f5334a80404cfb46ca5870e Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Fri, 10 Nov 2023 17:37:11 +0800 Subject: [PATCH 0190/1397] net: hns3: fix variable may not initialized problem in hns3_init_mac_addr() [ Upstream commit dbd2f3b20c6ae425665b6975d766e3653d453e73 ] When a VF is calling hns3_init_mac_addr(), get_mac_addr() may return fail, then the value of mac_addr_temp is not initialized. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index cf50368441b7..677cfaa5fe08 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -5140,7 +5140,7 @@ static int hns3_init_mac_addr(struct net_device *netdev) struct hns3_nic_priv *priv = netdev_priv(netdev); char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN]; struct hnae3_handle *h = priv->ae_handle; - u8 mac_addr_temp[ETH_ALEN]; + u8 mac_addr_temp[ETH_ALEN] = {0}; int ret = 0; if (h->ae_algo->ops->get_mac_addr) From fbcad948c48625996d96c46fd81b9d2c586392c8 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Fri, 10 Nov 2023 17:37:12 +0800 Subject: [PATCH 0191/1397] net: hns3: fix VF reset fail issue [ Upstream commit 65e98bb56fa3ce2edb400930c05238c9b380500e ] Currently the reset process in hns3 and firmware watchdog init process is asynchronous. We think firmware watchdog initialization is completed before VF clear the interrupt source. However, firmware initialization may not complete early. So VF will receive multiple reset interrupts and fail to reset. So we add delay before VF interrupt source and 5 ms delay is enough to avoid second reset interrupt. Fixes: 427900d27d86 ("net: hns3: fix the timing issue of VF clearing interrupt sources") Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 14 +++++++++++++- .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 1c62e58ff6d8..0aa9beefd1c7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1981,8 +1981,18 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev, return HCLGEVF_VECTOR0_EVENT_OTHER; } +static void hclgevf_reset_timer(struct timer_list *t) +{ + struct hclgevf_dev *hdev = from_timer(hdev, t, reset_timer); + + hclgevf_clear_event_cause(hdev, HCLGEVF_VECTOR0_EVENT_RST); + hclgevf_reset_task_schedule(hdev); +} + static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) { +#define HCLGEVF_RESET_DELAY 5 + enum hclgevf_evt_cause event_cause; struct hclgevf_dev *hdev = data; u32 clearval; @@ -1994,7 +2004,8 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) switch (event_cause) { case HCLGEVF_VECTOR0_EVENT_RST: - hclgevf_reset_task_schedule(hdev); + mod_timer(&hdev->reset_timer, + jiffies + msecs_to_jiffies(HCLGEVF_RESET_DELAY)); break; case HCLGEVF_VECTOR0_EVENT_MBX: hclgevf_mbx_handler(hdev); @@ -2937,6 +2948,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) HCLGEVF_DRIVER_NAME); hclgevf_task_schedule(hdev, round_jiffies_relative(HZ)); + timer_setup(&hdev->reset_timer, hclgevf_reset_timer, 0); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 81c16b8c8da2..a73f2bf3a56a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -219,6 +219,7 @@ struct hclgevf_dev { enum hnae3_reset_type reset_level; unsigned long reset_pending; enum hnae3_reset_type reset_type; + struct timer_list reset_timer; #define HCLGEVF_RESET_REQUESTED 0 #define HCLGEVF_RESET_PENDING 1 From d282ea0a7a00ca0c0c9cad8ce4a65078da50abb9 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Fri, 10 Nov 2023 17:37:13 +0800 Subject: [PATCH 0192/1397] net: hns3: fix VF wrong speed and duplex issue [ Upstream commit dff655e82faffc287d4a72a59f66fa120bf904e4 ] If PF is down, firmware will returns 10 Mbit/s rate and half-duplex mode when PF queries the port information from firmware. After imp reset command is executed, PF status changes to down, and PF will query link status and updates port information from firmware in a periodic scheduled task. However, there is a low probability that port information is updated when PF is down, and then PF link status changes to up. In this case, PF synchronizes incorrect rate and duplex mode to VF. This patch fixes it by updating port information before PF synchronizes the rate and duplex to the VF when PF changes to up. Fixes: 18b6e31f8bf4 ("net: hns3: PF add support for pushing link status to VFs") Signed-off-by: Jijie Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index af8a3e6429a7..a61d9fd732b9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -61,6 +61,7 @@ static void hclge_sync_fd_table(struct hclge_dev *hdev); static void hclge_update_fec_stats(struct hclge_dev *hdev); static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret, int wait_cnt); +static int hclge_update_port_info(struct hclge_dev *hdev); static struct hnae3_ae_algo ae_algo; @@ -3043,6 +3044,9 @@ static void hclge_update_link_status(struct hclge_dev *hdev) if (state != hdev->hw.mac.link) { hdev->hw.mac.link = state; + if (state == HCLGE_LINK_STATUS_UP) + hclge_update_port_info(hdev); + client->ops->link_status_change(handle, state); hclge_config_mac_tnl_int(hdev, state); if (rclient && rclient->ops->link_status_change) From 927fff9bdbafa12b7e07d6538bfb0bb087c5f83e Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sat, 11 Nov 2023 01:39:47 +0900 Subject: [PATCH 0193/1397] tipc: Fix kernel-infoleak due to uninitialized TLV value [ Upstream commit fb317eb23b5ee4c37b0656a9a52a3db58d9dd072 ] KMSAN reported the following kernel-infoleak issue: ===================================================== BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in copy_to_user_iter lib/iov_iter.c:24 [inline] BUG: KMSAN: kernel-infoleak in iterate_ubuf include/linux/iov_iter.h:29 [inline] BUG: KMSAN: kernel-infoleak in iterate_and_advance2 include/linux/iov_iter.h:245 [inline] BUG: KMSAN: kernel-infoleak in iterate_and_advance include/linux/iov_iter.h:271 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x4ec/0x2bc0 lib/iov_iter.c:186 instrument_copy_to_user include/linux/instrumented.h:114 [inline] copy_to_user_iter lib/iov_iter.c:24 [inline] iterate_ubuf include/linux/iov_iter.h:29 [inline] iterate_and_advance2 include/linux/iov_iter.h:245 [inline] iterate_and_advance include/linux/iov_iter.h:271 [inline] _copy_to_iter+0x4ec/0x2bc0 lib/iov_iter.c:186 copy_to_iter include/linux/uio.h:197 [inline] simple_copy_to_iter net/core/datagram.c:532 [inline] __skb_datagram_iter.5+0x148/0xe30 net/core/datagram.c:420 skb_copy_datagram_iter+0x52/0x210 net/core/datagram.c:546 skb_copy_datagram_msg include/linux/skbuff.h:3960 [inline] netlink_recvmsg+0x43d/0x1630 net/netlink/af_netlink.c:1967 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg net/socket.c:1066 [inline] __sys_recvfrom+0x476/0x860 net/socket.c:2246 __do_sys_recvfrom net/socket.c:2264 [inline] __se_sys_recvfrom net/socket.c:2260 [inline] __x64_sys_recvfrom+0x130/0x200 net/socket.c:2260 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook+0x103/0x9e0 mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x5f7/0xb50 mm/slub.c:3523 kmalloc_reserve+0x13c/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x2fd/0x770 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1286 [inline] tipc_tlv_alloc net/tipc/netlink_compat.c:156 [inline] tipc_get_err_tlv+0x90/0x5d0 net/tipc/netlink_compat.c:170 tipc_nl_compat_recv+0x1042/0x15d0 net/tipc/netlink_compat.c:1324 genl_family_rcv_msg_doit net/netlink/genetlink.c:972 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1052 [inline] genl_rcv_msg+0x1220/0x12c0 net/netlink/genetlink.c:1067 netlink_rcv_skb+0x4a4/0x6a0 net/netlink/af_netlink.c:2545 genl_rcv+0x41/0x60 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] netlink_unicast+0xf4b/0x1230 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x1242/0x1420 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x997/0xd60 net/socket.c:2588 ___sys_sendmsg+0x271/0x3b0 net/socket.c:2642 __sys_sendmsg net/socket.c:2671 [inline] __do_sys_sendmsg net/socket.c:2680 [inline] __se_sys_sendmsg net/socket.c:2678 [inline] __x64_sys_sendmsg+0x2fa/0x4a0 net/socket.c:2678 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Bytes 34-35 of 36 are uninitialized Memory access of size 36 starts at ffff88802d464a00 Data copied to user address 00007ff55033c0a0 CPU: 0 PID: 30322 Comm: syz-executor.0 Not tainted 6.6.0-14500-g1c41041124bd #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 ===================================================== tipc_add_tlv() puts TLV descriptor and value onto `skb`. This size is calculated with TLV_SPACE() macro. It adds the size of struct tlv_desc and the length of TLV value passed as an argument, and aligns the result to a multiple of TLV_ALIGNTO, i.e., a multiple of 4 bytes. If the size of struct tlv_desc plus the length of TLV value is not aligned, the current implementation leaves the remaining bytes uninitialized. This is the cause of the above kernel-infoleak issue. This patch resolves this issue by clearing data up to an aligned size. Fixes: d0796d1ef63d ("tipc: convert legacy nl bearer dump to nl compat") Signed-off-by: Shigeru Yoshida Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/netlink_compat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 5bc076f2fa74..c763008a8adb 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -102,6 +102,7 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) return -EMSGSIZE; skb_put(skb, TLV_SPACE(len)); + memset(tlv, 0, TLV_SPACE(len)); tlv->tlv_type = htons(type); tlv->tlv_len = htons(TLV_LENGTH(len)); if (len && data) From 2b0e99072654edd601d05c0061a20337af5008ba Mon Sep 17 00:00:00 2001 From: Sven Auhagen Date: Sat, 11 Nov 2023 05:41:12 +0100 Subject: [PATCH 0194/1397] net: mvneta: fix calls to page_pool_get_stats [ Upstream commit ca8add922f9c7f6e2e3c71039da8e0dcc64b87ed ] Calling page_pool_get_stats in the mvneta driver without checks leads to kernel crashes. First the page pool is only available if the bm is not used. The page pool is also not allocated when the port is stopped. It can also be not allocated in case of errors. The current implementation leads to the following crash calling ethstats on a port that is down or when calling it at the wrong moment: ble to handle kernel NULL pointer dereference at virtual address 00000070 [00000070] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM Hardware name: Marvell Armada 380/385 (Device Tree) PC is at page_pool_get_stats+0x18/0x1cc LR is at mvneta_ethtool_get_stats+0xa0/0xe0 [mvneta] pc : [] lr : [] psr: a0000013 sp : f1439d48 ip : f1439dc0 fp : 0000001d r10: 00000100 r9 : c4816b80 r8 : f0d75150 r7 : bf0b400c r6 : c238f000 r5 : 00000000 r4 : f1439d68 r3 : c2091040 r2 : ffffffd8 r1 : f1439d68 r0 : 00000000 Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 066b004a DAC: 00000051 Register r0 information: NULL pointer Register r1 information: 2-page vmalloc region starting at 0xf1438000 allocated at kernel_clone+0x9c/0x390 Register r2 information: non-paged memory Register r3 information: slab kmalloc-2k start c2091000 pointer offset 64 size 2048 Register r4 information: 2-page vmalloc region starting at 0xf1438000 allocated at kernel_clone+0x9c/0x390 Register r5 information: NULL pointer Register r6 information: slab kmalloc-cg-4k start c238f000 pointer offset 0 size 4096 Register r7 information: 15-page vmalloc region starting at 0xbf0a8000 allocated at load_module+0xa30/0x219c Register r8 information: 1-page vmalloc region starting at 0xf0d75000 allocated at ethtool_get_stats+0x138/0x208 Register r9 information: slab task_struct start c4816b80 pointer offset 0 Register r10 information: non-paged memory Register r11 information: non-paged memory Register r12 information: 2-page vmalloc region starting at 0xf1438000 allocated at kernel_clone+0x9c/0x390 Process snmpd (pid: 733, stack limit = 0x38de3a88) Stack: (0xf1439d48 to 0xf143a000) 9d40: 000000c0 00000001 c238f000 bf0b400c f0d75150 c4816b80 9d60: 00000100 bf0a98d8 00000000 00000000 00000000 00000000 00000000 00000000 9d80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9da0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9dc0: 00000dc0 5335509c 00000035 c238f000 bf0b2214 01067f50 f0d75000 c0b9b9c8 9de0: 0000001d 00000035 c2212094 5335509c c4816b80 c238f000 c5ad6e00 01067f50 9e00: c1b0be80 c4816b80 00014813 c0b9d7f0 00000000 00000000 0000001d 0000001d 9e20: 00000000 00001200 00000000 00000000 c216ed90 c73943b8 00000000 00000000 9e40: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 9e60: 00000000 c0ad9034 00000000 00000000 00000000 00000000 00000000 00000000 9e80: 00000000 00000000 00000000 5335509c c1b0be80 f1439ee4 00008946 c1b0be80 9ea0: 01067f50 f1439ee3 00000000 00000046 b6d77ae0 c0b383f0 00008946 becc83e8 9ec0: c1b0be80 00000051 0000000b c68ca480 c7172d00 c0ad8ff0 f1439ee3 cf600e40 9ee0: 01600e40 32687465 00000000 00000000 00000000 01067f50 00000000 00000000 9f00: 00000000 5335509c 00008946 00008946 00000000 c68ca480 becc83e8 c05e2de0 9f20: f1439fb0 c03002f0 00000006 5ac3c35a c4816b80 00000006 b6d77ae0 c030caf0 9f40: c4817350 00000014 f1439e1c 0000000c 00000000 00000051 01000000 00000014 9f60: 00003fec f1439edc 00000001 c0372abc b6d77ae0 c0372abc cf600e40 5335509c 9f80: c21e6800 01015c9c 0000000b 00008946 00000036 c03002f0 c4816b80 00000036 9fa0: b6d77ae0 c03000c0 01015c9c 0000000b 0000000b 00008946 becc83e8 00000000 9fc0: 01015c9c 0000000b 00008946 00000036 00000035 010678a0 b6d797ec b6d77ae0 9fe0: b6dbf738 becc838c b6d186d7 b6baa858 40000030 0000000b 00000000 00000000 page_pool_get_stats from mvneta_ethtool_get_stats+0xa0/0xe0 [mvneta] mvneta_ethtool_get_stats [mvneta] from ethtool_get_stats+0x154/0x208 ethtool_get_stats from dev_ethtool+0xf48/0x2480 dev_ethtool from dev_ioctl+0x538/0x63c dev_ioctl from sock_ioctl+0x49c/0x53c sock_ioctl from sys_ioctl+0x134/0xbd8 sys_ioctl from ret_fast_syscall+0x0/0x1c Exception stack(0xf1439fa8 to 0xf1439ff0) 9fa0: 01015c9c 0000000b 0000000b 00008946 becc83e8 00000000 9fc0: 01015c9c 0000000b 00008946 00000036 00000035 010678a0 b6d797ec b6d77ae0 9fe0: b6dbf738 becc838c b6d186d7 b6baa858 Code: e28dd004 e1a05000 e2514000 0a00006a (e5902070) This commit adds the proper checks before calling page_pool_get_stats. Fixes: b3fc79225f05 ("net: mvneta: add support for page_pool_get_stats") Signed-off-by: Sven Auhagen Reported-by: Paulo Da Silva Acked-by: Lorenzo Bianconi Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvneta.c | 28 +++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index d483b8c00ec0..165f76d1231c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4790,14 +4790,17 @@ static void mvneta_ethtool_get_strings(struct net_device *netdev, u32 sset, u8 *data) { if (sset == ETH_SS_STATS) { + struct mvneta_port *pp = netdev_priv(netdev); int i; for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++) memcpy(data + i * ETH_GSTRING_LEN, mvneta_statistics[i].name, ETH_GSTRING_LEN); - data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics); - page_pool_ethtool_stats_get_strings(data); + if (!pp->bm_priv) { + data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics); + page_pool_ethtool_stats_get_strings(data); + } } } @@ -4915,8 +4918,10 @@ static void mvneta_ethtool_pp_stats(struct mvneta_port *pp, u64 *data) struct page_pool_stats stats = {}; int i; - for (i = 0; i < rxq_number; i++) - page_pool_get_stats(pp->rxqs[i].page_pool, &stats); + for (i = 0; i < rxq_number; i++) { + if (pp->rxqs[i].page_pool) + page_pool_get_stats(pp->rxqs[i].page_pool, &stats); + } page_pool_ethtool_stats_get(data, &stats); } @@ -4932,14 +4937,21 @@ static void mvneta_ethtool_get_stats(struct net_device *dev, for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++) *data++ = pp->ethtool_stats[i]; - mvneta_ethtool_pp_stats(pp, data); + if (!pp->bm_priv) + mvneta_ethtool_pp_stats(pp, data); } static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset) { - if (sset == ETH_SS_STATS) - return ARRAY_SIZE(mvneta_statistics) + - page_pool_ethtool_stats_get_count(); + if (sset == ETH_SS_STATS) { + int count = ARRAY_SIZE(mvneta_statistics); + struct mvneta_port *pp = netdev_priv(dev); + + if (!pp->bm_priv) + count += page_pool_ethtool_stats_get_count(); + + return count; + } return -EOPNOTSUPP; } From 0735ea00b2711f49bc79d5705b2e1e006a32a056 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 12 Nov 2023 22:16:32 -0500 Subject: [PATCH 0195/1397] ppp: limit MRU to 64K [ Upstream commit c0a2a1b0d631fc460d830f52d06211838874d655 ] ppp_sync_ioctl allows setting device MRU, but does not sanity check this input. Limit to a sane upper bound of 64KB. No implementation I could find generates larger than 64KB frames. RFC 2823 mentions an upper bound of PPP over SDL of 64KB based on the 16-bit length field. Other protocols will be smaller, such as PPPoE (9KB jumbo frame) and PPPoA (18190 maximum CPCS-SDU size, RFC 2364). PPTP and L2TP encapsulate in IP. Syzbot managed to trigger alloc warning in __alloc_pages: if (WARN_ON_ONCE_GFP(order > MAX_ORDER, gfp)) WARNING: CPU: 1 PID: 37 at mm/page_alloc.c:4544 __alloc_pages+0x3ab/0x4a0 mm/page_alloc.c:4544 __alloc_skb+0x12b/0x330 net/core/skbuff.c:651 __netdev_alloc_skb+0x72/0x3f0 net/core/skbuff.c:715 netdev_alloc_skb include/linux/skbuff.h:3225 [inline] dev_alloc_skb include/linux/skbuff.h:3238 [inline] ppp_sync_input drivers/net/ppp/ppp_synctty.c:669 [inline] ppp_sync_receive+0xff/0x680 drivers/net/ppp/ppp_synctty.c:334 tty_ldisc_receive_buf+0x14c/0x180 drivers/tty/tty_buffer.c:390 tty_port_default_receive_buf+0x70/0xb0 drivers/tty/tty_port.c:37 receive_buf drivers/tty/tty_buffer.c:444 [inline] flush_to_ldisc+0x261/0x780 drivers/tty/tty_buffer.c:494 process_one_work+0x884/0x15c0 kernel/workqueue.c:2630 With call ioctl$PPPIOCSMRU1(r1, 0x40047452, &(0x7f0000000100)=0x5e6417a8) Similar code exists in other drivers that implement ppp_channel_ops ioctl PPPIOCSMRU. Those might also be in scope. Notably excluded from this are pppol2tp_ioctl and pppoe_ioctl. This code goes back to the start of git history. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+6177e1f90d92583bcc58@syzkaller.appspotmail.com Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ppp/ppp_synctty.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index ea261a628786..52d05ce4a281 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -453,6 +453,10 @@ ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) case PPPIOCSMRU: if (get_user(val, (int __user *) argp)) break; + if (val > U16_MAX) { + err = -EINVAL; + break; + } if (val < PPP_MRU) val = PPP_MRU; ap->mru = val; From a7762cb2904603bf5e3001fa29f3ed6713b82daf Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 25 Sep 2023 17:54:13 +0200 Subject: [PATCH 0196/1397] xen/events: fix delayed eoi list handling [ Upstream commit 47d970204054f859f35a2237baa75c2d84fcf436 ] When delaying eoi handling of events, the related elements are queued into the percpu lateeoi list. In case the list isn't empty, the elements should be sorted by the time when eoi handling is to happen. Unfortunately a new element will never be queued at the start of the list, even if it has a handling time lower than all other list elements. Fix that by handling that case the same way as for an empty list. Fixes: e99502f76271 ("xen/events: defer eoi in case of excessive number of events") Reported-by: Jan Beulich Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/events/events_base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 2cf0c2b69386..c50419638ac0 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -604,7 +604,9 @@ static void lateeoi_list_add(struct irq_info *info) spin_lock_irqsave(&eoi->eoi_list_lock, flags); - if (list_empty(&eoi->eoi_list)) { + elem = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, + eoi_list); + if (!elem || info->eoi_time < elem->eoi_time) { list_add(&info->eoi_list, &eoi->eoi_list); mod_delayed_work_on(info->eoi_cpu, system_wq, &eoi->delayed, delay); From b80056bd75a16e4550873ecefe12bc8fd190b1cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Nov 2023 11:52:31 +0800 Subject: [PATCH 0197/1397] blk-mq: make sure active queue usage is held for bio_integrity_prep() [ Upstream commit b0077e269f6c152e807fdac90b58caf012cdbaab ] blk_integrity_unregister() can come if queue usage counter isn't held for one bio with integrity prepared, so this request may be completed with calling profile->complete_fn, then kernel panic. Another constraint is that bio_integrity_prep() needs to be called before bio merge. Fix the issue by: - call bio_integrity_prep() with one queue usage counter grabbed reliably - call bio_integrity_prep() before bio merge Fixes: 900e080752025f00 ("block: move queue enter logic into blk_mq_submit_bio()") Reported-by: Yi Zhang Cc: Christoph Hellwig Signed-off-by: Ming Lei Tested-by: Yi Zhang Link: https://lore.kernel.org/r/20231113035231.2708053-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-mq.c | 75 +++++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 1fafd54dce3c..6ab7f360ff2a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2875,11 +2875,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, }; struct request *rq; - if (unlikely(bio_queue_enter(bio))) - return NULL; - if (blk_mq_attempt_bio_merge(q, bio, nsegs)) - goto queue_exit; + return NULL; rq_qos_throttle(q, bio); @@ -2895,35 +2892,23 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, rq_qos_cleanup(q, bio); if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); -queue_exit: - blk_queue_exit(q); return NULL; } -static inline struct request *blk_mq_get_cached_request(struct request_queue *q, - struct blk_plug *plug, struct bio **bio, unsigned int nsegs) +/* return true if this @rq can be used for @bio */ +static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug, + struct bio *bio) { - struct request *rq; - enum hctx_type type, hctx_type; + enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf); + enum hctx_type hctx_type = rq->mq_hctx->type; - if (!plug) - return NULL; - rq = rq_list_peek(&plug->cached_rq); - if (!rq || rq->q != q) - return NULL; + WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq); - if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) { - *bio = NULL; - return NULL; - } - - type = blk_mq_get_hctx_type((*bio)->bi_opf); - hctx_type = rq->mq_hctx->type; if (type != hctx_type && !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT)) - return NULL; - if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf)) - return NULL; + return false; + if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf)) + return false; /* * If any qos ->throttle() end up blocking, we will have flushed the @@ -2931,12 +2916,12 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q, * before we throttle. */ plug->cached_rq = rq_list_next(rq); - rq_qos_throttle(q, *bio); + rq_qos_throttle(rq->q, bio); blk_mq_rq_time_init(rq, 0); - rq->cmd_flags = (*bio)->bi_opf; + rq->cmd_flags = bio->bi_opf; INIT_LIST_HEAD(&rq->queuelist); - return rq; + return true; } static void bio_set_ioprio(struct bio *bio) @@ -2966,7 +2951,7 @@ void blk_mq_submit_bio(struct bio *bio) struct blk_plug *plug = blk_mq_plug(bio); const int is_sync = op_is_sync(bio->bi_opf); struct blk_mq_hw_ctx *hctx; - struct request *rq; + struct request *rq = NULL; unsigned int nr_segs = 1; blk_status_t ret; @@ -2977,20 +2962,36 @@ void blk_mq_submit_bio(struct bio *bio) return; } - if (!bio_integrity_prep(bio)) - return; - bio_set_ioprio(bio); - rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs); - if (!rq) { - if (!bio) + if (plug) { + rq = rq_list_peek(&plug->cached_rq); + if (rq && rq->q != q) + rq = NULL; + } + if (rq) { + if (!bio_integrity_prep(bio)) return; - rq = blk_mq_get_new_requests(q, plug, bio, nr_segs); - if (unlikely(!rq)) + if (blk_mq_attempt_bio_merge(q, bio, nr_segs)) return; + if (blk_mq_can_use_cached_rq(rq, plug, bio)) + goto done; + percpu_ref_get(&q->q_usage_counter); + } else { + if (unlikely(bio_queue_enter(bio))) + return; + if (!bio_integrity_prep(bio)) + goto fail; + } + + rq = blk_mq_get_new_requests(q, plug, bio, nr_segs); + if (unlikely(!rq)) { +fail: + blk_queue_exit(q); + return; } +done: trace_block_getrq(bio); rq_qos_track(q, rq, bio); From c86085585de15a2e73697e6adfc86b5baa891fff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Nov 2023 17:48:59 +0000 Subject: [PATCH 0198/1397] ptp: annotate data-race around q->head and q->tail [ Upstream commit 73bde5a3294853947252cd9092a3517c7cb0cd2d ] As I was working on a syzbot report, I found that KCSAN would probably complain that reading q->head or q->tail without barriers could lead to invalid results. Add corresponding READ_ONCE() and WRITE_ONCE() to avoid load-store tearing. Fixes: d94ba80ebbea ("ptp: Added a brand new class driver for ptp clocks.") Signed-off-by: Eric Dumazet Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20231109174859.3995880-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/ptp/ptp_chardev.c | 3 ++- drivers/ptp/ptp_clock.c | 5 +++-- drivers/ptp/ptp_private.h | 8 ++++++-- drivers/ptp/ptp_sysfs.c | 3 ++- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 362bf756e6b7..5a3a4cc0bec8 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -490,7 +490,8 @@ ssize_t ptp_read(struct posix_clock *pc, for (i = 0; i < cnt; i++) { event[i] = queue->buf[queue->head]; - queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + /* Paired with READ_ONCE() in queue_cnt() */ + WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS); } spin_unlock_irqrestore(&queue->lock, flags); diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 80f74e38c2da..9a50bfb56453 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -56,10 +56,11 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue, dst->t.sec = seconds; dst->t.nsec = remainder; + /* Both WRITE_ONCE() are paired with READ_ONCE() in queue_cnt() */ if (!queue_free(queue)) - queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS); - queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS; + WRITE_ONCE(queue->tail, (queue->tail + 1) % PTP_MAX_TIMESTAMPS); spin_unlock_irqrestore(&queue->lock, flags); } diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 75f58fc468a7..b8d4f61f14be 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -76,9 +76,13 @@ struct ptp_vclock { * that a writer might concurrently increment the tail does not * matter, since the queue remains nonempty nonetheless. */ -static inline int queue_cnt(struct timestamp_event_queue *q) +static inline int queue_cnt(const struct timestamp_event_queue *q) { - int cnt = q->tail - q->head; + /* + * Paired with WRITE_ONCE() in enqueue_external_timestamp(), + * ptp_read(), extts_fifo_show(). + */ + int cnt = READ_ONCE(q->tail) - READ_ONCE(q->head); return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt; } diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 6e4d5456a885..34ea5c16123a 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -90,7 +90,8 @@ static ssize_t extts_fifo_show(struct device *dev, qcnt = queue_cnt(queue); if (qcnt) { event = queue->buf[queue->head]; - queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS; + /* Paired with READ_ONCE() in queue_cnt() */ + WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS); } spin_unlock_irqrestore(&queue->lock, flags); From d98c91215a5748a0f536e7ccea26027005196859 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Nov 2023 18:01:02 +0000 Subject: [PATCH 0199/1397] bonding: stop the device in bond_setup_by_slave() [ Upstream commit 3cffa2ddc4d3fcf70cde361236f5a614f81a09b2 ] Commit 9eed321cde22 ("net: lapbether: only support ethernet devices") has been able to keep syzbot away from net/lapb, until today. In the following splat [1], the issue is that a lapbether device has been created on a bonding device without members. Then adding a non ARPHRD_ETHER member forced the bonding master to change its type. The fix is to make sure we call dev_close() in bond_setup_by_slave() so that the potential linked lapbether devices (or any other devices having assumptions on the physical device) are removed. A similar bug has been addressed in commit 40baec225765 ("bonding: fix panic on non-ARPHRD_ETHER enslave failure") [1] skbuff: skb_under_panic: text:ffff800089508810 len:44 put:40 head:ffff0000c78e7c00 data:ffff0000c78e7bea tail:0x16 end:0x140 dev:bond0 kernel BUG at net/core/skbuff.c:192 ! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 6007 Comm: syz-executor383 Not tainted 6.6.0-rc3-syzkaller-gbf6547d8715b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_panic net/core/skbuff.c:188 [inline] pc : skb_under_panic+0x13c/0x140 net/core/skbuff.c:202 lr : skb_panic net/core/skbuff.c:188 [inline] lr : skb_under_panic+0x13c/0x140 net/core/skbuff.c:202 sp : ffff800096a06aa0 x29: ffff800096a06ab0 x28: ffff800096a06ba0 x27: dfff800000000000 x26: ffff0000ce9b9b50 x25: 0000000000000016 x24: ffff0000c78e7bea x23: ffff0000c78e7c00 x22: 000000000000002c x21: 0000000000000140 x20: 0000000000000028 x19: ffff800089508810 x18: ffff800096a06100 x17: 0000000000000000 x16: ffff80008a629a3c x15: 0000000000000001 x14: 1fffe00036837a32 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000201 x10: 0000000000000000 x9 : cb50b496c519aa00 x8 : cb50b496c519aa00 x7 : 0000000000000001 x6 : 0000000000000001 x5 : ffff800096a063b8 x4 : ffff80008e280f80 x3 : ffff8000805ad11c x2 : 0000000000000001 x1 : 0000000100000201 x0 : 0000000000000086 Call trace: skb_panic net/core/skbuff.c:188 [inline] skb_under_panic+0x13c/0x140 net/core/skbuff.c:202 skb_push+0xf0/0x108 net/core/skbuff.c:2446 ip6gre_header+0xbc/0x738 net/ipv6/ip6_gre.c:1384 dev_hard_header include/linux/netdevice.h:3136 [inline] lapbeth_data_transmit+0x1c4/0x298 drivers/net/wan/lapbether.c:257 lapb_data_transmit+0x8c/0xb0 net/lapb/lapb_iface.c:447 lapb_transmit_buffer+0x178/0x204 net/lapb/lapb_out.c:149 lapb_send_control+0x220/0x320 net/lapb/lapb_subr.c:251 __lapb_disconnect_request+0x9c/0x17c net/lapb/lapb_iface.c:326 lapb_device_event+0x288/0x4e0 net/lapb/lapb_iface.c:492 notifier_call_chain+0x1a4/0x510 kernel/notifier.c:93 raw_notifier_call_chain+0x3c/0x50 kernel/notifier.c:461 call_netdevice_notifiers_info net/core/dev.c:1970 [inline] call_netdevice_notifiers_extack net/core/dev.c:2008 [inline] call_netdevice_notifiers net/core/dev.c:2022 [inline] __dev_close_many+0x1b8/0x3c4 net/core/dev.c:1508 dev_close_many+0x1e0/0x470 net/core/dev.c:1559 dev_close+0x174/0x250 net/core/dev.c:1585 lapbeth_device_event+0x2e4/0x958 drivers/net/wan/lapbether.c:466 notifier_call_chain+0x1a4/0x510 kernel/notifier.c:93 raw_notifier_call_chain+0x3c/0x50 kernel/notifier.c:461 call_netdevice_notifiers_info net/core/dev.c:1970 [inline] call_netdevice_notifiers_extack net/core/dev.c:2008 [inline] call_netdevice_notifiers net/core/dev.c:2022 [inline] __dev_close_many+0x1b8/0x3c4 net/core/dev.c:1508 dev_close_many+0x1e0/0x470 net/core/dev.c:1559 dev_close+0x174/0x250 net/core/dev.c:1585 bond_enslave+0x2298/0x30cc drivers/net/bonding/bond_main.c:2332 bond_do_ioctl+0x268/0xc64 drivers/net/bonding/bond_main.c:4539 dev_ifsioc+0x754/0x9ac dev_ioctl+0x4d8/0xd34 net/core/dev_ioctl.c:786 sock_do_ioctl+0x1d4/0x2d0 net/socket.c:1217 sock_ioctl+0x4e8/0x834 net/socket.c:1322 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __arm64_sys_ioctl+0x14c/0x1c8 fs/ioctl.c:857 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 el0_svc+0x58/0x16c arch/arm64/kernel/entry-common.c:678 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591 Code: aa1803e6 aa1903e7 a90023f5 94785b8b (d4210000) Fixes: 872254dd6b1f ("net/bonding: Enable bonding to enslave non ARPHRD_ETHER") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Jay Vosburgh Reviewed-by: Hangbin Liu Link: https://lore.kernel.org/r/20231109180102.4085183-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 51d47eda1c87..8e6cc0e133b7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1500,6 +1500,10 @@ static void bond_compute_features(struct bonding *bond) static void bond_setup_by_slave(struct net_device *bond_dev, struct net_device *slave_dev) { + bool was_up = !!(bond_dev->flags & IFF_UP); + + dev_close(bond_dev); + bond_dev->header_ops = slave_dev->header_ops; bond_dev->type = slave_dev->type; @@ -1514,6 +1518,8 @@ static void bond_setup_by_slave(struct net_device *bond_dev, bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST); bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP); } + if (was_up) + dev_open(bond_dev, NULL); } /* On bonding slaves other than the currently active slave, suppress From fa5fcabf5a25e3bad0fa5e432531d533b51b2ecd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 9 Nov 2023 10:03:12 +0100 Subject: [PATCH 0200/1397] net: ethernet: cortina: Fix max RX frame define [ Upstream commit 510e35fb931ffc3b100e5d5ae4595cd3beca9f1a ] Enumerator 3 is 1548 bytes according to the datasheet. Not 1542. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Reviewed-by: Andrew Lunn Signed-off-by: Linus Walleij Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20231109-gemini-largeframe-fix-v4-1-6e611528db08@linaro.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/cortina/gemini.c | 4 ++-- drivers/net/ethernet/cortina/gemini.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index a8b9d1a3e4d5..5bdd1b252840 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -432,8 +432,8 @@ static const struct gmac_max_framelen gmac_maxlens[] = { .val = CONFIG0_MAXLEN_1536, }, { - .max_l3_len = 1542, - .val = CONFIG0_MAXLEN_1542, + .max_l3_len = 1548, + .val = CONFIG0_MAXLEN_1548, }, { .max_l3_len = 9212, diff --git a/drivers/net/ethernet/cortina/gemini.h b/drivers/net/ethernet/cortina/gemini.h index 9fdf77d5eb37..99efb1155743 100644 --- a/drivers/net/ethernet/cortina/gemini.h +++ b/drivers/net/ethernet/cortina/gemini.h @@ -787,7 +787,7 @@ union gmac_config0 { #define CONFIG0_MAXLEN_1536 0 #define CONFIG0_MAXLEN_1518 1 #define CONFIG0_MAXLEN_1522 2 -#define CONFIG0_MAXLEN_1542 3 +#define CONFIG0_MAXLEN_1548 3 #define CONFIG0_MAXLEN_9k 4 /* 9212 */ #define CONFIG0_MAXLEN_10k 5 /* 10236 */ #define CONFIG0_MAXLEN_1518__6 6 From fdd624d1376f3e9f0ec0485f0bf4e3d860643def Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 9 Nov 2023 10:03:13 +0100 Subject: [PATCH 0201/1397] net: ethernet: cortina: Handle large frames [ Upstream commit d4d0c5b4d279bfe3585fbd806efefd3e51c82afa ] The Gemini ethernet controller provides hardware checksumming for frames up to 1514 bytes including ethernet headers but not FCS. If we start sending bigger frames (after first bumping up the MTU on both interfaces sending and receiving the frames), truncated packets start to appear on the target such as in this tcpdump resulting from ping -s 1474: 23:34:17.241983 14:d6:4d:a8:3c:4f (oui Unknown) > bc:ae:c5:6b:a8:3d (oui Unknown), ethertype IPv4 (0x0800), length 1514: truncated-ip - 2 bytes missing! (tos 0x0, ttl 64, id 32653, offset 0, flags [DF], proto ICMP (1), length 1502) OpenWrt.lan > Fecusia: ICMP echo request, id 1672, seq 50, length 1482 If we bypass the hardware checksumming and provide a software fallback, everything starts working fine up to the max TX MTU of 2047 bytes, for example ping -s2000 192.168.1.2: 00:44:29.587598 bc:ae:c5:6b:a8:3d (oui Unknown) > 14:d6:4d:a8:3c:4f (oui Unknown), ethertype IPv4 (0x0800), length 2042: (tos 0x0, ttl 64, id 51828, offset 0, flags [none], proto ICMP (1), length 2028) Fecusia > OpenWrt.lan: ICMP echo reply, id 1683, seq 4, length 2008 The bit enabling to bypass hardware checksum (or any of the "TSS" bits) are undocumented in the hardware reference manual. The entire hardware checksum unit appears undocumented. The conclusion that we need to use the "bypass" bit was found by trial-and-error. Since no hardware checksum will happen, we slot in a software checksum fallback. Check for the condition where we need to compute checksum on the skb with either hardware or software using == CHECKSUM_PARTIAL instead of != CHECKSUM_NONE which is an incomplete check according to . On the D-Link DIR-685 router this fixes a bug on the conduit interface to the RTL8366RB DSA switch: as the switch needs to add space for its tag it increases the MTU on the conduit interface to 1504 and that means that when the router sends packages of 1500 bytes these get an extra 4 bytes of DSA tag and the transfer fails because of the erroneous hardware checksumming, affecting such basic functionality as the LuCI web interface. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Signed-off-by: Linus Walleij Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20231109-gemini-largeframe-fix-v4-2-6e611528db08@linaro.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/cortina/gemini.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 5bdd1b252840..dbbccef86516 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1145,6 +1145,7 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb, dma_addr_t mapping; unsigned short mtu; void *buffer; + int ret; mtu = ETH_HLEN; mtu += netdev->mtu; @@ -1159,9 +1160,30 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb, word3 |= mtu; } - if (skb->ip_summed != CHECKSUM_NONE) { + if (skb->len >= ETH_FRAME_LEN) { + /* Hardware offloaded checksumming isn't working on frames + * bigger than 1514 bytes. A hypothesis about this is that the + * checksum buffer is only 1518 bytes, so when the frames get + * bigger they get truncated, or the last few bytes get + * overwritten by the FCS. + * + * Just use software checksumming and bypass on bigger frames. + */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + ret = skb_checksum_help(skb); + if (ret) + return ret; + } + word1 |= TSS_BYPASS_BIT; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { int tcp = 0; + /* We do not switch off the checksumming on non TCP/UDP + * frames: as is shown from tests, the checksumming engine + * is smart enough to see that a frame is not actually TCP + * or UDP and then just pass it through without any changes + * to the frame. + */ if (skb->protocol == htons(ETH_P_IP)) { word1 |= TSS_IP_CHKSUM_BIT; tcp = ip_hdr(skb)->protocol == IPPROTO_TCP; From 2fcf4d7c1d2e18133733c0cba7b3a7b4b56135a3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 9 Nov 2023 10:03:14 +0100 Subject: [PATCH 0202/1397] net: ethernet: cortina: Fix MTU max setting [ Upstream commit dc6c0bfbaa947dd7976e30e8c29b10c868b6fa42 ] The RX max frame size is over 10000 for the Gemini ethernet, but the TX max frame size is actually just 2047 (0x7ff after checking the datasheet). Reflect this in what we offer to Linux, cap the MTU at the TX max frame minus ethernet headers. We delete the code disabling the hardware checksum for large MTUs as netdev->mtu can no longer be larger than netdev->max_mtu meaning the if()-clause in gmac_fix_features() is never true. Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet") Reviewed-by: Andrew Lunn Signed-off-by: Linus Walleij Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20231109-gemini-largeframe-fix-v4-3-6e611528db08@linaro.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/cortina/gemini.c | 17 ++++------------- drivers/net/ethernet/cortina/gemini.h | 2 +- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index dbbccef86516..636949737d72 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -2000,15 +2000,6 @@ static int gmac_change_mtu(struct net_device *netdev, int new_mtu) return 0; } -static netdev_features_t gmac_fix_features(struct net_device *netdev, - netdev_features_t features) -{ - if (netdev->mtu + ETH_HLEN + VLAN_HLEN > MTU_SIZE_BIT_MASK) - features &= ~GMAC_OFFLOAD_FEATURES; - - return features; -} - static int gmac_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2234,7 +2225,6 @@ static const struct net_device_ops gmac_351x_ops = { .ndo_set_mac_address = gmac_set_mac_address, .ndo_get_stats64 = gmac_get_stats64, .ndo_change_mtu = gmac_change_mtu, - .ndo_fix_features = gmac_fix_features, .ndo_set_features = gmac_set_features, }; @@ -2486,11 +2476,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev) netdev->hw_features = GMAC_OFFLOAD_FEATURES; netdev->features |= GMAC_OFFLOAD_FEATURES | NETIF_F_GRO; - /* We can handle jumbo frames up to 10236 bytes so, let's accept - * payloads of 10236 bytes minus VLAN and ethernet header + /* We can receive jumbo frames up to 10236 bytes but only + * transmit 2047 bytes so, let's accept payloads of 2047 + * bytes minus VLAN and ethernet header */ netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = 10236 - VLAN_ETH_HLEN; + netdev->max_mtu = MTU_SIZE_BIT_MASK - VLAN_ETH_HLEN; port->freeq_refill = 0; netif_napi_add(netdev, &port->napi, gmac_napi_poll); diff --git a/drivers/net/ethernet/cortina/gemini.h b/drivers/net/ethernet/cortina/gemini.h index 99efb1155743..24bb989981f2 100644 --- a/drivers/net/ethernet/cortina/gemini.h +++ b/drivers/net/ethernet/cortina/gemini.h @@ -502,7 +502,7 @@ union gmac_txdesc_3 { #define SOF_BIT 0x80000000 #define EOF_BIT 0x40000000 #define EOFIE_BIT BIT(29) -#define MTU_SIZE_BIT_MASK 0x1fff +#define MTU_SIZE_BIT_MASK 0x7ff /* Max MTU 2047 bytes */ /* GMAC Tx Descriptor */ struct gmac_txdesc { From 069a3ec329ff43e7869a3d94c62cd03203016bce Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Nov 2023 13:49:38 +0000 Subject: [PATCH 0203/1397] af_unix: fix use-after-free in unix_stream_read_actor() [ Upstream commit 4b7b492615cf3017190f55444f7016812b66611d ] syzbot reported the following crash [1] After releasing unix socket lock, u->oob_skb can be changed by another thread. We must temporarily increase skb refcount to make sure this other thread will not free the skb under us. [1] BUG: KASAN: slab-use-after-free in unix_stream_read_actor+0xa7/0xc0 net/unix/af_unix.c:2866 Read of size 4 at addr ffff88801f3b9cc4 by task syz-executor107/5297 CPU: 1 PID: 5297 Comm: syz-executor107 Not tainted 6.6.0-syzkaller-15910-gb8e3a87a627b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 unix_stream_read_actor+0xa7/0xc0 net/unix/af_unix.c:2866 unix_stream_recv_urg net/unix/af_unix.c:2587 [inline] unix_stream_read_generic+0x19a5/0x2480 net/unix/af_unix.c:2666 unix_stream_recvmsg+0x189/0x1b0 net/unix/af_unix.c:2903 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg+0xe2/0x170 net/socket.c:1066 ____sys_recvmsg+0x21f/0x5c0 net/socket.c:2803 ___sys_recvmsg+0x115/0x1a0 net/socket.c:2845 __sys_recvmsg+0x114/0x1e0 net/socket.c:2875 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7fc67492c559 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fc6748ab228 EFLAGS: 00000246 ORIG_RAX: 000000000000002f RAX: ffffffffffffffda RBX: 000000000000001c RCX: 00007fc67492c559 RDX: 0000000040010083 RSI: 0000000020000140 RDI: 0000000000000004 RBP: 00007fc6749b6348 R08: 00007fc6748ab6c0 R09: 00007fc6748ab6c0 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc6749b6340 R13: 00007fc6749b634c R14: 00007ffe9fac52a0 R15: 00007ffe9fac5388 Allocated by task 5295: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 __kasan_slab_alloc+0x81/0x90 mm/kasan/common.c:328 kasan_slab_alloc include/linux/kasan.h:188 [inline] slab_post_alloc_hook mm/slab.h:763 [inline] slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x180/0x3c0 mm/slub.c:3523 __alloc_skb+0x287/0x330 net/core/skbuff.c:641 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xe4/0x710 net/core/skbuff.c:6331 sock_alloc_send_pskb+0x7e4/0x970 net/core/sock.c:2780 sock_alloc_send_skb include/net/sock.h:1884 [inline] queue_oob net/unix/af_unix.c:2147 [inline] unix_stream_sendmsg+0xb5f/0x10a0 net/unix/af_unix.c:2301 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Freed by task 5295: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:522 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x15b/0x1b0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:164 [inline] slab_free_hook mm/slub.c:1800 [inline] slab_free_freelist_hook+0x114/0x1e0 mm/slub.c:1826 slab_free mm/slub.c:3809 [inline] kmem_cache_free+0xf8/0x340 mm/slub.c:3831 kfree_skbmem+0xef/0x1b0 net/core/skbuff.c:1015 __kfree_skb net/core/skbuff.c:1073 [inline] consume_skb net/core/skbuff.c:1288 [inline] consume_skb+0xdf/0x170 net/core/skbuff.c:1282 queue_oob net/unix/af_unix.c:2178 [inline] unix_stream_sendmsg+0xd49/0x10a0 net/unix/af_unix.c:2301 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b The buggy address belongs to the object at ffff88801f3b9c80 which belongs to the cache skbuff_head_cache of size 240 The buggy address is located 68 bytes inside of freed 240-byte region [ffff88801f3b9c80, ffff88801f3b9d70) The buggy address belongs to the physical page: page:ffffea00007cee40 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1f3b9 flags: 0xfff00000000800(slab|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000800 ffff888142a60640 dead000000000122 0000000000000000 raw: 0000000000000000 00000000000c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x12cc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY), pid 5299, tgid 5283 (syz-executor107), ts 103803840339, free_ts 103600093431 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x2cf/0x340 mm/page_alloc.c:1537 prep_new_page mm/page_alloc.c:1544 [inline] get_page_from_freelist+0xa25/0x36c0 mm/page_alloc.c:3312 __alloc_pages+0x1d0/0x4a0 mm/page_alloc.c:4568 alloc_pages_mpol+0x258/0x5f0 mm/mempolicy.c:2133 alloc_slab_page mm/slub.c:1870 [inline] allocate_slab+0x251/0x380 mm/slub.c:2017 new_slab mm/slub.c:2070 [inline] ___slab_alloc+0x8c7/0x1580 mm/slub.c:3223 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3322 __slab_alloc_node mm/slub.c:3375 [inline] slab_alloc_node mm/slub.c:3468 [inline] kmem_cache_alloc_node+0x132/0x3c0 mm/slub.c:3523 __alloc_skb+0x287/0x330 net/core/skbuff.c:641 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xe4/0x710 net/core/skbuff.c:6331 sock_alloc_send_pskb+0x7e4/0x970 net/core/sock.c:2780 sock_alloc_send_skb include/net/sock.h:1884 [inline] queue_oob net/unix/af_unix.c:2147 [inline] unix_stream_sendmsg+0xb5f/0x10a0 net/unix/af_unix.c:2301 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1137 [inline] free_unref_page_prepare+0x4f8/0xa90 mm/page_alloc.c:2347 free_unref_page+0x33/0x3b0 mm/page_alloc.c:2487 __unfreeze_partials+0x21d/0x240 mm/slub.c:2655 qlink_free mm/kasan/quarantine.c:168 [inline] qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187 kasan_quarantine_reduce+0x18e/0x1d0 mm/kasan/quarantine.c:294 __kasan_slab_alloc+0x65/0x90 mm/kasan/common.c:305 kasan_slab_alloc include/linux/kasan.h:188 [inline] slab_post_alloc_hook mm/slab.h:763 [inline] slab_alloc_node mm/slub.c:3478 [inline] slab_alloc mm/slub.c:3486 [inline] __kmem_cache_alloc_lru mm/slub.c:3493 [inline] kmem_cache_alloc+0x15d/0x380 mm/slub.c:3502 vm_area_dup+0x21/0x2f0 kernel/fork.c:500 __split_vma+0x17d/0x1070 mm/mmap.c:2365 split_vma mm/mmap.c:2437 [inline] vma_modify+0x25d/0x450 mm/mmap.c:2472 vma_modify_flags include/linux/mm.h:3271 [inline] mprotect_fixup+0x228/0xc80 mm/mprotect.c:635 do_mprotect_pkey+0x852/0xd60 mm/mprotect.c:809 __do_sys_mprotect mm/mprotect.c:830 [inline] __se_sys_mprotect mm/mprotect.c:827 [inline] __x64_sys_mprotect+0x78/0xb0 mm/mprotect.c:827 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Memory state around the buggy address: ffff88801f3b9b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88801f3b9c00: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc >ffff88801f3b9c80: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88801f3b9d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc ffff88801f3b9d80: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb Fixes: 876c14ad014d ("af_unix: fix holding spinlock in oob handling") Reported-and-tested-by: syzbot+7a2d546fa43e49315ed3@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Rao Shoaib Reviewed-by: Rao shoaib Link: https://lore.kernel.org/r/20231113134938.168151-1-edumazet@google.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3e8a04a13668..3e6eeacb13ae 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2553,15 +2553,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) if (!(state->flags & MSG_PEEK)) WRITE_ONCE(u->oob_skb, NULL); - + else + skb_get(oob_skb); unix_state_unlock(sk); chunk = state->recv_actor(oob_skb, 0, chunk, state); - if (!(state->flags & MSG_PEEK)) { + if (!(state->flags & MSG_PEEK)) UNIXCB(oob_skb).consumed += 1; - kfree_skb(oob_skb); - } + + consume_skb(oob_skb); mutex_unlock(&u->iolock); From 5772b7309818c5c447d01d1f3eae9f93f2c64a6b Mon Sep 17 00:00:00 2001 From: Linkui Xiao Date: Wed, 1 Nov 2023 11:20:18 +0800 Subject: [PATCH 0204/1397] netfilter: nf_conntrack_bridge: initialize err to 0 [ Upstream commit a44af08e3d4d7566eeea98d7a29fe06e7b9de944 ] K2CI reported a problem: consume_skb(skb); return err; [nf_br_ip_fragment() error] uninitialized symbol 'err'. err is not initialized, because returning 0 is expected, initialize err to 0. Fixes: 3c171f496ef5 ("netfilter: bridge: add connection tracking system") Reported-by: k2ci Signed-off-by: Linkui Xiao Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/netfilter/nf_conntrack_bridge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 71056ee84773..0fcf357ea7ad 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -37,7 +37,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, ktime_t tstamp = skb->tstamp; struct ip_frag_state state; struct iphdr *iph; - int err; + int err = 0; /* for offloaded checksums cleanup checksum before fragmentation */ if (skb->ip_summed == CHECKSUM_PARTIAL && From 013deed31ab15ef287b0045e4e7bd8f250e75b94 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Nov 2023 09:42:51 +0300 Subject: [PATCH 0205/1397] netfilter: nf_tables: fix pointer math issue in nft_byteorder_eval() [ Upstream commit c301f0981fdd3fd1ffac6836b423c4d7a8e0eb63 ] The problem is in nft_byteorder_eval() where we are iterating through a loop and writing to dst[0], dst[1], dst[2] and so on... On each iteration we are writing 8 bytes. But dst[] is an array of u32 so each element only has space for 4 bytes. That means that every iteration overwrites part of the previous element. I spotted this bug while reviewing commit caf3ef7468f7 ("netfilter: nf_tables: prevent OOB access in nft_byteorder_eval") which is a related issue. I think that the reason we have not detected this bug in testing is that most of time we only write one element. Fixes: ce1e7989d989 ("netfilter: nft_byteorder: provide 64bit le/be conversion") Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_tables.h | 4 ++-- net/netfilter/nft_byteorder.c | 5 +++-- net/netfilter/nft_meta.c | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7c816359d5a9..75972e211ba1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -178,9 +178,9 @@ static inline __be32 nft_reg_load_be32(const u32 *sreg) return *(__force __be32 *)sreg; } -static inline void nft_reg_store64(u32 *dreg, u64 val) +static inline void nft_reg_store64(u64 *dreg, u64 val) { - put_unaligned(val, (u64 *)dreg); + put_unaligned(val, dreg); } static inline u64 nft_reg_load64(const u32 *sreg) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index e596d1a842f7..f6e791a68101 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -38,13 +38,14 @@ void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->size) { case 8: { + u64 *dst64 = (void *)dst; u64 src64; switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 8; i++) { src64 = nft_reg_load64(&src[i]); - nft_reg_store64(&dst[i], + nft_reg_store64(&dst64[i], be64_to_cpu((__force __be64)src64)); } break; @@ -52,7 +53,7 @@ void nft_byteorder_eval(const struct nft_expr *expr, for (i = 0; i < priv->len / 8; i++) { src64 = (__force __u64) cpu_to_be64(nft_reg_load64(&src[i])); - nft_reg_store64(&dst[i], src64); + nft_reg_store64(&dst64[i], src64); } break; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index f7da7c43333b..ba0d3683a45d 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -63,7 +63,7 @@ nft_meta_get_eval_time(enum nft_meta_keys key, { switch (key) { case NFT_META_TIME_NS: - nft_reg_store64(dest, ktime_get_real_ns()); + nft_reg_store64((u64 *)dest, ktime_get_real_ns()); break; case NFT_META_TIME_DAY: nft_reg_store8(dest, nft_meta_weekday()); From f603b616bafe242efd597dfaef42b22ae6ade0ce Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Nov 2023 20:34:56 +0100 Subject: [PATCH 0206/1397] netfilter: nf_tables: bogus ENOENT when destroying element which does not exist [ Upstream commit a7d5a955bfa854ac6b0c53aaf933394b4e6139e4 ] destroy element command bogusly reports ENOENT in case a set element does not exist. ENOENT errors are skipped, however, err is still set and propagated to userspace. # nft destroy element ip raw BLACKLIST { 1.2.3.4 } Error: Could not process rule: No such file or directory destroy element ip raw BLACKLIST { 1.2.3.4 } ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Fixes: f80a612dd77c ("netfilter: nf_tables: add support to destroy operation") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3bf428a188cc..3807c6c1181f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7205,10 +7205,11 @@ static int nf_tables_delsetelem(struct sk_buff *skb, if (err < 0) { NL_SET_BAD_ATTR(extack, attr); - break; + return err; } } - return err; + + return 0; } /* From e5d20035855eb6d62eb78a5e5f0ac0a33fbdea01 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 13 Nov 2023 19:42:49 +0200 Subject: [PATCH 0207/1397] net: stmmac: fix rx budget limit check [ Upstream commit fa02de9e75889915b554eda1964a631fd019973b ] The while loop condition verifies 'count < limit'. Neither value change before the 'count >= limit' check. As is this check is dead code. But code inspection reveals a code path that modifies 'count' and then goto 'drain_data' and back to 'read_again'. So there is a need to verify count value sanity after 'read_again'. Move 'read_again' up to fix the count limit check. Fixes: ec222003bd94 ("net: stmmac: Prepare to add Split Header support") Signed-off-by: Baruch Siach Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/d9486296c3b6b12ab3a0515fcd47d56447a07bfc.1699897370.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5801f4d50f95..53358015fbe5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5302,10 +5302,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) len = 0; } +read_again: if (count >= limit) break; -read_again: buf1_len = 0; buf2_len = 0; entry = next_entry; From 779334e59850f863bf34665e0ff0b6faf126873b Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 13 Nov 2023 19:42:50 +0200 Subject: [PATCH 0208/1397] net: stmmac: avoid rx queue overrun [ Upstream commit b6cb4541853c7ee512111b0e7ddf3cb66c99c137 ] dma_rx_size can be set as low as 64. Rx budget might be higher than that. Make sure to not overrun allocated rx buffers when budget is larger. Leave one descriptor unused to avoid wrap around of 'dirty_rx' vs 'cur_rx'. Signed-off-by: Baruch Siach Reviewed-by: Serge Semin Fixes: 47dd7a540b8a ("net: add support for STMicroelectronics Ethernet controllers.") Link: https://lore.kernel.org/r/d95413e44c97d4692e72cec13a75f894abeb6998.1699897370.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 53358015fbe5..1fa4da96c8f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5267,6 +5267,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dma_dir = page_pool_get_dma_dir(rx_q->page_pool); buf_sz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE; + limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit); if (netif_msg_rx_status(priv)) { void *rx_head; From f4cbba7e2f0aa0a8f69735a84b06e61156e85ded Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 13 Nov 2023 10:32:56 -0800 Subject: [PATCH 0209/1397] pds_core: use correct index to mask irq [ Upstream commit 09d4c14c6c5e6e781a3879fed7f8e116a18b8c65 ] Use the qcq's interrupt index, not the irq number, to mask the interrupt. Since the irq number can be out of range from the number of possible interrupts, we can end up accessing and potentially scribbling on out-of-range and/or unmapped memory, making the kernel angry. [ 3116.039364] BUG: unable to handle page fault for address: ffffbeea1c3edf84 [ 3116.047059] #PF: supervisor write access in kernel mode [ 3116.052895] #PF: error_code(0x0002) - not-present page [ 3116.058636] PGD 100000067 P4D 100000067 PUD 1001f2067 PMD 10f82e067 PTE 0 [ 3116.066221] Oops: 0002 [#1] SMP NOPTI [ 3116.092948] RIP: 0010:iowrite32+0x9/0x76 [ 3116.190452] Call Trace: [ 3116.193185] [ 3116.195430] ? show_trace_log_lvl+0x1d6/0x2f9 [ 3116.200298] ? show_trace_log_lvl+0x1d6/0x2f9 [ 3116.205166] ? pdsc_adminq_isr+0x43/0x55 [pds_core] [ 3116.210618] ? __die_body.cold+0x8/0xa [ 3116.214806] ? page_fault_oops+0x16d/0x1ac [ 3116.219382] ? exc_page_fault+0xbe/0x13b [ 3116.223764] ? asm_exc_page_fault+0x22/0x27 [ 3116.228440] ? iowrite32+0x9/0x76 [ 3116.232143] pdsc_adminq_isr+0x43/0x55 [pds_core] [ 3116.237627] __handle_irq_event_percpu+0x3a/0x184 [ 3116.243088] handle_irq_event+0x57/0xb0 [ 3116.247575] handle_edge_irq+0x87/0x225 [ 3116.252062] __common_interrupt+0x3e/0xbc [ 3116.256740] common_interrupt+0x7b/0x98 [ 3116.261216] [ 3116.263745] [ 3116.266268] asm_common_interrupt+0x22/0x27 Reported-by: Joao Martins Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231113183257.71110-2-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/pds_core/adminq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 045fe133f6ee..5beadabc2136 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -146,7 +146,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) } queue_work(pdsc->wq, &qcq->work); - pds_core_intr_mask(&pdsc->intr_ctrl[irq], PDS_CORE_INTR_MASK_CLEAR); + pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR); return IRQ_HANDLED; } From 3409e3adb1b41a31ede1409d5288a9a1b5b1dead Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 13 Nov 2023 10:32:57 -0800 Subject: [PATCH 0210/1397] pds_core: fix up some format-truncation complaints [ Upstream commit 7c02f6ae676a954216a192612040f9a0cde3adf7 ] Our friendly kernel test robot pointed out a couple of potential string truncation issues. None of which were we worried about, but can be relatively easily fixed to quiet the complaints. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310211736.66syyDpp-lkp@intel.com/ Fixes: 45d76f492938 ("pds_core: set up device and adminq") Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20231113183257.71110-3-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/pds_core/core.h | 2 +- drivers/net/ethernet/amd/pds_core/dev.c | 8 ++++++-- drivers/net/ethernet/amd/pds_core/devlink.c | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index e545fafc4819..b1c1f1007b06 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -15,7 +15,7 @@ #define PDSC_DRV_DESCRIPTION "AMD/Pensando Core Driver" #define PDSC_WATCHDOG_SECS 5 -#define PDSC_QUEUE_NAME_MAX_SZ 32 +#define PDSC_QUEUE_NAME_MAX_SZ 16 #define PDSC_ADMINQ_MIN_LENGTH 16 /* must be a power of two */ #define PDSC_NOTIFYQ_LENGTH 64 /* must be a power of two */ #define PDSC_TEARDOWN_RECOVERY false diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index f77cd9f5a2fd..eb178728edba 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -254,10 +254,14 @@ static int pdsc_identify(struct pdsc *pdsc) struct pds_core_drv_identity drv = {}; size_t sz; int err; + int n; drv.drv_type = cpu_to_le32(PDS_DRIVER_LINUX); - snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str), - "%s %s", PDS_CORE_DRV_NAME, utsname()->release); + /* Catching the return quiets a Wformat-truncation complaint */ + n = snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str), + "%s %s", PDS_CORE_DRV_NAME, utsname()->release); + if (n > sizeof(drv.driver_ver_str)) + dev_dbg(pdsc->dev, "release name truncated, don't care\n"); /* Next let's get some info about the device * We use the devcmd_lock at this level in order to diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index d9607033bbf2..d2abf32b93fe 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -104,7 +104,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, struct pds_core_fw_list_info fw_list; struct pdsc *pdsc = devlink_priv(dl); union pds_core_dev_comp comp; - char buf[16]; + char buf[32]; int listlen; int err; int i; From ff33be9cecee28f021493369bddb826f068a1acb Mon Sep 17 00:00:00 2001 From: Ziwei Xiao Date: Mon, 13 Nov 2023 16:41:44 -0800 Subject: [PATCH 0211/1397] gve: Fixes for napi_poll when budget is 0 [ Upstream commit 278a370c1766060d2144d6cf0b06c101e1043b6d ] Netpoll will explicilty pass the polling call with a budget of 0 to indicate it's clearing the Tx path only. For the gve_rx_poll and gve_xdp_poll, they were mistakenly taking the 0 budget as the indication to do all the work. Add check to avoid the rx path and xdp path being called when budget is 0. And also avoid napi_complete_done being called when budget is 0 for netpoll. Fixes: f5cedc84a30d ("gve: Add transmit and receive support") Signed-off-by: Ziwei Xiao Link: https://lore.kernel.org/r/20231114004144.2022268-1-ziweixiao@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/google/gve/gve_main.c | 8 +++++++- drivers/net/ethernet/google/gve/gve_rx.c | 4 ---- drivers/net/ethernet/google/gve/gve_tx.c | 4 ---- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 83b09dcfafc4..5703240474e5 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -254,10 +254,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) if (block->tx) { if (block->tx->q_num < priv->tx_cfg.num_queues) reschedule |= gve_tx_poll(block, budget); - else + else if (budget) reschedule |= gve_xdp_poll(block, budget); } + if (!budget) + return 0; + if (block->rx) { work_done = gve_rx_poll(block, budget); reschedule |= work_done == budget; @@ -298,6 +301,9 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) if (block->tx) reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + if (!budget) + return 0; + if (block->rx) { work_done = gve_rx_poll_dqo(block, budget); reschedule |= work_done == budget; diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index e84a066aa1a4..73655347902d 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -1007,10 +1007,6 @@ int gve_rx_poll(struct gve_notify_block *block, int budget) feat = block->napi.dev->features; - /* If budget is 0, do all the work */ - if (budget == 0) - budget = INT_MAX; - if (budget > 0) work_done = gve_clean_rx_done(rx, budget, feat); diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 6957a865cff3..9f6ffc4a54f0 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -925,10 +925,6 @@ bool gve_xdp_poll(struct gve_notify_block *block, int budget) bool repoll; u32 to_do; - /* If budget is 0, do all the work */ - if (budget == 0) - budget = INT_MAX; - /* Find out how much work there is to be done */ nic_done = gve_tx_load_event_counter(priv, tx); to_do = min_t(u32, (nic_done - tx->done), budget); From 5753dbb32648d7bb3530b022d5fa360868a983b1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Nov 2023 09:55:50 -0700 Subject: [PATCH 0212/1397] io_uring/fdinfo: remove need for sqpoll lock for thread/pid retrieval [ Upstream commit a0d45c3f596be53c1bd8822a1984532d14fdcea9 ] A previous commit added a trylock for getting the SQPOLL thread info via fdinfo, but this introduced a regression where we often fail to get it if the thread is busy. For that case, we end up not printing the current CPU and PID info. Rather than rely on this lock, just print the pid we already stored in the io_sq_data struct, and ensure we update the current CPU every time we've slept or potentially rescheduled. The latter won't potentially be 100% accurate, but that wasn't the case before either as the task can get migrated at any time unless it has been pinned at creation time. We retain keeping the io_sq_data dereference inside the ctx->uring_lock, as it has always been, as destruction of the thread and data happen below that. We could make this RCU safe, but there's little point in doing that. With this, we always print the last valid information we had, rather than have spurious outputs with missing information. Fixes: 7644b1a1c9a7 ("io_uring/fdinfo: lock SQ thread while retrieving thread cpu/pid") Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/fdinfo.c | 9 ++------- io_uring/sqpoll.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index f04a43044d91..976e9500f651 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -145,13 +145,8 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f) if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { struct io_sq_data *sq = ctx->sq_data; - if (mutex_trylock(&sq->lock)) { - if (sq->thread) { - sq_pid = task_pid_nr(sq->thread); - sq_cpu = task_cpu(sq->thread); - } - mutex_unlock(&sq->lock); - } + sq_pid = sq->task_pid; + sq_cpu = sq->sq_cpu; } seq_printf(m, "SqThread:\t%d\n", sq_pid); diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index bd6c2c7959a5..65b5dbe3c850 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -214,6 +214,7 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd) did_sig = get_signal(&ksig); cond_resched(); mutex_lock(&sqd->lock); + sqd->sq_cpu = raw_smp_processor_id(); } return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); } @@ -229,10 +230,15 @@ static int io_sq_thread(void *data) snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid); set_task_comm(current, buf); - if (sqd->sq_cpu != -1) + /* reset to our pid after we've set task_comm, for fdinfo */ + sqd->task_pid = current->pid; + + if (sqd->sq_cpu != -1) { set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu)); - else + } else { set_cpus_allowed_ptr(current, cpu_online_mask); + sqd->sq_cpu = raw_smp_processor_id(); + } mutex_lock(&sqd->lock); while (1) { @@ -261,6 +267,7 @@ static int io_sq_thread(void *data) mutex_unlock(&sqd->lock); cond_resched(); mutex_lock(&sqd->lock); + sqd->sq_cpu = raw_smp_processor_id(); } continue; } @@ -294,6 +301,7 @@ static int io_sq_thread(void *data) mutex_unlock(&sqd->lock); schedule(); mutex_lock(&sqd->lock); + sqd->sq_cpu = raw_smp_processor_id(); } list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) atomic_andnot(IORING_SQ_NEED_WAKEUP, From ead487b374b9f29644a315cab72f6fd8957aeed3 Mon Sep 17 00:00:00 2001 From: Itamar Gozlan Date: Tue, 14 Nov 2023 13:58:32 -0800 Subject: [PATCH 0213/1397] Revert "net/mlx5: DR, Supporting inline WQE when possible" [ Upstream commit df3aafe501853c92bc9e25b05dcb030fee072962 ] This reverts commit 95c337cce0e11d06a715da73e6796ade9216637f. The revert is required due to the suspicion it cause some tests fail and will be moved to further investigation. Fixes: 95c337cce0e1 ("net/mlx5: DR, Supporting inline WQE when possible") Signed-off-by: Itamar Gozlan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-2-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/steering/dr_send.c | 115 ++---------------- 1 file changed, 13 insertions(+), 102 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 4e8527a724f5..6fa06ba2d346 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -52,7 +52,6 @@ struct dr_qp_init_attr { u32 cqn; u32 pdn; u32 max_send_wr; - u32 max_send_sge; struct mlx5_uars_page *uar; u8 isolate_vl_tc:1; }; @@ -247,37 +246,6 @@ static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne) return err == CQ_POLL_ERR ? err : npolled; } -static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr) -{ - return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_flow_update_ctrl_seg) + - sizeof(struct mlx5_wqe_header_modify_argument_update_seg)); -} - -/* We calculate for specific RC QP with the required functionality */ -static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr) -{ - int update_arg_size; - int inl_size = 0; - int tot_size; - int size; - - update_arg_size = dr_qp_get_args_update_send_wqe_size(attr); - - size = sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg); - inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) + - DR_STE_SIZE, 16); - - size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg); - - size = max(size, update_arg_size); - - tot_size = max(size, inl_size); - - return ALIGN(tot_size, MLX5_SEND_WQE_BB); -} - static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, struct dr_qp_init_attr *attr) { @@ -285,7 +253,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {}; struct mlx5_wq_param wqp; struct mlx5dr_qp *dr_qp; - int wqe_size; int inlen; void *qpc; void *in; @@ -365,15 +332,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, if (err) goto err_in; dr_qp->uar = attr->uar; - wqe_size = dr_qp_calc_rc_send_wqe(attr); - dr_qp->max_inline_data = min(wqe_size - - (sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_inline_seg)), - (2 * MLX5_SEND_WQE_BB - - (sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_inline_seg)))); return dr_qp; @@ -437,48 +395,8 @@ dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, MLX5_SEND_WQE_DS; } -static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp, - struct dr_data_seg *data_seg, void *wqe) -{ - int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_inline_seg); - struct mlx5_wqe_inline_seg *seg; - int left_space; - int inl = 0; - void *addr; - int len; - int idx; - - seg = wqe; - wqe += sizeof(*seg); - addr = (void *)(unsigned long)(data_seg->addr); - len = data_seg->length; - inl += len; - left_space = MLX5_SEND_WQE_BB - inline_header_size; - - if (likely(len > left_space)) { - memcpy(wqe, addr, left_space); - len -= left_space; - addr += left_space; - idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1); - wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx); - } - - memcpy(wqe, addr, len); - - if (likely(inl)) { - seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); - return DIV_ROUND_UP(inl + sizeof(seg->byte_count), - MLX5_SEND_WQE_DS); - } else { - return 0; - } -} - static void -dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp, - struct mlx5_wqe_ctrl_seg *wq_ctrl, +dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl, u64 remote_addr, u32 rkey, struct dr_data_seg *data_seg, @@ -494,17 +412,15 @@ dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp, wq_raddr->reserved = 0; wq_dseg = (void *)(wq_raddr + 1); - /* WQE ctrl segment + WQE remote addr segment */ - *size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS; - if (data_seg->send_flags & IB_SEND_INLINE) { - *size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg); - } else { - wq_dseg->byte_count = cpu_to_be32(data_seg->length); - wq_dseg->lkey = cpu_to_be32(data_seg->lkey); - wq_dseg->addr = cpu_to_be64(data_seg->addr); - *size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS; /* WQE data segment */ - } + wq_dseg->byte_count = cpu_to_be32(data_seg->length); + wq_dseg->lkey = cpu_to_be32(data_seg->lkey); + wq_dseg->addr = cpu_to_be64(data_seg->addr); + + *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */ + sizeof(*wq_dseg) + /* WQE data segment */ + sizeof(*wq_raddr)) / /* WQE remote addr segment */ + MLX5_SEND_WQE_DS; } static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl, @@ -535,7 +451,7 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, switch (opcode) { case MLX5_OPCODE_RDMA_READ: case MLX5_OPCODE_RDMA_WRITE: - dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr, + dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr, rkey, data_seg, &size); break; case MLX5_OPCODE_FLOW_TBL_ACCESS: @@ -656,7 +572,7 @@ static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring, if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->write.send_flags |= IB_SEND_SIGNALED; else - send_info->write.send_flags &= ~IB_SEND_SIGNALED; + send_info->write.send_flags = 0; } static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, @@ -680,13 +596,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, } send_ring->pending_wqe++; - if (!send_info->write.lkey) - send_info->write.send_flags |= IB_SEND_INLINE; if (send_ring->pending_wqe % send_ring->signal_th == 0) send_info->write.send_flags |= IB_SEND_SIGNALED; - else - send_info->write.send_flags &= ~IB_SEND_SIGNALED; send_ring->pending_wqe++; send_info->read.length = send_info->write.length; @@ -696,9 +608,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn, send_info->read.lkey = send_ring->sync_mr->mkey; if (send_ring->pending_wqe % send_ring->signal_th == 0) - send_info->read.send_flags |= IB_SEND_SIGNALED; + send_info->read.send_flags = IB_SEND_SIGNALED; else - send_info->read.send_flags &= ~IB_SEND_SIGNALED; + send_info->read.send_flags = 0; } static void dr_fill_data_segs(struct mlx5dr_domain *dmn, @@ -1345,7 +1257,6 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) dmn->send_ring->cq->qp = dmn->send_ring->qp; dmn->info.max_send_wr = QUEUE_SIZE; - init_attr.max_send_sge = 1; dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data, DR_STE_SIZE); From 35309b05bc7055a0083a91faffee54077926e585 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Tue, 14 Nov 2023 13:58:33 -0800 Subject: [PATCH 0214/1397] net/mlx5: Free used cpus mask when an IRQ is released [ Upstream commit 7d2f74d1d4385a5bcf90618537f16a45121c30ae ] Each EQ table maintains a cpumask of the already used CPUs that are mapped to IRQs to ensure that each IRQ gets mapped to a unique CPU. However, on IRQ release, the said cpumask is not updated by clearing the CPU from the mask to allow future IRQ request, causing the following error when a SF is reloaded after it has utilized all CPUs for its IRQs: mlx5_irq_affinity_request:135:(pid 306010): Didn't find a matching IRQ. err = -28 Thus, when releasing an IRQ, clear its mapped CPU from the used CPUs mask, to prevent the case described above. While at it, move the used cpumask update to the EQ layer as it is more fitting and preserves symmetricity of the IRQ request/release API. Fixes: a1772de78d73 ("net/mlx5: Refactor completion IRQ request/release API") Signed-off-by: Maher Sanalla Reviewed-by: Tariq Toukan Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-3-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 25 ++++++++--- .../mellanox/mlx5/core/irq_affinity.c | 42 ------------------- 2 files changed, 19 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ea0405e0a43f..40a6cb052a2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -885,11 +885,14 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_irq *irq; + int cpu; irq = xa_load(&table->comp_irqs, vecidx); if (!irq) return; + cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); + cpumask_clear_cpu(cpu, &table->used_cpus); xa_erase(&table->comp_irqs, vecidx); mlx5_irq_affinity_irq_release(dev, irq); } @@ -897,16 +900,26 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + struct irq_affinity_desc af_desc = {}; struct mlx5_irq *irq; - irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, vecidx); - if (IS_ERR(irq)) { - /* In case SF irq pool does not exist, fallback to the PF irqs*/ - if (PTR_ERR(irq) == -ENOENT) - return comp_irq_request_pci(dev, vecidx); + /* In case SF irq pool does not exist, fallback to the PF irqs*/ + if (!mlx5_irq_pool_is_sf_pool(pool)) + return comp_irq_request_pci(dev, vecidx); + af_desc.is_managed = 1; + cpumask_copy(&af_desc.mask, cpu_online_mask); + cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus); + irq = mlx5_irq_affinity_request(pool, &af_desc); + if (IS_ERR(irq)) return PTR_ERR(irq); - } + + cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq)); + mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", + pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), + cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), + mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 047d5fed5f89..612e666ec263 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -168,45 +168,3 @@ void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *i if (pool->irqs_per_cpu) cpu_put(pool, cpu); } - -/** - * mlx5_irq_affinity_irq_request_auto - request one IRQ for mlx5 device. - * @dev: mlx5 device that is requesting the IRQ. - * @used_cpus: cpumask of bounded cpus by the device - * @vecidx: vector index to request an IRQ for. - * - * Each IRQ is bounded to at most 1 CPU. - * This function is requesting an IRQ according to the default assignment. - * The default assignment policy is: - * - request the least loaded IRQ which is not bound to any - * CPU of the previous IRQs requested. - * - * On success, this function updates used_cpus mask and returns an irq pointer. - * In case of an error, an appropriate error pointer is returned. - */ -struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, - struct cpumask *used_cpus, u16 vecidx) -{ - struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); - struct irq_affinity_desc af_desc = {}; - struct mlx5_irq *irq; - - if (!mlx5_irq_pool_is_sf_pool(pool)) - return ERR_PTR(-ENOENT); - - af_desc.is_managed = 1; - cpumask_copy(&af_desc.mask, cpu_online_mask); - cpumask_andnot(&af_desc.mask, &af_desc.mask, used_cpus); - irq = mlx5_irq_affinity_request(pool, &af_desc); - - if (IS_ERR(irq)) - return irq; - - cpumask_or(used_cpus, used_cpus, mlx5_irq_get_affinity_mask(irq)); - mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", - pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), - cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), - mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); - - return irq; -} From 2226665bfb192ed58853e6cd988f32da8da81dc8 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:35 -0800 Subject: [PATCH 0215/1397] net/mlx5: Decouple PHC .adjtime and .adjphase implementations [ Upstream commit fd64fd13c49a53012ce9170449dcd9eb71c11284 ] When running a phase adjustment operation, the free running clock should not be modified at all. The phase control keyword is intended to trigger an internal servo on the device that will converge to the provided delta. A free running counter cannot implement phase adjustment. Fixes: 8e11a68e2e8a ("net/mlx5: Add adjphase function to support hardware-only offset control") Signed-off-by: Rahul Rameshbabu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-5-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index aa29f09e8356..0c83ef174275 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -384,7 +384,12 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta) { - return mlx5_ptp_adjtime(ptp, delta); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + return mlx5_ptp_adjtime_real_time(mdev, delta); } static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm) From 3911ba305a4623248cfa8cea49e3be5583356f82 Mon Sep 17 00:00:00 2001 From: Dust Li Date: Tue, 14 Nov 2023 13:58:36 -0800 Subject: [PATCH 0216/1397] net/mlx5e: fix double free of encap_header [ Upstream commit 6f9b1a0731662648949a1c0587f6acb3b7f8acf1 ] When mlx5_packet_reformat_alloc() fails, the encap_header allocated in mlx5e_tc_tun_create_header_ipv4{6} will be released within it. However, e->encap_header is already set to the previously freed encap_header before mlx5_packet_reformat_alloc(). As a result, the later mlx5e_encap_put() will free e->encap_header again, causing a double free issue. mlx5e_encap_put() --> mlx5e_encap_dealloc() --> kfree(e->encap_header) This happens when cmd: MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT fail. This patch fix it by not setting e->encap_header until mlx5_packet_reformat_alloc() success. Fixes: d589e785baf5e ("net/mlx5e: Allow concurrent creation of encap entries") Reported-by: Cruz Zhao Reported-by: Tianchen Ding Signed-off-by: Dust Li Reviewed-by: Wojciech Drewek Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 00a04fdd756f..8bca696b6658 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -300,9 +300,6 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; - e->encap_size = ipv4_encap_size; - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -322,6 +319,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, goto destroy_neigh_entry; } + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -568,9 +567,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; - e->encap_size = ipv6_encap_size; - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -590,6 +586,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, goto destroy_neigh_entry; } + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); From bf66a6fadbab1b003a5a5b4944ec5f563587003b Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Tue, 14 Nov 2023 13:58:37 -0800 Subject: [PATCH 0217/1397] net/mlx5e: fix double free of encap_header in update funcs [ Upstream commit 3a4aa3cb83563df942be49d145ee3b7ddf17d6bb ] Follow up to the previous patch to fix the same issue for mlx5e_tc_tun_update_header_ipv4{6} when mlx5_packet_reformat_alloc() fails. When mlx5_packet_reformat_alloc() fails, the encap_header allocated in mlx5e_tc_tun_update_header_ipv4{6} will be released within it. However, e->encap_header is already set to the previously freed encap_header before mlx5_packet_reformat_alloc(). As a result, the later mlx5e_encap_put() will free e->encap_header again, causing a double free issue. mlx5e_encap_put() --> mlx5e_encap_dealloc() --> kfree(e->encap_header) This patch fix it by not setting e->encap_header until mlx5_packet_reformat_alloc() success. Fixes: a54e20b4fcae ("net/mlx5e: Add basic TC tunnel set action for SRIOV offloads") Signed-off-by: Gavin Li Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-7-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 8bca696b6658..668da5c70e63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -403,16 +403,12 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, if (err) goto free_encap; - e->encap_size = ipv4_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto release_neigh; + goto free_encap; } memset(&reformat_params, 0, sizeof(reformat_params)); @@ -426,6 +422,10 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, goto free_encap; } + e->encap_size = ipv4_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -669,16 +669,12 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, if (err) goto free_encap; - e->encap_size = ipv6_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto release_neigh; + goto free_encap; } memset(&reformat_params, 0, sizeof(reformat_params)); @@ -692,6 +688,10 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, goto free_encap; } + e->encap_size = ipv6_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); From 5091e4ae11de818f2c1e1c0c374ebf1c29df75c4 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 14 Nov 2023 13:58:38 -0800 Subject: [PATCH 0218/1397] net/mlx5e: Fix pedit endianness [ Upstream commit 0c101a23ca7eaf00eef1328eefb04b3a93401cc8 ] Referenced commit addressed endianness issue in mlx5 pedit implementation in ad hoc manner instead of systematically treating integer values according to their types which left pedit fields of sizes not equal to 4 and where the bytes being modified are not least significant ones broken on big endian machines since wrong bits will be consumed during parsing which leads to following example error when applying pedit to source and destination MAC addresses: [Wed Oct 18 12:52:42 2023] mlx5_core 0001:00:00.1 p1v3_r: attempt to offload an unsupported field (cmd 0) [Wed Oct 18 12:52:42 2023] mask: 00000000330c5b68: 00 00 00 00 ff ff 00 00 00 00 ff ff 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 0000000017d22fd9: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 000000008186d717: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 0000000029eb6149: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 000000007ed103e4: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 00000000db8101a6: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [Wed Oct 18 12:52:42 2023] mask: 00000000ec3c08a9: 00 00 00 00 00 00 00 00 00 00 00 00 ............ Treat masks and values of pedit and filter match as network byte order, refactor pointers to them to void pointers instead of confusing u32 pointers and only cast to pointer-to-integer when reading a value from them. Treat pedit mlx5_fields->field_mask as host byte order according to its type u32, change the constants in fields array accordingly. Fixes: 82198d8bcdef ("net/mlx5e: Fix endianness when calculating pedit mask first bit") Signed-off-by: Vlad Buslov Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-8-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 60 ++++++++++--------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index c8590483ddc6..b62fd3749341 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3145,7 +3145,7 @@ static struct mlx5_fields fields[] = { OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), - OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp), + OFFLOAD(IP_DSCP, 16, 0x0fc0, ip6, 0, ip_dscp), OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), @@ -3156,21 +3156,31 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), }; -static unsigned long mask_to_le(unsigned long mask, int size) +static u32 mask_field_get(void *mask, struct mlx5_fields *f) { - __be32 mask_be32; - __be16 mask_be16; - - if (size == 32) { - mask_be32 = (__force __be32)(mask); - mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); - } else if (size == 16) { - mask_be32 = (__force __be32)(mask); - mask_be16 = *(__be16 *)&mask_be32; - mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); + switch (f->field_bsize) { + case 32: + return be32_to_cpu(*(__be32 *)mask) & f->field_mask; + case 16: + return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask; + default: + return *(u8 *)mask & (u8)f->field_mask; } +} - return mask; +static void mask_field_clear(void *mask, struct mlx5_fields *f) +{ + switch (f->field_bsize) { + case 32: + *(__be32 *)mask &= ~cpu_to_be32(f->field_mask); + break; + case 16: + *(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask); + break; + default: + *(u8 *)mask &= ~(u8)f->field_mask; + break; + } } static int offload_pedit_fields(struct mlx5e_priv *priv, @@ -3182,11 +3192,12 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; struct pedit_headers_action *hdrs = parse_attr->hdrs; void *headers_c, *headers_v, *action, *vals_p; - u32 *s_masks_p, *a_masks_p, s_mask, a_mask; struct mlx5e_tc_mod_hdr_acts *mod_acts; - unsigned long mask, field_mask; + void *s_masks_p, *a_masks_p; int i, first, last, next_z; struct mlx5_fields *f; + unsigned long mask; + u32 s_mask, a_mask; u8 cmd; mod_acts = &parse_attr->mod_hdr_acts; @@ -3202,15 +3213,11 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, bool skip; f = &fields[i]; - /* avoid seeing bits set from previous iterations */ - s_mask = 0; - a_mask = 0; - s_masks_p = (void *)set_masks + f->offset; a_masks_p = (void *)add_masks + f->offset; - s_mask = *s_masks_p & f->field_mask; - a_mask = *a_masks_p & f->field_mask; + s_mask = mask_field_get(s_masks_p, f); + a_mask = mask_field_get(a_masks_p, f); if (!s_mask && !a_mask) /* nothing to offload here */ continue; @@ -3237,22 +3244,20 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, match_mask, f->field_bsize)) skip = true; /* clear to denote we consumed this field */ - *s_masks_p &= ~f->field_mask; + mask_field_clear(s_masks_p, f); } else { cmd = MLX5_ACTION_TYPE_ADD; mask = a_mask; vals_p = (void *)add_vals + f->offset; /* add 0 is no change */ - if ((*(u32 *)vals_p & f->field_mask) == 0) + if (!mask_field_get(vals_p, f)) skip = true; /* clear to denote we consumed this field */ - *a_masks_p &= ~f->field_mask; + mask_field_clear(a_masks_p, f); } if (skip) continue; - mask = mask_to_le(mask, f->field_bsize); - first = find_first_bit(&mask, f->field_bsize); next_z = find_next_zero_bit(&mask, f->field_bsize, first); last = find_last_bit(&mask, f->field_bsize); @@ -3279,10 +3284,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, MLX5_SET(set_action_in, action, field, f->field); if (cmd == MLX5_ACTION_TYPE_SET) { + unsigned long field_mask = f->field_mask; int start; - field_mask = mask_to_le(f->field_mask, f->field_bsize); - /* if field is bit sized it can start not from first bit */ start = find_first_bit(&field_mask, f->field_bsize); From f676cc90ad0f08f7c959ea625a7e2143a36f5c66 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 14 Nov 2023 13:58:39 -0800 Subject: [PATCH 0219/1397] net/mlx5e: Don't modify the peer sent-to-vport rules for IPSec offload [ Upstream commit bdf788cf224f61c20a01c58c00685d394d57887f ] As IPSec packet offload in switchdev mode is not supported with LAG, it's unnecessary to modify those sent-to-vport rules to the peer eswitch. Fixes: c6c2bf5db4ea ("net/mlx5e: Support IPsec packet offload for TX in switchdev mode") Signed-off-by: Jianbo Liu Reviewed-by: Leon Romanovsky Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-9-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b296ac52a439..88236e75fd90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -984,7 +984,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - if (rep->vport == MLX5_VPORT_UPLINK && on_esw->offloads.ft_ipsec_tx_pol) { + if (rep->vport == MLX5_VPORT_UPLINK && + on_esw == from_esw && on_esw->offloads.ft_ipsec_tx_pol) { dest.ft = on_esw->offloads.ft_ipsec_tx_pol; flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; From 3bc1967ddaac54323be664649e50e3e2618baadb Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:40 -0800 Subject: [PATCH 0220/1397] net/mlx5e: Avoid referencing skb after free-ing in drop path of mlx5e_sq_xmit_wqe [ Upstream commit 64f14d16eef1f939000f2617b50c7c996b5117d4 ] When SQ is a port timestamping SQ for PTP, do not access tx flags of skb after free-ing the skb. Free the skb only after all references that depend on it have been handled in the dropped WQE path. Fixes: 3178308ad4ca ("net/mlx5e: Make tx_port_ts logic resilient to out-of-order CQEs") Signed-off-by: Rahul Rameshbabu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-10-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index d41435c22ce5..19f2c25b05a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -494,10 +494,10 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; - dev_kfree_skb_any(skb); if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist, be32_to_cpu(eseg->flow_table_metadata)); + dev_kfree_skb_any(skb); mlx5e_tx_flush(sq); } From 4d510506b46504664eacf8a44a9e8f3e54c137b8 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:41 -0800 Subject: [PATCH 0221/1397] net/mlx5e: Track xmit submission to PTP WQ after populating metadata map [ Upstream commit 7e3f3ba97e6cc6fce5bf62df2ca06c8e59040167 ] Ensure the skb is available in metadata mapping to skbs before tracking the metadata index for detecting undelivered CQEs. If the metadata index is put in the tracking list before putting the skb in the map, the metadata index might be used for detecting undelivered CQEs before the relevant skb is available in the map, which can lead to a null-ptr-deref. Log: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] CPU: 0 PID: 1243 Comm: kworker/0:2 Not tainted 6.6.0-rc4+ #108 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: events mlx5e_rx_dim_work [mlx5_core] RIP: 0010:mlx5e_ptp_napi_poll+0x9a4/0x2290 [mlx5_core] Code: 8c 24 38 cc ff ff 4c 8d 3c c1 4c 89 f9 48 c1 e9 03 42 80 3c 31 00 0f 85 97 0f 00 00 4d 8b 3f 49 8d 7f 28 48 89 f9 48 c1 e9 03 <42> 80 3c 31 00 0f 85 8b 0f 00 00 49 8b 47 28 48 85 c0 0f 84 05 07 RSP: 0018:ffff8884d3c09c88 EFLAGS: 00010206 RAX: 0000000000000069 RBX: ffff8881160349d8 RCX: 0000000000000005 RDX: ffffed10218f48cf RSI: 0000000000000004 RDI: 0000000000000028 RBP: ffff888122707700 R08: 0000000000000001 R09: ffffed109a781383 R10: 0000000000000003 R11: 0000000000000003 R12: ffff88810c7a7a40 R13: ffff888122707700 R14: dffffc0000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8884d3c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f4f878dd6e0 CR3: 000000014d108002 CR4: 0000000000370eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? die_addr+0x3c/0xa0 ? exc_general_protection+0x144/0x210 ? asm_exc_general_protection+0x22/0x30 ? mlx5e_ptp_napi_poll+0x9a4/0x2290 [mlx5_core] ? mlx5e_ptp_napi_poll+0x8f6/0x2290 [mlx5_core] __napi_poll.constprop.0+0xa4/0x580 net_rx_action+0x460/0xb80 ? _raw_spin_unlock_irqrestore+0x32/0x60 ? __napi_poll.constprop.0+0x580/0x580 ? tasklet_action_common.isra.0+0x2ef/0x760 __do_softirq+0x26c/0x827 irq_exit_rcu+0xc2/0x100 common_interrupt+0x7f/0xa0 asm_common_interrupt+0x22/0x40 RIP: 0010:__kmem_cache_alloc_node+0xb/0x330 Code: 41 5d 41 5e 41 5f c3 8b 44 24 14 8b 4c 24 10 09 c8 eb d5 e8 b7 43 ca 01 0f 1f 80 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 57 <41> 56 41 89 d6 41 55 41 89 f5 41 54 49 89 fc 53 48 83 e4 f0 48 83 RSP: 0018:ffff88812c4079c0 EFLAGS: 00000246 RAX: 1ffffffff083c7fe RBX: ffff888100042dc0 RCX: 0000000000000218 RDX: 00000000ffffffff RSI: 0000000000000dc0 RDI: ffff888100042dc0 RBP: ffff88812c4079c8 R08: ffffffffa0289f96 R09: ffffed1025880ea9 R10: ffff888138839f80 R11: 0000000000000002 R12: 0000000000000dc0 R13: 0000000000000100 R14: 000000000000008c R15: ffff8881271fc450 ? cmd_exec+0x796/0x2200 [mlx5_core] kmalloc_trace+0x26/0xc0 cmd_exec+0x796/0x2200 [mlx5_core] mlx5_cmd_do+0x22/0xc0 [mlx5_core] mlx5_cmd_exec+0x17/0x30 [mlx5_core] mlx5_core_modify_cq_moderation+0x139/0x1b0 [mlx5_core] ? mlx5_add_cq_to_tasklet+0x280/0x280 [mlx5_core] ? lockdep_set_lock_cmp_fn+0x190/0x190 ? process_one_work+0x659/0x1220 mlx5e_rx_dim_work+0x9d/0x100 [mlx5_core] process_one_work+0x730/0x1220 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? max_active_store+0xf0/0xf0 ? assign_work+0x168/0x240 worker_thread+0x70f/0x12d0 ? __kthread_parkme+0xd1/0x1d0 ? process_one_work+0x1220/0x1220 kthread+0x2d9/0x3b0 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x2d/0x70 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork_asm+0x11/0x20 Modules linked in: xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_ib ib_uverbs ib_core zram zsmalloc mlx5_core fuse ---[ end trace 0000000000000000 ]--- Fixes: 3178308ad4ca ("net/mlx5e: Make tx_port_ts logic resilient to out-of-order CQEs") Signed-off-by: Rahul Rameshbabu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-11-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 19f2c25b05a0..f0b506e562df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -399,9 +399,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata); mlx5e_skb_cb_hwtstamp_init(skb); - mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, metadata_index); + mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); if (!netif_tx_queue_stopped(sq->txq) && mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) { netif_tx_stop_queue(sq->txq); From e5d30f7da35720060299483e65fc372980a82dfb Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:42 -0800 Subject: [PATCH 0222/1397] net/mlx5e: Update doorbell for port timestamping CQ before the software counter [ Upstream commit 92214be5979c0961a471b7eaaaeacab41bdf456c ] Previously, mlx5e_ptp_poll_ts_cq would update the device doorbell with the incremented consumer index after the relevant software counters in the kernel were updated. In the mlx5e_sq_xmit_wqe context, this would lead to either overrunning the device CQ or exceeding the expected software buffer size in the device CQ if the device CQ size was greater than the software buffer size. Update the relevant software counter only after updating the device CQ consumer index in the port timestamping napi_poll context. Log: mlx5_core 0000:08:00.0: cq_err_event_notifier:517:(pid 0): CQ error on CQN 0x487, syndrome 0x1 mlx5_core 0000:08:00.0 eth2: mlx5e_cq_error_event: cqn=0x000487 event=0x04 Fixes: 1880bc4e4a96 ("net/mlx5e: Add TX port timestamp support") Signed-off-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-12-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en/ptp.c | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index bb11e644d24f..af3928eddafd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -177,6 +177,8 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, struct mlx5_cqe64 *cqe, + u8 *md_buff, + u8 *md_buff_sz, int budget) { struct mlx5e_ptp_port_ts_cqe_list *pending_cqe_list = ptpsq->ts_cqe_pending_list; @@ -211,19 +213,24 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq, mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp); out: napi_consume_skb(skb, budget); - mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, metadata_id); + md_buff[*md_buff_sz++] = metadata_id; if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) && !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work); } -static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) +static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq); - struct mlx5_cqwq *cqwq = &cq->wq; + int budget = min(napi_budget, MLX5E_TX_CQ_POLL_BUDGET); + u8 metadata_buff[MLX5E_TX_CQ_POLL_BUDGET]; + u8 metadata_buff_sz = 0; + struct mlx5_cqwq *cqwq; struct mlx5_cqe64 *cqe; int work_done = 0; + cqwq = &cq->wq; + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state))) return false; @@ -234,7 +241,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) do { mlx5_cqwq_pop(cqwq); - mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, budget); + mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, + metadata_buff, &metadata_buff_sz, napi_budget); } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); mlx5_cqwq_update_db_record(cqwq); @@ -242,6 +250,10 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) /* ensure cq space is freed before enabling more cqes */ wmb(); + while (metadata_buff_sz > 0) + mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, + metadata_buff[--metadata_buff_sz]); + mlx5e_txqsq_wake(&ptpsq->txqsq); return work_done == budget; From 13738ed8141c136512292ecc39684e37e162cd6b Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:43 -0800 Subject: [PATCH 0223/1397] net/mlx5: Increase size of irq name buffer [ Upstream commit 3338bebfc26a1e2cebbba82a1cf12c0159608e73 ] Without increased buffer size, will trigger -Wformat-truncation with W=1 for the snprintf operation writing to the buffer. drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c: In function 'mlx5_irq_alloc': drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c:296:7: error: '@pci:' directive output may be truncated writing 5 bytes into a region of size between 1 and 32 [-Werror=format-truncation=] 296 | "%s@pci:%s", name, pci_name(dev->pdev)); | ^~~~~ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c:295:2: note: 'snprintf' output 6 or more bytes (assuming 37) into a destination of size 32 295 | snprintf(irq->name, MLX5_MAX_IRQ_NAME, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 296 | "%s@pci:%s", name, pci_name(dev->pdev)); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: ada9f5d00797 ("IB/mlx5: Fix eq names to display nicely in /proc/interrupts") Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c Signed-off-by: Rahul Rameshbabu Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-13-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 653648216730..4dcf995cb1a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -28,7 +28,7 @@ struct mlx5_irq { struct atomic_notifier_head nh; cpumask_var_t mask; - char name[MLX5_MAX_IRQ_NAME]; + char name[MLX5_MAX_IRQ_FORMATTED_NAME]; struct mlx5_irq_pool *pool; int refcount; struct msi_map map; @@ -292,8 +292,8 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, else irq_sf_set_name(pool, name, i); ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); - snprintf(irq->name, MLX5_MAX_IRQ_NAME, - "%s@pci:%s", name, pci_name(dev->pdev)); + snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME, + MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev)); err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name, &irq->nh); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h index d3a77a0ab848..c4d377f8df30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -7,6 +7,9 @@ #include #define MLX5_MAX_IRQ_NAME (32) +#define MLX5_IRQ_NAME_FORMAT_STR ("%s@pci:%s") +#define MLX5_MAX_IRQ_FORMATTED_NAME \ + (MLX5_MAX_IRQ_NAME + sizeof(MLX5_IRQ_NAME_FORMAT_STR)) /* max irq_index is 2047, so four chars */ #define MLX5_MAX_IRQ_IDX_CHARS (4) #define MLX5_EQ_REFS_PER_IRQ (2) From f31ed885c6da81a5357c22c68ac4abf432189c23 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 14 Nov 2023 13:58:44 -0800 Subject: [PATCH 0224/1397] net/mlx5e: Reduce the size of icosq_str [ Upstream commit dce94142842e119b982c27c1b62bd20890c7fd21 ] icosq_str size is unnecessarily too long, and it causes a build warning -Wformat-truncation with W=1. Looking closely, It doesn't need to be 255B, hence this patch reduces the size to 32B which should be more than enough to host the string: "ICOSQ: 0x%x, ". While here, add a missing space in the formatted string. This fixes the following build warning: $ KCFLAGS='-Wall -Werror' $ make O=/tmp/kbuild/linux W=1 -s -j12 drivers/net/ethernet/mellanox/mlx5/core/ drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c: In function 'mlx5e_reporter_rx_timeout': drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c:718:56: error: ', CQ: 0x' directive output may be truncated writing 8 bytes into a region of size between 0 and 255 [-Werror=format-truncation=] 718 | "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x", | ^~~~~~~~ drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c:717:9: note: 'snprintf' output between 43 and 322 bytes into a destination of size 288 717 | snprintf(err_str, sizeof(err_str), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 718 | "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x", | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 719 | rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 521f31af004a ("net/mlx5e: Allow RQ outside of channel context") Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-14-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index e8eea9ffd5eb..03b119a434bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -702,11 +702,11 @@ static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter, void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) { - char icosq_str[MLX5E_REPORTER_PER_Q_MAX_LEN] = {}; char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; struct mlx5e_icosq *icosq = rq->icosq; struct mlx5e_priv *priv = rq->priv; struct mlx5e_err_ctx err_ctx = {}; + char icosq_str[32] = {}; err_ctx.ctx = rq; err_ctx.recover = mlx5e_rx_reporter_timeout_recover; @@ -715,7 +715,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq) if (icosq) snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn); snprintf(err_str, sizeof(err_str), - "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x", + "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x", rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn); mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); From a97df7f5cd4798d96a67458d1a708aca6518d439 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:45 -0800 Subject: [PATCH 0225/1397] net/mlx5e: Check return value of snprintf writing to fw_version buffer [ Upstream commit 41e63c2baa11dc2aa71df5dd27a5bd87d11b6bbb ] Treat the operation as an error case when the return value is equivalent to the size of the name buffer. Failed to write null terminator to the name buffer, making the string malformed and should not be used. Provide a string with only the firmware version when forming the string with the board id fails. Without check, will trigger -Wformat-truncation with W=1. drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c: In function 'mlx5e_ethtool_get_drvinfo': drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c:49:31: warning: '%.16s' directive output may be truncated writing up to 16 bytes into a region of size between 13 and 22 [-Wformat-truncation=] 49 | "%d.%d.%04d (%.16s)", | ^~~~~ drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c:48:9: note: 'snprintf' output between 12 and 37 bytes into a destination of size 32 48 | snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 49 | "%d.%d.%04d (%.16s)", | ~~~~~~~~~~~~~~~~~~~~~ 50 | fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 51 | mdev->board_id); | ~~~~~~~~~~~~~~~ Fixes: 84e11edb71de ("net/mlx5e: Show board id in ethtool driver information") Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c Signed-off-by: Rahul Rameshbabu Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index dff02434ff45..7c66bd73ddfa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -43,12 +43,17 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) { struct mlx5_core_dev *mdev = priv->mdev; + int count; strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); - snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%04d (%.16s)", - fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), - mdev->board_id); + count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); + if (count == sizeof(drvinfo->fw_version)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev)); + strscpy(drvinfo->bus_info, dev_name(mdev->device), sizeof(drvinfo->bus_info)); } From 98ddd5bfec2b32a99d92494f015765b8efb0394c Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 14 Nov 2023 13:58:46 -0800 Subject: [PATCH 0226/1397] net/mlx5e: Check return value of snprintf writing to fw_version buffer for representors [ Upstream commit 1b2bd0c0264febcd8d47209079a6671c38e6558b ] Treat the operation as an error case when the return value is equivalent to the size of the name buffer. Failed to write null terminator to the name buffer, making the string malformed and should not be used. Provide a string with only the firmware version when forming the string with the board id fails. This logic for representors is identical to normal flow with ethtool. Without check, will trigger -Wformat-truncation with W=1. drivers/net/ethernet/mellanox/mlx5/core/en_rep.c: In function 'mlx5e_rep_get_drvinfo': drivers/net/ethernet/mellanox/mlx5/core/en_rep.c:78:31: warning: '%.16s' directive output may be truncated writing up to 16 bytes into a region of size between 13 and 22 [-Wformat-truncation=] 78 | "%d.%d.%04d (%.16s)", | ^~~~~ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c:77:9: note: 'snprintf' output between 12 and 37 bytes into a destination of size 32 77 | snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 78 | "%d.%d.%04d (%.16s)", | ~~~~~~~~~~~~~~~~~~~~~ 79 | fw_rev_maj(mdev), fw_rev_min(mdev), | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 80 | fw_rev_sub(mdev), mdev->board_id); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: cf83c8fdcd47 ("net/mlx5e: Add missing ethtool driver info for representors") Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6d4ab2e97dcfbcd748ae71761a9d8e5e41cc732c Signed-off-by: Rahul Rameshbabu Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20231114215846.5902-16-saeed@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index fd1cce542b68..825f9c687633 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -71,13 +71,17 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + int count; strscpy(drvinfo->driver, mlx5e_rep_driver_name, sizeof(drvinfo->driver)); - snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%04d (%.16s)", - fw_rev_maj(mdev), fw_rev_min(mdev), - fw_rev_sub(mdev), mdev->board_id); + count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); + if (count == sizeof(drvinfo->fw_version)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%04d", fw_rev_maj(mdev), + fw_rev_min(mdev), fw_rev_sub(mdev)); } static const struct counter_desc sw_rep_stats_desc[] = { From 0a7c9d1f550612fe0325b3987c2cdb7ec0546c58 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 13 Nov 2023 12:53:28 -0500 Subject: [PATCH 0227/1397] net: sched: do not offload flows with a helper in act_ct [ Upstream commit 7cd5af0e937a197295f3aa3721031f0fbae49cff ] There is no hardware supporting ct helper offload. However, prior to this patch, a flower filter with a helper in the ct action can be successfully set into the HW, for example (eth1 is a bnxt NIC): # tc qdisc add dev eth1 ingress_block 22 ingress # tc filter add block 22 proto ip flower skip_sw ip_proto tcp \ dst_port 21 ct_state -trk action ct helper ipv4-tcp-ftp # tc filter show dev eth1 ingress filter block 22 protocol ip pref 49152 flower chain 0 handle 0x1 eth_type ipv4 ip_proto tcp dst_port 21 ct_state -trk skip_sw in_hw in_hw_count 1 <---- action order 1: ct zone 0 helper ipv4-tcp-ftp pipe index 2 ref 1 bind 1 used_hw_stats delayed This might cause the flower filter not to work as expected in the HW. This patch avoids this problem by simply returning -EOPNOTSUPP in tcf_ct_offload_act_setup() to not allow to offload flows with a helper in act_ct. Fixes: a21b06e73191 ("net: sched: add helper support in act_ct") Signed-off-by: Xin Long Reviewed-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/f8685ec7702c4a448a1371a8b34b43217b583b9d.1699898008.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- include/net/tc_act/tc_ct.h | 9 +++++++++ net/sched/act_ct.c | 3 +++ 2 files changed, 12 insertions(+) diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index b24ea2d9400b..1dc2f827d0bc 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -57,6 +57,11 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) return to_ct_params(a)->nf_ft; } +static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a) +{ + return to_ct_params(a)->helper; +} + #else static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; } static inline int tcf_ct_action(const struct tc_action *a) { return 0; } @@ -64,6 +69,10 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) { return NULL; } +static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a) +{ + return NULL; +} #endif /* CONFIG_NF_CONNTRACK */ #if IS_ENABLED(CONFIG_NET_ACT_CT) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 3922d825ef2d..6dcc4585576e 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1535,6 +1535,9 @@ static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data, if (bind) { struct flow_action_entry *entry = entry_data; + if (tcf_ct_helper(act)) + return -EOPNOTSUPP; + entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); entry->ct.zone = tcf_ct_zone(act); From 7bd48f8a91bf56b738cac568a1972baf1b8532f6 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 14 Nov 2023 18:59:15 +0100 Subject: [PATCH 0228/1397] macvlan: Don't propagate promisc change to lower dev in passthru [ Upstream commit 7e1caeace0418381f36b3aa8403dfd82fc57fc53 ] Macvlan device in passthru mode sets its lower device promiscuous mode according to its MACVLAN_FLAG_NOPROMISC flag instead of synchronizing it to its own promiscuity setting. However, macvlan_change_rx_flags() function doesn't check the mode before propagating such changes to the lower device which can cause net_device->promiscuity counter overflow as illustrated by reproduction example [0] and resulting dmesg log [1]. Fix the issue by first verifying the mode in macvlan_change_rx_flags() function before propagating promiscuous mode change to the lower device. [0]: ip link add macvlan1 link enp8s0f0 type macvlan mode passthru ip link set macvlan1 promisc on ip l set dev macvlan1 up ip link set macvlan1 promisc off ip l set dev macvlan1 down ip l set dev macvlan1 up [1]: [ 5156.281724] macvlan1: entered promiscuous mode [ 5156.285467] mlx5_core 0000:08:00.0 enp8s0f0: entered promiscuous mode [ 5156.287639] macvlan1: left promiscuous mode [ 5156.288339] mlx5_core 0000:08:00.0 enp8s0f0: left promiscuous mode [ 5156.290907] mlx5_core 0000:08:00.0 enp8s0f0: entered promiscuous mode [ 5156.317197] mlx5_core 0000:08:00.0 enp8s0f0: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken. Fixes: efdbd2b30caa ("macvlan: Propagate promiscuity setting to lower devices.") Reviewed-by: Gal Pressman Signed-off-by: Vlad Buslov Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231114175915.1649154-1-vladbu@nvidia.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 02bd201bc7e5..c8da94af4161 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -780,7 +780,7 @@ static void macvlan_change_rx_flags(struct net_device *dev, int change) if (dev->flags & IFF_UP) { if (change & IFF_ALLMULTI) dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) + if (!macvlan_passthru(vlan->port) && change & IFF_PROMISC) dev_set_promiscuity(lowerdev, dev->flags & IFF_PROMISC ? 1 : -1); From b866371f9032a037ff861358782757ad1d765411 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sat, 25 Mar 2023 21:57:07 +0800 Subject: [PATCH 0229/1397] tools/power/turbostat: Fix a knl bug [ Upstream commit 137f01b3529d292a68d22e9681e2f903c768f790 ] MSR_KNL_CORE_C6_RESIDENCY should be evaluated only if 1. this is KNL platform AND 2. need to get C6 residency or need to calculate C1 residency Fix the broken logic introduced by commit 1e9042b9c8d4 ("tools/power turbostat: Fix CPU%C1 display value"). Fixes: 1e9042b9c8d4 ("tools/power turbostat: Fix CPU%C1 display value") Signed-off-by: Zhang Rui Reviewed-by: Len Brown Signed-off-by: Sasha Levin --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9a10512e3407..4e19bd2fa8b9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2180,7 +2180,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; - } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) { + } else if (do_knl_cstates && soft_c1_residency_display(BIC_CPU_c6)) { if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) return -7; } From 9b523bf810041c35d8c8105bb32102278c194f45 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Mon, 27 Mar 2023 11:17:44 +0800 Subject: [PATCH 0230/1397] tools/power/turbostat: Enable the C-state Pre-wake printing [ Upstream commit b61b7d8c4c22c4298a50ae5d0ee88facb85ce665 ] Currently the C-state Pre-wake will not be printed due to the probe has not been invoked. Invoke the probe function accordingly. Fixes: aeb01e6d71ff ("tools/power turbostat: Print the C-state Pre-wake settings") Signed-off-by: Chen Yu Reviewed-by: Zhang Rui Reviewed-by: Len Brown Signed-off-by: Sasha Levin --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4e19bd2fa8b9..785de89077de 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5790,6 +5790,7 @@ void process_cpuid() rapl_probe(family, model); perf_limit_reasons_probe(family, model); automatic_cstate_conversion_probe(family, model); + prewake_cstate_probe(family, model); check_tcc_offset(model_orig); From 6046c267323613bad832fd64b66d48ebae4533a0 Mon Sep 17 00:00:00 2001 From: Naomi Chu Date: Thu, 2 Nov 2023 13:24:24 +0800 Subject: [PATCH 0231/1397] scsi: ufs: core: Expand MCQ queue slot to DeviceQueueDepth + 1 [ Upstream commit defde5a50d91c74e1ce71a7f0bce7fb1ae311d84 ] The UFSHCI 4.0 specification mandates that there should always be at least one empty slot in each queue for distinguishing between full and empty states. Enlarge 'hwq->max_entries' to 'DeviceQueueDepth + 1' to allow UFSHCI 4.0 controllers to fully utilize MCQ queue slots. Fixes: 4682abfae2eb ("scsi: ufs: core: mcq: Allocate memory for MCQ mode") Signed-off-by: Naomi Chu Link: https://lore.kernel.org/r/20231102052426.12006-2-naomi.chu@mediatek.com Reviewed-by: Stanley Chu Reviewed-by: Bart Van Assche Reviewed-by: Peter Wang Reviewed-by: Chun-Hung Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufs-mcq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 2ba8ec254dce..5c75ab9d6bb5 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -436,7 +436,7 @@ int ufshcd_mcq_init(struct ufs_hba *hba) for (i = 0; i < hba->nr_hw_queues; i++) { hwq = &hba->uhq[i]; - hwq->max_entries = hba->nutrs; + hwq->max_entries = hba->nutrs + 1; spin_lock_init(&hwq->sq_lock); spin_lock_init(&hwq->cq_lock); mutex_init(&hwq->sq_mutex); From 6641ac164a0458988482250e3df83b9a238532d0 Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Mon, 13 Nov 2023 17:52:32 +0300 Subject: [PATCH 0232/1397] cifs: spnego: add ';' in HOST_KEY_LEN [ Upstream commit ff31ba19d732efb9aca3633935d71085e68d5076 ] "host=" should start with ';' (as in cifs_get_spnego_key) So its length should be 6. Found by Linux Verification Center (linuxtesting.org) with SVACE. Reviewed-by: Paulo Alcantara (SUSE) Fixes: 7c9c3760b3a5 ("[CIFS] add constants for string lengths of keynames in SPNEGO upcall string") Signed-off-by: Anastasia Belova Co-developed-by: Ekaterina Esina Signed-off-by: Ekaterina Esina Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifs_spnego.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index 6f3285f1dfee..af7849e5974f 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -64,8 +64,8 @@ struct key_type cifs_spnego_key_type = { * strlen(";sec=ntlmsspi") */ #define MAX_MECH_STR_LEN 13 -/* strlen of "host=" */ -#define HOST_KEY_LEN 5 +/* strlen of ";host=" */ +#define HOST_KEY_LEN 6 /* strlen of ";ip4=" or ";ip6=" */ #define IP_KEY_LEN 5 From 35867052aa591b4b36982d643323c43a4764914e Mon Sep 17 00:00:00 2001 From: Ekaterina Esina Date: Mon, 13 Nov 2023 19:42:41 +0300 Subject: [PATCH 0233/1397] cifs: fix check of rc in function generate_smb3signingkey [ Upstream commit 181724fc72486dec2bec8803459be05b5162aaa8 ] Remove extra check after condition, add check after generating key for encryption. The check is needed to return non zero rc before rewriting it with generating key for decryption. Found by Linux Verification Center (linuxtesting.org) with SVACE. Reviewed-by: Paulo Alcantara (SUSE) Fixes: d70e9fa55884 ("cifs: try opening channels after mounting") Signed-off-by: Ekaterina Esina Co-developed-by: Anastasia Belova Signed-off-by: Anastasia Belova Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/smb2transport.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 23c50ed7d4b5..a136fc4cc2b5 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -452,6 +452,8 @@ generate_smb3signingkey(struct cifs_ses *ses, ptriplet->encryption.context, ses->smb3encryptionkey, SMB3_ENC_DEC_KEY_SIZE); + if (rc) + return rc; rc = generate_key(ses, ptriplet->decryption.label, ptriplet->decryption.context, ses->smb3decryptionkey, @@ -460,9 +462,6 @@ generate_smb3signingkey(struct cifs_ses *ses, return rc; } - if (rc) - return rc; - #ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS cifs_dbg(VFS, "%s: dumping generated AES session keys\n", __func__); /* From 2635504d913f5c66d8853a12eb8eb852b5552824 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Jun 2023 12:34:46 +0200 Subject: [PATCH 0234/1397] perf/core: Fix cpuctx refcounting commit 889c58b3155ff4c8e8671c95daef63d6fabbb6b1 upstream. Audit of the refcounting turned up that perf_pmu_migrate_context() fails to migrate the ctx refcount. Fixes: bd2756811766 ("perf: Rewrite core context handling") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20230612093539.085862001@infradead.org Cc: Signed-off-by: Greg Kroah-Hartman --- include/linux/perf_event.h | 13 ++++++++----- kernel/events/core.c | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 16913318af93..e846f87e2d09 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -843,11 +843,11 @@ struct perf_event { }; /* - * ,-----------------------[1:n]----------------------. - * V V - * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event - * ^ ^ | | - * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-' + * ,-----------------------[1:n]------------------------. + * V V + * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event + * | | + * `--[n:1]-> pmu <-[1:n]--' * * * struct perf_event_pmu_context lifetime is refcount based and RCU freed @@ -865,6 +865,9 @@ struct perf_event { * ctx->mutex pinning the configuration. Since we hold a reference on * group_leader (through the filedesc) it can't go away, therefore it's * associated pmu_ctx must exist and cannot change due to ctx->mutex. + * + * perf_event holds a refcount on perf_event_context + * perf_event holds a refcount on perf_event_pmu_context */ struct perf_event_pmu_context { struct pmu *pmu; diff --git a/kernel/events/core.c b/kernel/events/core.c index 452c15d74732..6dbb03c53237 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4816,6 +4816,11 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx, void *task_ctx_data = NULL; if (!ctx->task) { + /* + * perf_pmu_migrate_context() / __perf_pmu_install_event() + * relies on the fact that find_get_pmu_context() cannot fail + * for CPU contexts. + */ struct perf_cpu_pmu_context *cpc; cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu); @@ -12877,6 +12882,9 @@ static void __perf_pmu_install_event(struct pmu *pmu, int cpu, struct perf_event *event) { struct perf_event_pmu_context *epc; + struct perf_event_context *old_ctx = event->ctx; + + get_ctx(ctx); /* normally find_get_context() */ event->cpu = cpu; epc = find_get_pmu_context(pmu, ctx, event); @@ -12885,6 +12893,11 @@ static void __perf_pmu_install_event(struct pmu *pmu, if (event->state >= PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_INACTIVE; perf_install_in_context(ctx, event, cpu); + + /* + * Now that event->ctx is updated and visible, put the old ctx. + */ + put_ctx(old_ctx); } static void __perf_pmu_install(struct perf_event_context *ctx, @@ -12923,6 +12936,10 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) struct perf_event_context *src_ctx, *dst_ctx; LIST_HEAD(events); + /* + * Since per-cpu context is persistent, no need to grab an extra + * reference. + */ src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx; dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx; From 10f49cdfd5fb342a1a9641930dc040c570694e98 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 27 Oct 2023 10:28:22 -0700 Subject: [PATCH 0235/1397] i915/perf: Fix NULL deref bugs with drm_dbg() calls commit 471aa951bf1206d3c10d0daa67005b8e4db4ff83 upstream. When i915 perf interface is not available dereferencing it will lead to NULL dereferences. As returning -ENOTSUPP is pretty clear return when perf interface is not available. Fixes: 2fec539112e8 ("i915/perf: Replace DRM_DEBUG with driver specific drm_dbg call") Suggested-by: Tvrtko Ursulin Signed-off-by: Harshit Mogalapalli Reviewed-by: Tvrtko Ursulin Cc: # v6.0+ Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231027172822.2753059-1-harshit.m.mogalapalli@oracle.com [tursulin: added stable tag] (cherry picked from commit 36f27350ff745bd228ab04d7845dfbffc177a889) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_perf.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 59e1e21df271..109135fcfca2 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4286,11 +4286,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, u32 known_open_flags; int ret; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK | @@ -4666,11 +4663,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct i915_oa_reg *regs; int err, id; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } if (!perf->metrics_kobj) { drm_dbg(&perf->i915->drm, @@ -4832,11 +4826,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct i915_oa_config *oa_config; int ret; - if (!perf->i915) { - drm_dbg(&perf->i915->drm, - "i915 perf interface not available for this system\n"); + if (!perf->i915) return -ENOTSUPP; - } if (i915_perf_stream_paranoid && !perfmon_capable()) { drm_dbg(&perf->i915->drm, From 1fd3a0b84f52b2591237a590d932af06ffeddc10 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 2 Nov 2023 17:16:54 -0700 Subject: [PATCH 0236/1397] perf: arm_cspmu: Reject events meant for other PMUs commit 15c7ef7341a2e54cfa12ac502c65d6fd2cce2b62 upstream. Coresight PMU driver didn't reject events meant for other PMUs. This caused some of the Core PMU events disappearing from the output of "perf list". In addition, trying to run e.g. $ perf stat -e r2 sleep 1 made Coresight PMU driver to handle the event instead of letting Core PMU driver to deal with it. Cc: stable@vger.kernel.org Fixes: e37dfd65731d ("perf: arm_cspmu: Add support for ARM CoreSight PMU driver") Signed-off-by: Ilkka Koskinen Acked-by: Will Deacon Reviewed-by: Besar Wicaksono Acked-by: Mark Rutland Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20231103001654.35565-1-ilkka@os.amperecomputing.com Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- drivers/perf/arm_cspmu/arm_cspmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index e2b7827c4563..9363c31f31b8 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -635,6 +635,9 @@ static int arm_cspmu_event_init(struct perf_event *event) cspmu = to_arm_cspmu(event->pmu); + if (event->attr.type != event->pmu->type) + return -ENOENT; + /* * Following other "uncore" PMUs, we do not support sampling mode or * attach to a task (per-process mode). From 45a0de41ec383c8b7c6d442734ba3852dd2fc4a7 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 9 Nov 2023 09:21:28 +0100 Subject: [PATCH 0237/1397] drivers: perf: Check find_first_bit() return value commit c6e316ac05532febb0c966fa9b55f5258ed037be upstream. We must check the return value of find_first_bit() before using the return value as an index array since it happens to overflow the array and then panic: [ 107.318430] Kernel BUG [#1] [ 107.319434] CPU: 3 PID: 1238 Comm: kill Tainted: G E 6.6.0-rc6ubuntu-defconfig #2 [ 107.319465] Hardware name: riscv-virtio,qemu (DT) [ 107.319551] epc : pmu_sbi_ovf_handler+0x3a4/0x3ae [ 107.319840] ra : pmu_sbi_ovf_handler+0x52/0x3ae [ 107.319868] epc : ffffffff80a0a77c ra : ffffffff80a0a42a sp : ffffaf83fecda350 [ 107.319884] gp : ffffffff823961a8 tp : ffffaf8083db1dc0 t0 : ffffaf83fecda480 [ 107.319899] t1 : ffffffff80cafe62 t2 : 000000000000ff00 s0 : ffffaf83fecda520 [ 107.319921] s1 : ffffaf83fecda380 a0 : 00000018fca29df0 a1 : ffffffffffffffff [ 107.319936] a2 : 0000000001073734 a3 : 0000000000000004 a4 : 0000000000000000 [ 107.319951] a5 : 0000000000000040 a6 : 000000001d1c8774 a7 : 0000000000504d55 [ 107.319965] s2 : ffffffff82451f10 s3 : ffffffff82724e70 s4 : 000000000000003f [ 107.319980] s5 : 0000000000000011 s6 : ffffaf8083db27c0 s7 : 0000000000000000 [ 107.319995] s8 : 0000000000000001 s9 : 00007fffb45d6558 s10: 00007fffb45d81a0 [ 107.320009] s11: ffffaf7ffff60000 t3 : 0000000000000004 t4 : 0000000000000000 [ 107.320023] t5 : ffffaf7f80000000 t6 : ffffaf8000000000 [ 107.320037] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 [ 107.320081] [] pmu_sbi_ovf_handler+0x3a4/0x3ae [ 107.320112] [] handle_percpu_devid_irq+0x9e/0x1a0 [ 107.320131] [] generic_handle_domain_irq+0x28/0x36 [ 107.320148] [] riscv_intc_irq+0x36/0x4e [ 107.320166] [] handle_riscv_irq+0x54/0x86 [ 107.320189] [] do_irq+0x64/0x96 [ 107.320271] Code: 85a6 855e b097 ff7f 80e7 9220 b709 9002 4501 bbd9 (9002) 6097 [ 107.320585] ---[ end trace 0000000000000000 ]--- [ 107.320704] Kernel panic - not syncing: Fatal exception in interrupt [ 107.320775] SMP: stopping secondary CPUs [ 107.321219] Kernel Offset: 0x0 from 0xffffffff80000000 [ 107.333051] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: 4905ec2fb7e6 ("RISC-V: Add sscofpmf extension support") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231109082128.40777-1-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- drivers/perf/riscv_pmu_sbi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index fcb0c70ca222..cd8a2b9efd78 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -687,6 +687,11 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) /* Firmware counter don't support overflow yet */ fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS); + if (fidx == RISCV_MAX_COUNTERS) { + csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); + return IRQ_NONE; + } + event = cpu_hw_evt->events[fidx]; if (!event) { csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); From 074aed64f345ae17287716b0f7ddd577b3153fb7 Mon Sep 17 00:00:00 2001 From: Vikash Garodia Date: Thu, 10 Aug 2023 07:55:01 +0530 Subject: [PATCH 0238/1397] media: venus: hfi: add checks to perform sanity on queue pointers commit 5e538fce33589da6d7cb2de1445b84d3a8a692f7 upstream. Read and write pointers are used to track the packet index in the memory shared between video driver and firmware. There is a possibility of OOB access if the read or write pointer goes beyond the queue memory size. Add checks for the read and write pointer to avoid OOB access. Cc: stable@vger.kernel.org Fixes: d96d3f30c0f2 ("[media] media: venus: hfi: add Venus HFI files") Signed-off-by: Vikash Garodia Signed-off-by: Stanimir Varbanov Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/venus/hfi_venus.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c index 19fc6575a489..f9437b6412b9 100644 --- a/drivers/media/platform/qcom/venus/hfi_venus.c +++ b/drivers/media/platform/qcom/venus/hfi_venus.c @@ -205,6 +205,11 @@ static int venus_write_queue(struct venus_hfi_device *hdev, new_wr_idx = wr_idx + dwords; wr_ptr = (u32 *)(queue->qmem.kva + (wr_idx << 2)); + + if (wr_ptr < (u32 *)queue->qmem.kva || + wr_ptr > (u32 *)(queue->qmem.kva + queue->qmem.size - sizeof(*wr_ptr))) + return -EINVAL; + if (new_wr_idx < qsize) { memcpy(wr_ptr, packet, dwords << 2); } else { @@ -272,6 +277,11 @@ static int venus_read_queue(struct venus_hfi_device *hdev, } rd_ptr = (u32 *)(queue->qmem.kva + (rd_idx << 2)); + + if (rd_ptr < (u32 *)queue->qmem.kva || + rd_ptr > (u32 *)(queue->qmem.kva + queue->qmem.size - sizeof(*rd_ptr))) + return -EINVAL; + dwords = *rd_ptr >> 2; if (!dwords) return -EINVAL; From ee534525baf87a8e82229fc4ad4a0743a5b431f5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 28 Sep 2023 10:29:53 +0300 Subject: [PATCH 0239/1397] perf intel-pt: Fix async branch flags commit f2d87895cbc4af80649850dcf5da36de6b2ed3dd upstream. Ensure PERF_IP_FLAG_ASYNC is set always for asynchronous branches (i.e. interrupts etc). Fixes: 90e457f7be08 ("perf tools: Add Intel PT support") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20230928072953.19369-1-adrian.hunter@intel.com Signed-off-by: Namhyung Kim Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/intel-pt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index dbf0bc71a63b..f38893e0b036 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1512,9 +1512,11 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) } else if (ptq->state->flags & INTEL_PT_ASYNC) { if (!ptq->state->to_ip) ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_TRACE_END; else if (ptq->state->from_nr && !ptq->state->to_nr) ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_VMEXIT; else ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | From 63fe567a0668a893e8bb16faa8624e8c99c2dbd2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 19 Oct 2023 01:34:23 +1000 Subject: [PATCH 0240/1397] powerpc/perf: Fix disabling BHRB and instruction sampling commit ea142e590aec55ba40c5affb4d49e68c713c63dc upstream. When the PMU is disabled, MMCRA is not updated to disable BHRB and instruction sampling. This can lead to those features remaining enabled, which can slow down a real or emulated CPU. Fixes: 1cade527f6e9 ("powerpc/perf: BHRB control to disable BHRB logic when not used") Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/20231018153423.298373-1-npiggin@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/core-book3s.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 8c1f7def596e..10b946e9c6e7 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1371,8 +1371,7 @@ static void power_pmu_disable(struct pmu *pmu) /* * Disable instruction sampling if it was enabled */ - if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) - val &= ~MMCRA_SAMPLE_ENABLE; + val &= ~MMCRA_SAMPLE_ENABLE; /* Disable BHRB via mmcra (BHRBRD) for p10 */ if (ppmu->flags & PPMU_ARCH_31) @@ -1383,7 +1382,7 @@ static void power_pmu_disable(struct pmu *pmu) * instruction sampling or BHRB. */ if (val != mmcra) { - mtspr(SPRN_MMCRA, mmcra); + mtspr(SPRN_MMCRA, val); mb(); isync(); } From 792473a4e0eb872443ff30bc96f2b7cd0d878619 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 6 Oct 2023 21:09:28 -0700 Subject: [PATCH 0241/1397] randstruct: Fix gcc-plugin performance mode to stay in group commit 381fdb73d1e2a48244de7260550e453d1003bb8e upstream. The performance mode of the gcc-plugin randstruct was shuffling struct members outside of the cache-line groups. Limit the range to the specified group indexes. Cc: linux-hardening@vger.kernel.org Cc: stable@vger.kernel.org Reported-by: Lukas Loidolt Closes: https://lore.kernel.org/all/f3ca77f0-e414-4065-83a5-ae4c4d25545d@student.tuwien.ac.at Fixes: 313dd1b62921 ("gcc-plugins: Add the randstruct plugin") Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- scripts/gcc-plugins/randomize_layout_plugin.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 5e5744b65f8a..910bd21d08f4 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -191,12 +191,14 @@ static void partition_struct(tree *fields, unsigned long length, struct partitio static void performance_shuffle(tree *newtree, unsigned long length, ranctx *prng_state) { - unsigned long i, x; + unsigned long i, x, index; struct partition_group size_group[length]; unsigned long num_groups = 0; unsigned long randnum; partition_struct(newtree, length, (struct partition_group *)&size_group, &num_groups); + + /* FIXME: this group shuffle is currently a no-op. */ for (i = num_groups - 1; i > 0; i--) { struct partition_group tmp; randnum = ranval(prng_state) % (i + 1); @@ -206,11 +208,14 @@ static void performance_shuffle(tree *newtree, unsigned long length, ranctx *prn } for (x = 0; x < num_groups; x++) { - for (i = size_group[x].start + size_group[x].length - 1; i > size_group[x].start; i--) { + for (index = size_group[x].length - 1; index > 0; index--) { tree tmp; + + i = size_group[x].start + index; if (DECL_BIT_FIELD_TYPE(newtree[i])) continue; - randnum = ranval(prng_state) % (i + 1); + randnum = ranval(prng_state) % (index + 1); + randnum += size_group[x].start; // we could handle this case differently if desired if (DECL_BIT_FIELD_TYPE(newtree[randnum])) continue; From 96474ea47dc67b0704392d59192b233c8197db0e Mon Sep 17 00:00:00 2001 From: Mark Hasemeyer Date: Tue, 7 Nov 2023 14:47:43 -0700 Subject: [PATCH 0242/1397] spi: Fix null dereference on suspend commit bef4a48f4ef798c4feddf045d49e53c8a97d5e37 upstream. A race condition exists where a synchronous (noqueue) transfer can be active during a system suspend. This can cause a null pointer dereference exception to occur when the system resumes. Example order of events leading to the exception: 1. spi_sync() calls __spi_transfer_message_noqueue() which sets ctlr->cur_msg 2. Spi transfer begins via spi_transfer_one_message() 3. System is suspended interrupting the transfer context 4. System is resumed 6. spi_controller_resume() calls spi_start_queue() which resets cur_msg to NULL 7. Spi transfer context resumes and spi_finalize_current_message() is called which dereferences cur_msg (which is now NULL) Wait for synchronous transfers to complete before suspending by acquiring the bus mutex and setting/checking a suspend flag. Signed-off-by: Mark Hasemeyer Link: https://lore.kernel.org/r/20231107144743.v1.1.I7987f05f61901f567f7661763646cb7d7919b528@changeid Signed-off-by: Mark Brown Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi.c | 56 ++++++++++++++++++++++++++++------------- include/linux/spi/spi.h | 1 + 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 8d6304cb061e..399e81d37b3b 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3323,33 +3323,52 @@ void spi_unregister_controller(struct spi_controller *ctlr) } EXPORT_SYMBOL_GPL(spi_unregister_controller); +static inline int __spi_check_suspended(const struct spi_controller *ctlr) +{ + return ctlr->flags & SPI_CONTROLLER_SUSPENDED ? -ESHUTDOWN : 0; +} + +static inline void __spi_mark_suspended(struct spi_controller *ctlr) +{ + mutex_lock(&ctlr->bus_lock_mutex); + ctlr->flags |= SPI_CONTROLLER_SUSPENDED; + mutex_unlock(&ctlr->bus_lock_mutex); +} + +static inline void __spi_mark_resumed(struct spi_controller *ctlr) +{ + mutex_lock(&ctlr->bus_lock_mutex); + ctlr->flags &= ~SPI_CONTROLLER_SUSPENDED; + mutex_unlock(&ctlr->bus_lock_mutex); +} + int spi_controller_suspend(struct spi_controller *ctlr) { - int ret; + int ret = 0; /* Basically no-ops for non-queued controllers */ - if (!ctlr->queued) - return 0; - - ret = spi_stop_queue(ctlr); - if (ret) - dev_err(&ctlr->dev, "queue stop failed\n"); + if (ctlr->queued) { + ret = spi_stop_queue(ctlr); + if (ret) + dev_err(&ctlr->dev, "queue stop failed\n"); + } + __spi_mark_suspended(ctlr); return ret; } EXPORT_SYMBOL_GPL(spi_controller_suspend); int spi_controller_resume(struct spi_controller *ctlr) { - int ret; - - if (!ctlr->queued) - return 0; + int ret = 0; - ret = spi_start_queue(ctlr); - if (ret) - dev_err(&ctlr->dev, "queue restart failed\n"); + __spi_mark_resumed(ctlr); + if (ctlr->queued) { + ret = spi_start_queue(ctlr); + if (ret) + dev_err(&ctlr->dev, "queue restart failed\n"); + } return ret; } EXPORT_SYMBOL_GPL(spi_controller_resume); @@ -4153,8 +4172,7 @@ static void __spi_transfer_message_noqueue(struct spi_controller *ctlr, struct s ctlr->cur_msg = msg; ret = __spi_pump_transfer_message(ctlr, msg, was_busy); if (ret) - goto out; - + dev_err(&ctlr->dev, "noqueue transfer failed\n"); ctlr->cur_msg = NULL; ctlr->fallback = false; @@ -4170,7 +4188,6 @@ static void __spi_transfer_message_noqueue(struct spi_controller *ctlr, struct s spi_idle_runtime_pm(ctlr); } -out: mutex_unlock(&ctlr->io_mutex); } @@ -4193,6 +4210,11 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message) int status; struct spi_controller *ctlr = spi->controller; + if (__spi_check_suspended(ctlr)) { + dev_warn_once(&spi->dev, "Attempted to sync while suspend\n"); + return -ESHUTDOWN; + } + status = __spi_validate(spi, message); if (status != 0) return status; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7f8b478fdeb3..8cc7a99927f9 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -566,6 +566,7 @@ struct spi_controller { #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ #define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; From 156a9b745653fc81e73e4bf086943eac549f6e42 Mon Sep 17 00:00:00 2001 From: Hao Sun Date: Wed, 1 Nov 2023 13:33:51 +0100 Subject: [PATCH 0243/1397] bpf: Fix check_stack_write_fixed_off() to correctly spill imm commit 811c363645b33e6e22658634329e95f383dfc705 upstream. In check_stack_write_fixed_off(), imm value is cast to u32 before being spilled to the stack. Therefore, the sign information is lost, and the range information is incorrect when load from the stack again. For the following prog: 0: r2 = r10 1: *(u64*)(r2 -40) = -44 2: r0 = *(u64*)(r2 - 40) 3: if r0 s<= 0xa goto +2 4: r0 = 1 5: exit 6: r0 = 0 7: exit The verifier gives: func#0 @0 0: R1=ctx(off=0,imm=0) R10=fp0 0: (bf) r2 = r10 ; R2_w=fp0 R10=fp0 1: (7a) *(u64 *)(r2 -40) = -44 ; R2_w=fp0 fp-40_w=4294967252 2: (79) r0 = *(u64 *)(r2 -40) ; R0_w=4294967252 R2_w=fp0 fp-40_w=4294967252 3: (c5) if r0 s< 0xa goto pc+2 mark_precise: frame0: last_idx 3 first_idx 0 subseq_idx -1 mark_precise: frame0: regs=r0 stack= before 2: (79) r0 = *(u64 *)(r2 -40) 3: R0_w=4294967252 4: (b7) r0 = 1 ; R0_w=1 5: (95) exit verification time 7971 usec stack depth 40 processed 6 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 So remove the incorrect cast, since imm field is declared as s32, and __mark_reg_known() takes u64, so imm would be correctly sign extended by compiler. Fixes: ecdf985d7615 ("bpf: track immediate values written to stack by BPF_ST instruction") Cc: stable@vger.kernel.org Signed-off-by: Hao Sun Acked-by: Shung-Hsi Yu Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231101-fix-check-stack-write-v3-1-f05c2b1473d5@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3e6bc21d6b17..173a8ae5e45f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4376,7 +4376,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, insn->imm != 0 && env->bpf_capable) { struct bpf_reg_state fake_reg = {}; - __mark_reg_known(&fake_reg, (u32)insn->imm); + __mark_reg_known(&fake_reg, insn->imm); fake_reg.type = SCALAR_VALUE; save_register_state(state, spi, &fake_reg, size); } else if (reg && is_spillable_regtype(reg->type)) { From 226b46d53bb8e1d49fc541d7851c09ab04d782b9 Mon Sep 17 00:00:00 2001 From: Shung-Hsi Yu Date: Thu, 2 Nov 2023 13:39:03 +0800 Subject: [PATCH 0244/1397] bpf: Fix precision tracking for BPF_ALU | BPF_TO_BE | BPF_END MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 291d044fd51f8484066300ee42afecf8c8db7b3a upstream. BPF_END and BPF_NEG has a different specification for the source bit in the opcode compared to other ALU/ALU64 instructions, and is either reserved or use to specify the byte swap endianness. In both cases the source bit does not encode source operand location, and src_reg is a reserved field. backtrack_insn() currently does not differentiate BPF_END and BPF_NEG from other ALU/ALU64 instructions, which leads to r0 being incorrectly marked as precise when processing BPF_ALU | BPF_TO_BE | BPF_END instructions. This commit teaches backtrack_insn() to correctly mark precision for such case. While precise tracking of BPF_NEG and other BPF_END instructions are correct and does not need fixing, this commit opt to process all BPF_NEG and BPF_END instructions within the same if-clause to better align with current convention used in the verifier (e.g. check_alu_op). Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking") Cc: stable@vger.kernel.org Reported-by: Mohamed Mahmoud Closes: https://lore.kernel.org/r/87jzrrwptf.fsf@toke.dk Tested-by: Toke Høiland-Jørgensen Tested-by: Tao Lyu Acked-by: Eduard Zingerman Signed-off-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20231102053913.12004-2-shung-hsi.yu@suse.com Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 173a8ae5e45f..824531d4c262 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3444,7 +3444,12 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, if (class == BPF_ALU || class == BPF_ALU64) { if (!bt_is_reg_set(bt, dreg)) return 0; - if (opcode == BPF_MOV) { + if (opcode == BPF_END || opcode == BPF_NEG) { + /* sreg is reserved and unused + * dreg still need precision before this insn + */ + return 0; + } else if (opcode == BPF_MOV) { if (BPF_SRC(insn->code) == BPF_X) { /* dreg = sreg or dreg = (s8, s16, s32)sreg * dreg needs precision after this insn From b6b6294f823983440e754bbecec9a674b081276c Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Fri, 20 Oct 2023 16:28:49 +0530 Subject: [PATCH 0245/1397] scsi: mpt3sas: Fix loop logic commit 3c978492c333f0c08248a8d51cecbe5eb5f617c9 upstream. The retry loop continues to iterate until the count reaches 30, even after receiving the correct value. Exit loop when a non-zero value is read. Fixes: 4ca10f3e3174 ("scsi: mpt3sas: Perform additional retries if doorbell read returns 0") Cc: stable@vger.kernel.org Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20231020105849.6350-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 61a32bf00747..a75f670bf551 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -223,8 +223,8 @@ _base_readl_ext_retry(const void __iomem *addr) for (i = 0 ; i < 30 ; i++) { ret_val = readl(addr); - if (ret_val == 0) - continue; + if (ret_val != 0) + break; } return ret_val; From ad347c46384b9b95bf3403c4dd819700167a3754 Mon Sep 17 00:00:00 2001 From: Chandrakanth patil Date: Tue, 3 Oct 2023 16:30:18 +0530 Subject: [PATCH 0246/1397] scsi: megaraid_sas: Increase register read retry rount from 3 to 30 for selected registers commit 8e3ed9e786511ad800c33605ed904b9de49323cf upstream. In BMC environments with concurrent access to multiple registers, certain registers occasionally yield a value of 0 even after 3 retries due to hardware errata. As a fix, we have extended the retry count from 3 to 30. The same errata applies to the mpt3sas driver, and a similar patch has been accepted. Please find more details in the mpt3sas patch reference link. Link: https://lore.kernel.org/r/20230829090020.5417-2-ranjan.kumar@broadcom.com Fixes: 272652fcbf1a ("scsi: megaraid_sas: add retry logic in megasas_readl") Cc: stable@vger.kernel.org Signed-off-by: Chandrakanth patil Signed-off-by: Sumit Saxena Link: https://lore.kernel.org/r/20231003110021.168862-2-chandrakanth.patil@broadcom.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index e1aa667dae66..3d4f13da1ae8 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -263,13 +263,13 @@ u32 megasas_readl(struct megasas_instance *instance, * Fusion registers could intermittently return all zeroes. * This behavior is transient in nature and subsequent reads will * return valid value. As a workaround in driver, retry readl for - * upto three times until a non-zero value is read. + * up to thirty times until a non-zero value is read. */ if (instance->adapter_type == AERO_SERIES) { do { ret_val = readl(addr); i++; - } while (ret_val == 0 && i < 3); + } while (ret_val == 0 && i < 30); return ret_val; } else { return readl(addr); From 98b3777034e39dfc10a9aff8a5888bd57a6fd2fd Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 8 Sep 2023 20:23:28 +0530 Subject: [PATCH 0247/1397] scsi: ufs: qcom: Update PHY settings only when scaling to higher gears commit fc88ca19ad0989dc0e4d4b126d5d0ba91f6cb616 upstream. The "hs_gear" variable is used to program the PHY settings (submode) during ufs_qcom_power_up_sequence(). Currently, it is being updated every time the agreed gear changes. Due to this, if the gear got downscaled before suspend (runtime/system), then while resuming, the PHY settings for the lower gear will be applied first and later when scaling to max gear with REINIT, the PHY settings for the max gear will be applied. This adds a latency while resuming and also really not needed as the PHY gear settings are backwards compatible i.e., we can continue using the PHY settings for max gear with lower gear speed. So let's update the "hs_gear" variable _only_ when the agreed gear is greater than the current one. This guarantees that the PHY settings will be changed only during probe time and fatal error condition. Due to this, UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH can now be skipped when the PM operation is in progress. Cc: stable@vger.kernel.org Fixes: 96a7141da332 ("scsi: ufs: core: Add support for reinitializing the UFS device") Reported-by: Can Guo Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20230908145329.154024-1-manivannan.sadhasivam@linaro.org Reviewed-by: Can Guo Tested-by: Can Guo Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/ufs/core/ufshcd.c | 3 ++- drivers/ufs/host/ufs-qcom.c | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 5767642982c1..a4b483a393c9 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8723,7 +8723,8 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params) if (ret) goto out; - if (hba->quirks & UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH) { + if (!hba->pm_op_in_progress && + (hba->quirks & UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH)) { /* Reset the device and controller before doing reinit */ ufshcd_device_reset(hba); ufshcd_hba_stop(hba); diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index d1149b1c3ed5..b1d720031251 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -909,8 +909,13 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, return ret; } - /* Use the agreed gear */ - host->hs_gear = dev_req_params->gear_tx; + /* + * Update hs_gear only when the gears are scaled to a higher value. This is because, + * the PHY gear settings are backwards compatible and we only need to change the PHY + * settings while scaling to higher gears. + */ + if (dev_req_params->gear_tx > host->hs_gear) + host->hs_gear = dev_req_params->gear_tx; /* enable the device ref clock before changing to HS mode */ if (!ufshcd_is_hs_mode(&hba->pwr_info) && From d03346d4ab4b68ca2bde66f5582d6f4e3433886d Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Mon, 30 Oct 2023 12:19:12 +0530 Subject: [PATCH 0248/1397] scsi: qla2xxx: Fix system crash due to bad pointer access commit 19597cad64d608aa8ac2f8aef50a50187a565223 upstream. User experiences system crash when running AER error injection. The perturbation causes the abort-all-I/O path to trigger. The driver assumes all I/O on this path is FCP only. If there is both NVMe & FCP traffic, a system crash happens. Add additional check to see if I/O is FCP or not before access. PID: 999019 TASK: ff35d769f24722c0 CPU: 53 COMMAND: "kworker/53:1" 0 [ff3f78b964847b58] machine_kexec at ffffffffae86973d 1 [ff3f78b964847ba8] __crash_kexec at ffffffffae9be29d 2 [ff3f78b964847c70] crash_kexec at ffffffffae9bf528 3 [ff3f78b964847c78] oops_end at ffffffffae8282ab 4 [ff3f78b964847c98] exc_page_fault at ffffffffaf2da502 5 [ff3f78b964847cc0] asm_exc_page_fault at ffffffffaf400b62 [exception RIP: qla2x00_abort_srb+444] RIP: ffffffffc07b5f8c RSP: ff3f78b964847d78 RFLAGS: 00010046 RAX: 0000000000000282 RBX: ff35d74a0195a200 RCX: ff35d76886fd03a0 RDX: 0000000000000001 RSI: ffffffffc07c5ec8 RDI: ff35d74a0195a200 RBP: ff35d76913d22080 R8: ff35d7694d103200 R9: ff35d7694d103200 R10: 0000000100000000 R11: ffffffffb05d6630 R12: 0000000000010000 R13: ff3f78b964847df8 R14: ff35d768d8754000 R15: ff35d768877248e0 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 6 [ff3f78b964847d70] qla2x00_abort_srb at ffffffffc07b5f84 [qla2xxx] 7 [ff3f78b964847de0] __qla2x00_abort_all_cmds at ffffffffc07b6238 [qla2xxx] 8 [ff3f78b964847e38] qla2x00_abort_all_cmds at ffffffffc07ba635 [qla2xxx] 9 [ff3f78b964847e58] qla2x00_terminate_rport_io at ffffffffc08145eb [qla2xxx] 10 [ff3f78b964847e70] fc_terminate_rport_io at ffffffffc045987e [scsi_transport_fc] 11 [ff3f78b964847e88] process_one_work at ffffffffae914f15 12 [ff3f78b964847ed0] worker_thread at ffffffffae9154c0 13 [ff3f78b964847f10] kthread at ffffffffae91c456 14 [ff3f78b964847f50] ret_from_fork at ffffffffae8036ef Cc: stable@vger.kernel.org Fixes: f45bca8c5052 ("scsi: qla2xxx: Fix double scsi_done for abort path") Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20231030064912.37912-1-njavali@marvell.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_os.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index dcae09a37d49..c45eef743c45 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1836,8 +1836,16 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res, } spin_lock_irqsave(qp->qp_lock_ptr, *flags); - if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd))) - sp->done(sp, res); + switch (sp->type) { + case SRB_SCSI_CMD: + if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd))) + sp->done(sp, res); + break; + default: + if (ret_cmd) + sp->done(sp, res); + break; + } } else { sp->done(sp, res); } From f84d461f33a6b27304d468d9cfb56c0cefdb4ee7 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Mon, 6 Nov 2023 15:51:17 +0800 Subject: [PATCH 0249/1397] scsi: ufs: core: Fix racing issue between ufshcd_mcq_abort() and ISR commit 27900d7119c464b43cd9eac69c85884d17bae240 upstream. If command timeout happens and cq complete IRQ is raised at the same time, ufshcd_mcq_abort clears lprb->cmd and a NULL pointer deref happens in the ISR. Error log: ufshcd_abort: Device abort task at tag 18 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000108 pc : [0xffffffe27ef867ac] scsi_dma_unmap+0xc/0x44 lr : [0xffffffe27f1b898c] ufshcd_release_scsi_cmd+0x24/0x114 Fixes: f1304d442077 ("scsi: ufs: mcq: Added ufshcd_mcq_abort()") Cc: stable@vger.kernel.org Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20231106075117.8995-1-peter.wang@mediatek.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/ufs/core/ufs-mcq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 5c75ab9d6bb5..0787456c2b89 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -630,6 +630,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) int tag = scsi_cmd_to_rq(cmd)->tag; struct ufshcd_lrb *lrbp = &hba->lrb[tag]; struct ufs_hw_queue *hwq; + unsigned long flags; int err = FAILED; if (!ufshcd_cmd_inflight(lrbp->cmd)) { @@ -670,8 +671,10 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) } err = SUCCESS; + spin_lock_irqsave(&hwq->cq_lock, flags); if (ufshcd_cmd_inflight(lrbp->cmd)) ufshcd_release_scsi_cmd(hba, lrbp); + spin_unlock_irqrestore(&hwq->cq_lock, flags); out: return err; From 4cb2be4c7305e90e1ffa50af4d6d34b4ba5181bc Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Tue, 7 Nov 2023 10:22:51 -0800 Subject: [PATCH 0250/1397] x86/shstk: Delay signal entry SSP write until after user accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 31255e072b2e91f97645d792d25b2db744186dd1 upstream. When a signal is being delivered, the kernel needs to make accesses to userspace. These accesses could encounter an access error, in which case the signal delivery itself will trigger a segfault. Usually this would result in the kernel killing the process. But in the case of a SEGV signal handler being configured, the failure of the first signal delivery will result in *another* signal getting delivered. The second signal may succeed if another thread has resolved the issue that triggered the segfault (i.e. a well timed mprotect()/mmap()), or the second signal is being delivered to another stack (i.e. an alt stack). On x86, in the non-shadow stack case, all the accesses to userspace are done before changes to the registers (in pt_regs). The operation is aborted when an access error occurs, so although there may be writes done for the first signal, control flow changes for the signal (regs->ip, regs->sp, etc) are not committed until all the accesses have already completed successfully. This means that the second signal will be delivered as if it happened at the time of the first signal. It will effectively replace the first aborted signal, overwriting the half-written frame of the aborted signal. So on sigreturn from the second signal, control flow will resume happily from the point of control flow where the original signal was delivered. The problem is, when shadow stack is active, the shadow stack SSP register/MSR is updated *before* some of the userspace accesses. This means if the earlier accesses succeed and the later ones fail, the second signal will not be delivered at the same spot on the shadow stack as the first one. So on sigreturn from the second signal, the SSP will be pointing to the wrong location on the shadow stack (off by a frame). Pengfei privately reported that while using a shadow stack enabled glibc, the “signal06” test in the LTP test-suite hung. It turns out it is testing the above described double signal scenario. When this test was compiled with shadow stack, the first signal pushed a shadow stack sigframe, then the second pushed another. When the second signal was handled, the SSP was at the first shadow stack signal frame instead of the original location. The test then got stuck as the #CP from the twice incremented SSP was incorrect and generated segfaults in a loop. Fix this by adjusting the SSP register only after any userspace accesses, such that there can be no failures after the SSP is adjusted. Do this by moving the shadow stack sigframe push logic to happen after all other userspace accesses. Note, sigreturn (as opposed to the signal delivery dealt with in this patch) has ordering behavior that could lead to similar failures. The ordering issues there extend beyond shadow stack to include the alt stack restoration. Fixing that would require cross-arch changes, and the ordering today does not cause any known test or apps breakages. So leave it as is, for now. [ dhansen: minor changelog/subject tweak ] Fixes: 05e36022c054 ("x86/shstk: Handle signals for shadow stack") Reported-by: Pengfei Xu Signed-off-by: Rick Edgecombe Signed-off-by: Dave Hansen Tested-by: Pengfei Xu Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20231107182251.91276-1-rick.p.edgecombe%40intel.com Link: https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/signal/signal06.c Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/signal_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index cacf2ede6217..23d8aaf8d9fd 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -175,9 +175,6 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp); uc_flags = frame_uc_flags(regs); - if (setup_signal_shadow_stack(ksig)) - return -EFAULT; - if (!user_access_begin(frame, sizeof(*frame))) return -EFAULT; @@ -198,6 +195,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) return -EFAULT; } + if (setup_signal_shadow_stack(ksig)) + return -EFAULT; + /* Set up registers for signal handler */ regs->di = ksig->sig; /* In case the signal handler was declared without prototypes */ From 20b951fcdce135e2013e82e1e837d286872d8880 Mon Sep 17 00:00:00 2001 From: Roxana Nicolescu Date: Fri, 15 Sep 2023 12:23:25 +0200 Subject: [PATCH 0251/1397] crypto: x86/sha - load modules based on CPU features commit 1c43c0f1f84aa59dfc98ce66f0a67b2922aa7f9d upstream. x86 optimized crypto modules are built as modules rather than build-in and they are not loaded when the crypto API is initialized, resulting in the generic builtin module (sha1-generic) being used instead. It was discovered when creating a sha1/sha256 checksum of a 2Gb file by using kcapi-tools because it would take significantly longer than creating a sha512 checksum of the same file. trace-cmd showed that for sha1/256 the generic module was used, whereas for sha512 the optimized module was used instead. Add module aliases() for these x86 optimized crypto modules based on CPU feature bits so udev gets a chance to load them later in the boot process. This resulted in ~3x decrease in the real-time execution of kcapi-dsg. Fix is inspired from commit aa031b8f702e ("crypto: x86/sha512 - load based on CPU features") where a similar fix was done for sha512. Cc: stable@vger.kernel.org # 5.15+ Suggested-by: Dimitri John Ledkov Suggested-by: Julian Andres Klode Signed-off-by: Roxana Nicolescu Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/sha1_ssse3_glue.c | 12 ++++++++++++ arch/x86/crypto/sha256_ssse3_glue.c | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 44340a1139e0..959afa705e95 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -24,8 +24,17 @@ #include #include #include +#include #include +static const struct x86_cpu_id module_cpu_ids[] = { + X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), + X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), + X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); + static int sha1_update(struct shash_desc *desc, const u8 *data, unsigned int len, sha1_block_fn *sha1_xform) { @@ -301,6 +310,9 @@ static inline void unregister_sha1_ni(void) { } static int __init sha1_ssse3_mod_init(void) { + if (!x86_match_cpu(module_cpu_ids)) + return -ENODEV; + if (register_sha1_ssse3()) goto fail; diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 3a5f6be7dbba..d25235f0ccaf 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -38,11 +38,20 @@ #include #include #include +#include #include asmlinkage void sha256_transform_ssse3(struct sha256_state *state, const u8 *data, int blocks); +static const struct x86_cpu_id module_cpu_ids[] = { + X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), + X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), + X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); + static int _sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len, sha256_block_fn *sha256_xform) { @@ -366,6 +375,9 @@ static inline void unregister_sha256_ni(void) { } static int __init sha256_ssse3_mod_init(void) { + if (!x86_match_cpu(module_cpu_ids)) + return -ENODEV; + if (register_sha256_ssse3()) goto fail; From 3f6154cb2908228225ab92be51fe99ee30459864 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 4 Oct 2023 09:49:59 -0500 Subject: [PATCH 0252/1397] x86/PCI: Avoid PME from D3hot/D3cold for AMD Rembrandt and Phoenix USB4 commit 7d08f21f8c6307cb05cabb8d86e90ff6ccba57e9 upstream. Iain reports that USB devices can't be used to wake a Lenovo Z13 from suspend. This occurs because on some AMD platforms, even though the Root Ports advertise PME_Support for D3hot and D3cold, wakeup events from devices on a USB4 controller don't result in wakeup interrupts from the Root Port when amd-pmc has put the platform in a hardware sleep state. If amd-pmc will be involved in the suspend, remove D3hot and D3cold from the PME_Support mask of Root Ports above USB4 controllers so we avoid those states if we need wakeups. Restore D3 support at resume so that it can be used by runtime suspend. This affects both AMD Rembrandt and Phoenix SoCs. "pm_suspend_target_state == PM_SUSPEND_ON" means we're doing runtime suspend, and amd-pmc will not be involved. In that case PMEs work as advertised in D3hot/D3cold, so we don't need to do anything. Note that amd-pmc is technically optional, and there's no need for this quirk if it's not present, but we assume it's always present because power consumption is so high without it. Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend") Link: https://lore.kernel.org/r/20231004144959.158840-1-mario.limonciello@amd.com Reported-by: Iain Lane Closes: https://forums.lenovo.com/t5/Ubuntu/Z13-can-t-resume-from-suspend-with-external-USB-keyboard/m-p/5217121 Signed-off-by: Mario Limonciello [bhelgaas: commit log, move to arch/x86/pci/fixup.c, add #includes] Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/fixup.c | 59 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index e3ec02e6ac9f..f347c20247d3 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -3,9 +3,11 @@ * Exceptions for specific devices. Usually work-arounds for fatal design flaws. */ +#include #include #include #include +#include #include #include #include @@ -904,3 +906,60 @@ static void chromeos_fixup_apl_pci_l1ss_capability(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x5ad6, chromeos_save_apl_pci_l1ss_capability); DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, 0x5ad6, chromeos_fixup_apl_pci_l1ss_capability); + +#ifdef CONFIG_SUSPEND +/* + * Root Ports on some AMD SoCs advertise PME_Support for D3hot and D3cold, but + * if the SoC is put into a hardware sleep state by the amd-pmc driver, the + * Root Ports don't generate wakeup interrupts for USB devices. + * + * When suspending, remove D3hot and D3cold from the PME_Support advertised + * by the Root Port so we don't use those states if we're expecting wakeup + * interrupts. Restore the advertised PME_Support when resuming. + */ +static void amd_rp_pme_suspend(struct pci_dev *dev) +{ + struct pci_dev *rp; + + /* + * PM_SUSPEND_ON means we're doing runtime suspend, which means + * amd-pmc will not be involved so PMEs during D3 work as advertised. + * + * The PMEs *do* work if amd-pmc doesn't put the SoC in the hardware + * sleep state, but we assume amd-pmc is always present. + */ + if (pm_suspend_target_state == PM_SUSPEND_ON) + return; + + rp = pcie_find_root_port(dev); + if (!rp->pm_cap) + return; + + rp->pme_support &= ~((PCI_PM_CAP_PME_D3hot|PCI_PM_CAP_PME_D3cold) >> + PCI_PM_CAP_PME_SHIFT); + dev_info_once(&rp->dev, "quirk: disabling D3cold for suspend\n"); +} + +static void amd_rp_pme_resume(struct pci_dev *dev) +{ + struct pci_dev *rp; + u16 pmc; + + rp = pcie_find_root_port(dev); + if (!rp->pm_cap) + return; + + pci_read_config_word(rp, rp->pm_cap + PCI_PM_PMC, &pmc); + rp->pme_support = FIELD_GET(PCI_PM_CAP_PME_MASK, pmc); +} +/* Rembrandt (yellow_carp) */ +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x162e, amd_rp_pme_suspend); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x162e, amd_rp_pme_resume); +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x162f, amd_rp_pme_suspend); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x162f, amd_rp_pme_resume); +/* Phoenix (pink_sardine) */ +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x1668, amd_rp_pme_suspend); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1668, amd_rp_pme_resume); +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x1669, amd_rp_pme_suspend); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1669, amd_rp_pme_resume); +#endif /* CONFIG_SUSPEND */ From 119f7373b0cba03c4b5a86a936c0384c074a8c94 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Thu, 26 Oct 2023 12:20:36 +0900 Subject: [PATCH 0253/1397] x86/apic/msi: Fix misconfigured non-maskable MSI quirk commit b56ebe7c896dc78b5865ec2c4b1dae3c93537517 upstream. commit ef8dd01538ea ("genirq/msi: Make interrupt allocation less convoluted"), reworked the code so that the x86 specific quirk for affinity setting of non-maskable PCI/MSI interrupts is not longer activated if necessary. This could be solved by restoring the original logic in the core MSI code, but after a deeper analysis it turned out that the quirk flag is not required at all. The quirk is only required when the PCI/MSI device cannot mask the MSI interrupts, which in turn also prevents reservation mode from being enabled for the affected interrupt. This allows ot remove the NOMASK quirk bit completely as msi_set_affinity() can instead check whether reservation mode is enabled for the interrupt, which gives exactly the same answer. Even in the momentary non-existing case that the reservation mode would be not set for a maskable MSI interrupt this would not cause any harm as it just would cause msi_set_affinity() to go needlessly through the functionaly equivalent slow path, which works perfectly fine with maskable interrupts as well. Rework msi_set_affinity() to query the reservation mode and remove all NOMASK quirk logic from the core code. [ tglx: Massaged changelog ] Fixes: ef8dd01538ea ("genirq/msi: Make interrupt allocation less convoluted") Suggested-by: Thomas Gleixner Signed-off-by: Koichiro Den Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231026032036.2462428-1-den@valinux.co.jp Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/msi.c | 8 +++----- include/linux/irq.h | 26 ++++---------------------- include/linux/msi.h | 6 ------ kernel/irq/debugfs.c | 1 - kernel/irq/msi.c | 12 +----------- 5 files changed, 8 insertions(+), 45 deletions(-) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 6b6b711678fe..d9651f15ae4f 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -55,14 +55,14 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) * caused by the non-atomic update of the address/data pair. * * Direct update is possible when: - * - The MSI is maskable (remapped MSI does not use this code path)). - * The quirk bit is not set in this case. + * - The MSI is maskable (remapped MSI does not use this code path). + * The reservation mode bit is set in this case. * - The new vector is the same as the old vector * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) * - The interrupt is not yet started up * - The new destination CPU is the same as the old destination CPU */ - if (!irqd_msi_nomask_quirk(irqd) || + if (!irqd_can_reserve(irqd) || cfg->vector == old_cfg.vector || old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || !irqd_is_started(irqd) || @@ -215,8 +215,6 @@ static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain, if (WARN_ON_ONCE(domain != real_parent)) return false; info->chip->irq_set_affinity = msi_set_affinity; - /* See msi_set_affinity() for the gory details */ - info->flags |= MSI_FLAG_NOMASK_QUIRK; break; case DOMAIN_BUS_DMAR: case DOMAIN_BUS_AMDVI: diff --git a/include/linux/irq.h b/include/linux/irq.h index d8a6fdce9373..90081afa10ce 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -215,8 +215,6 @@ struct irq_data { * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set * IRQD_CAN_RESERVE - Can use reservation mode - * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change - * required * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked * from actual interrupt context. * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call @@ -247,11 +245,10 @@ enum { IRQD_SINGLE_TARGET = BIT(24), IRQD_DEFAULT_TRIGGER_SET = BIT(25), IRQD_CAN_RESERVE = BIT(26), - IRQD_MSI_NOMASK_QUIRK = BIT(27), - IRQD_HANDLE_ENFORCE_IRQCTX = BIT(28), - IRQD_AFFINITY_ON_ACTIVATE = BIT(29), - IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(30), - IRQD_RESEND_WHEN_IN_PROGRESS = BIT(31), + IRQD_HANDLE_ENFORCE_IRQCTX = BIT(27), + IRQD_AFFINITY_ON_ACTIVATE = BIT(28), + IRQD_IRQ_ENABLED_ON_SUSPEND = BIT(29), + IRQD_RESEND_WHEN_IN_PROGRESS = BIT(30), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -426,21 +423,6 @@ static inline bool irqd_can_reserve(struct irq_data *d) return __irqd_to_state(d) & IRQD_CAN_RESERVE; } -static inline void irqd_set_msi_nomask_quirk(struct irq_data *d) -{ - __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK; -} - -static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d) -{ - __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK; -} - -static inline bool irqd_msi_nomask_quirk(struct irq_data *d) -{ - return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; -} - static inline void irqd_set_affinity_on_activate(struct irq_data *d) { __irqd_to_state(d) |= IRQD_AFFINITY_ON_ACTIVATE; diff --git a/include/linux/msi.h b/include/linux/msi.h index a50ea79522f8..ddace8c34dcf 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -547,12 +547,6 @@ enum { MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5), /* Free MSI descriptors */ MSI_FLAG_FREE_MSI_DESCS = (1 << 6), - /* - * Quirk to handle MSI implementations which do not provide - * masking. Currently known to affect x86, but has to be partially - * handled in the core MSI code. - */ - MSI_FLAG_NOMASK_QUIRK = (1 << 7), /* Mask for the generic functionality */ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 5971a66be034..aae0402507ed 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -121,7 +121,6 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_AFFINITY_ON_ACTIVATE), BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), BIT_MASK_DESCR(IRQD_CAN_RESERVE), - BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK), BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index b4c31a5c1147..79b4a58ba9c3 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -1204,7 +1204,6 @@ static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc, #define VIRQ_CAN_RESERVE 0x01 #define VIRQ_ACTIVATE 0x02 -#define VIRQ_NOMASK_QUIRK 0x04 static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags) { @@ -1213,8 +1212,6 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag if (!(vflags & VIRQ_CAN_RESERVE)) { irqd_clr_can_reserve(irqd); - if (vflags & VIRQ_NOMASK_QUIRK) - irqd_set_msi_nomask_quirk(irqd); /* * If the interrupt is managed but no CPU is available to @@ -1275,15 +1272,8 @@ static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain * Interrupt can use a reserved vector and will not occupy * a real device vector until the interrupt is requested. */ - if (msi_check_reservation_mode(domain, info, dev)) { + if (msi_check_reservation_mode(domain, info, dev)) vflags |= VIRQ_CAN_RESERVE; - /* - * MSI affinity setting requires a special quirk (X86) when - * reservation mode is active. - */ - if (info->flags & MSI_FLAG_NOMASK_QUIRK) - vflags |= VIRQ_NOMASK_QUIRK; - } xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED)) From 8e780a58c181539d3286f9d03fc945476c79cc6f Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Mon, 14 Aug 2023 10:18:26 +0200 Subject: [PATCH 0254/1397] x86/cpu/hygon: Fix the CPU topology evaluation for real commit ee545b94d39a00c93dc98b1dbcbcf731d2eadeb4 upstream. Hygon processors with a model ID > 3 have CPUID leaf 0xB correctly populated and don't need the fixed package ID shift workaround. The fixup is also incorrect when running in a guest. Fixes: e0ceeae708ce ("x86/CPU/hygon: Fix phys_proc_id calculation logic for multi-die processors") Signed-off-by: Pu Wen Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Link: https://lore.kernel.org/r/tencent_594804A808BD93A4EBF50A994F228E3A7F07@qq.com Link: https://lore.kernel.org/r/20230814085112.089607918@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/hygon.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index defdc594be14..a7b3ef4c4de9 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -87,8 +87,12 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) if (!err) c->x86_coreid_bits = get_count_order(c->x86_max_cores); - /* Socket ID is ApicId[6] for these processors. */ - c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; + /* + * Socket ID is ApicId[6] for the processors with model <= 0x3 + * when running on host. + */ + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && c->x86_model <= 0x3) + c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; cacheinfo_hygon_init_llc_id(c, cpu); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { From e0c63803d729cd2d81394d6d29b8601f09c262a2 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Tue, 17 Oct 2023 15:51:02 +0000 Subject: [PATCH 0255/1397] KVM: x86: hyper-v: Don't auto-enable stimer on write from user-space commit d6800af51c76b6dae20e6023bbdc9b3da3ab5121 upstream. Don't apply the stimer's counter side effects when modifying its value from user-space, as this may trigger spurious interrupts. For example: - The stimer is configured in auto-enable mode. - The stimer's count is set and the timer enabled. - The stimer expires, an interrupt is injected. - The VM is live migrated. - The stimer config and count are deserialized, auto-enable is ON, the stimer is re-enabled. - The stimer expires right away, and injects an unwarranted interrupt. Cc: stable@vger.kernel.org Fixes: 1f4b34f825e8 ("kvm/x86: Hyper-V SynIC timers") Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20231017155101.40677-1-nsaenz@amazon.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/hyperv.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 7c2dac6824e2..238afd7335e4 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -727,10 +727,12 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, stimer_cleanup(stimer); stimer->count = count; - if (stimer->count == 0) - stimer->config.enable = 0; - else if (stimer->config.auto_enable) - stimer->config.enable = 1; + if (!host) { + if (stimer->count == 0) + stimer->config.enable = 0; + else if (stimer->config.auto_enable) + stimer->config.enable = 1; + } if (stimer->config.enable) stimer_mark_pending(stimer, false); From d3719df92e836f56a94f5fea12ff768a54b580cc Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Thu, 19 Oct 2023 18:06:57 +0200 Subject: [PATCH 0256/1397] KVM: x86: Ignore MSR_AMD64_TW_CFG access commit 2770d4722036d6bd24bcb78e9cd7f6e572077d03 upstream. Hyper-V enabled Windows Server 2022 KVM VM cannot be started on Zen1 Ryzen since it crashes at boot with SYSTEM_THREAD_EXCEPTION_NOT_HANDLED + STATUS_PRIVILEGED_INSTRUCTION (in other words, because of an unexpected #GP in the guest kernel). This is because Windows tries to set bit 8 in MSR_AMD64_TW_CFG and can't handle receiving a #GP when doing so. Give this MSR the same treatment that commit 2e32b7190641 ("x86, kvm: Add MSR_AMD64_BU_CFG2 to the list of ignored MSRs") gave MSR_AMD64_BU_CFG2 under justification that this MSR is baremetal-relevant only. Although apparently it was then needed for Linux guests, not Windows as in this case. With this change, the aforementioned guest setup is able to finish booting successfully. This issue can be reproduced either on a Summit Ridge Ryzen (with just "-cpu host") or on a Naples EPYC (with "-cpu host,stepping=1" since EPYC is ordinarily stepping 2). Alternatively, userspace could solve the problem by using MSR filters, but forcing every userspace to define a filter isn't very friendly and doesn't add much, if any, value. The only potential hiccup is if one of these "baremetal-only" MSRs ever requires actual emulation and/or has F/M/S specific behavior. But if that happens, then KVM can still punt *that* handling to userspace since userspace MSR filters "win" over KVM's default handling. Signed-off-by: Maciej S. Szmigiero Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1ce85d9c7c9e9632393816cf19c902e0a3f411f1.1697731406.git.maciej.szmigiero@oracle.com [sean: call out MSR filtering alternative] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kvm/x86.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b37abb55e948..389f9594746e 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -553,6 +553,7 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_TW_CFG 0xc0011023 #define MSR_AMD64_DE_CFG 0xc0011029 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 41cce5031126..e179db7c17da 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3641,6 +3641,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_PATCH_LOADER: case MSR_AMD64_BU_CFG2: case MSR_AMD64_DC_CFG: + case MSR_AMD64_TW_CFG: case MSR_F15H_EX_CFG: break; @@ -4065,6 +4066,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_BU_CFG2: case MSR_IA32_PERF_CTL: case MSR_AMD64_DC_CFG: + case MSR_AMD64_TW_CFG: case MSR_F15H_EX_CFG: /* * Intel Sandy Bridge CPUs must support the RAPL (running average power From fb05c7b93c67876fafff6bd6277f210d442171ee Mon Sep 17 00:00:00 2001 From: Tao Su Date: Thu, 14 Sep 2023 13:55:04 +0800 Subject: [PATCH 0257/1397] KVM: x86: Clear bit12 of ICR after APIC-write VM-exit commit 629d3698f6958ee6f8131ea324af794f973b12ac upstream. When IPI virtualization is enabled, a WARN is triggered if bit12 of ICR MSR is set after APIC-write VM-exit. The reason is kvm_apic_send_ipi() thinks the APIC_ICR_BUSY bit should be cleared because KVM has no delay, but kvm_apic_write_nodecode() doesn't clear the APIC_ICR_BUSY bit. Under the x2APIC section, regarding ICR, the SDM says: It remains readable only to aid in debugging; however, software should not assume the value returned by reading the ICR is the last written value. I.e. the guest is allowed to set bit 12. However, the SDM also gives KVM free reign to do whatever it wants with the bit, so long as KVM's behavior doesn't confuse userspace or break KVM's ABI. Clear bit 12 so that it reads back as '0'. This approach is safer than "do nothing" and is consistent with the case where IPI virtualization is disabled or not supported, i.e., handle_fastpath_set_x2apic_icr_irqoff() -> kvm_x2apic_icr_write() Opportunistically replace the TODO with a comment calling out that eating the write is likely faster than a conditional branch around the busy bit. Link: https://lore.kernel.org/all/ZPj6iF0Q7iynn62p@google.com/ Fixes: 5413bcba7ed5 ("KVM: x86: Add support for vICR APIC-write VM-Exits in x2APIC mode") Cc: stable@vger.kernel.org Signed-off-by: Tao Su Tested-by: Yi Lai Reviewed-by: Chao Gao Link: https://lore.kernel.org/r/20230914055504.151365-1-tao1.su@linux.intel.com [sean: tweak changelog, replace TODO with comment, drop local "val"] Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3e977dbbf993..23e4db6cd99d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2444,22 +2444,22 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) { struct kvm_lapic *apic = vcpu->arch.apic; - u64 val; /* - * ICR is a single 64-bit register when x2APIC is enabled. For legacy - * xAPIC, ICR writes need to go down the common (slightly slower) path - * to get the upper half from ICR2. + * ICR is a single 64-bit register when x2APIC is enabled, all others + * registers hold 32-bit values. For legacy xAPIC, ICR writes need to + * go down the common path to get the upper half from ICR2. + * + * Note, using the write helpers may incur an unnecessary write to the + * virtual APIC state, but KVM needs to conditionally modify the value + * in certain cases, e.g. to clear the ICR busy bit. The cost of extra + * conditional branches is likely a wash relative to the cost of the + * maybe-unecessary write, and both are in the noise anyways. */ - if (apic_x2apic_mode(apic) && offset == APIC_ICR) { - val = kvm_lapic_get_reg64(apic, APIC_ICR); - kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32)); - trace_kvm_apic_write(APIC_ICR, val); - } else { - /* TODO: optimize to just emulate side effect w/o one more write */ - val = kvm_lapic_get_reg(apic, offset); - kvm_lapic_reg_write(apic, offset, (u32)val); - } + if (apic_x2apic_mode(apic) && offset == APIC_ICR) + kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR)); + else + kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); } EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); From 7de33b0fc98dc90f17416d9353b1faf514e93d18 Mon Sep 17 00:00:00 2001 From: Haitao Shan Date: Tue, 12 Sep 2023 16:55:45 -0700 Subject: [PATCH 0258/1397] KVM: x86: Fix lapic timer interrupt lost after loading a snapshot. commit 9cfec6d097c607e36199cf0cfbb8cf5acbd8e9b2 upstream. When running android emulator (which is based on QEMU 2.12) on certain Intel hosts with kernel version 6.3-rc1 or above, guest will freeze after loading a snapshot. This is almost 100% reproducible. By default, the android emulator will use snapshot to speed up the next launching of the same android guest. So this breaks the android emulator badly. I tested QEMU 8.0.4 from Debian 12 with an Ubuntu 22.04 guest by running command "loadvm" after "savevm". The same issue is observed. At the same time, none of our AMD platforms is impacted. More experiments show that loading the KVM module with "enable_apicv=false" can workaround it. The issue started to show up after commit 8e6ed96cdd50 ("KVM: x86: fire timer when it is migrated and expired, and in oneshot mode"). However, as is pointed out by Sean Christopherson, it is introduced by commit 967235d32032 ("KVM: vmx: clear pending interrupts on KVM_SET_LAPIC"). commit 8e6ed96cdd50 ("KVM: x86: fire timer when it is migrated and expired, and in oneshot mode") just makes it easier to hit the issue. Having both commits, the oneshot lapic timer gets fired immediately inside the KVM_SET_LAPIC call when loading the snapshot. On Intel platforms with APIC virtualization and posted interrupt processing, this eventually leads to setting the corresponding PIR bit. However, the whole PIR bits get cleared later in the same KVM_SET_LAPIC call by apicv_post_state_restore. This leads to timer interrupt lost. The fix is to move vmx_apicv_post_state_restore to the beginning of the KVM_SET_LAPIC call and rename to vmx_apicv_pre_state_restore. What vmx_apicv_post_state_restore does is actually clearing any former apicv state and this behavior is more suitable to carry out in the beginning. Fixes: 967235d32032 ("KVM: vmx: clear pending interrupts on KVM_SET_LAPIC") Cc: stable@vger.kernel.org Suggested-by: Sean Christopherson Signed-off-by: Haitao Shan Link: https://lore.kernel.org/r/20230913000215.478387-1-hshan@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/lapic.c | 4 ++++ arch/x86/kvm/vmx/vmx.c | 4 ++-- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index e3054e3e46d5..9b419f0de713 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -108,6 +108,7 @@ KVM_X86_OP_OPTIONAL(vcpu_blocking) KVM_X86_OP_OPTIONAL(vcpu_unblocking) KVM_X86_OP_OPTIONAL(pi_update_irte) KVM_X86_OP_OPTIONAL(pi_start_assignment) +KVM_X86_OP_OPTIONAL(apicv_pre_state_restore) KVM_X86_OP_OPTIONAL(apicv_post_state_restore) KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt) KVM_X86_OP_OPTIONAL(set_hv_timer) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 70d139406bc8..fb9f5fa96cc9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1708,6 +1708,7 @@ struct kvm_x86_ops { int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void (*pi_start_assignment)(struct kvm *kvm); + void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 23e4db6cd99d..245b20973cae 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2670,6 +2670,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) u64 msr_val; int i; + static_call_cond(kvm_x86_apicv_pre_state_restore)(vcpu); + if (!init_event) { msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_reset_bsp(vcpu)) @@ -2981,6 +2983,8 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) struct kvm_lapic *apic = vcpu->arch.apic; int r; + static_call_cond(kvm_x86_apicv_pre_state_restore)(vcpu); + kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); /* set SPIV separately to get count of SW disabled APICs right */ apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 72e3943f3693..9bba5352582c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6912,7 +6912,7 @@ static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); } -static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) +static void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -8286,7 +8286,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .set_apic_access_page_addr = vmx_set_apic_access_page_addr, .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, .load_eoi_exitmap = vmx_load_eoi_exitmap, - .apicv_post_state_restore = vmx_apicv_post_state_restore, + .apicv_pre_state_restore = vmx_apicv_pre_state_restore, .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, From fcf890eca49364b24f6903f47b91abc0e3e6fc6a Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Tue, 7 Nov 2023 17:57:41 +0800 Subject: [PATCH 0259/1397] mmc: sdhci-pci-gli: GL9755: Mask the replay timer timeout of AER commit 85dd3af64965c1c0eb7373b340a1b1f7773586b0 upstream. Due to a flaw in the hardware design, the GL9755 replay timer frequently times out when ASPM is enabled. As a result, the warning messages will often appear in the system log when the system accesses the GL9755 PCI config. Therefore, the replay timer timeout must be masked. Fixes: 36ed2fd32b2c ("mmc: sdhci-pci-gli: A workaround to allow GL9755 to enter ASPM L1.2") Signed-off-by: Victor Shih Acked-by: Adrian Hunter Acked-by: Kai-Heng Feng Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231107095741.8832-3-victorshihgli@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-gli.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 109d4b010f97..6f4dea4efa39 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -149,6 +149,9 @@ #define PCI_GLI_9755_PM_CTRL 0xFC #define PCI_GLI_9755_PM_STATE GENMASK(1, 0) +#define PCI_GLI_9755_CORRERR_MASK 0x214 +#define PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT BIT(12) + #define SDHCI_GLI_9767_GM_BURST_SIZE 0x510 #define SDHCI_GLI_9767_GM_BURST_SIZE_AXI_ALWAYS_SET BIT(8) @@ -756,6 +759,11 @@ static void gl9755_hw_setting(struct sdhci_pci_slot *slot) value &= ~PCI_GLI_9755_PM_STATE; pci_write_config_dword(pdev, PCI_GLI_9755_PM_CTRL, value); + /* mask the replay timer timeout of AER */ + pci_read_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, &value); + value |= PCI_GLI_9755_CORRERR_MASK_REPLAY_TIMER_TIMEOUT; + pci_write_config_dword(pdev, PCI_GLI_9755_CORRERR_MASK, value); + gl9755_wt_off(pdev); } From 04d2fea2b7fa7bf21a1bd7520a067e75e469c999 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 26 Oct 2023 12:41:14 -0400 Subject: [PATCH 0260/1397] sched: psi: fix unprivileged polling against cgroups commit 8b39d20eceeda6c4eb23df1497f9ed2fffdc8f69 upstream. 519fabc7aaba ("psi: remove 500ms min window size limitation for triggers") breaks unprivileged psi polling on cgroups. Historically, we had a privilege check for polling in the open() of a pressure file in /proc, but were erroneously missing it for the open() of cgroup pressure files. When unprivileged polling was introduced in d82caa273565 ("sched/psi: Allow unprivileged polling of N*2s period"), it needed to filter privileges depending on the exact polling parameters, and as such moved the CAP_SYS_RESOURCE check from the proc open() callback to psi_trigger_create(). Both the proc files as well as cgroup files go through this during write(). This implicitly added the missing check for privileges required for HT polling for cgroups. When 519fabc7aaba ("psi: remove 500ms min window size limitation for triggers") followed right after to remove further restrictions on the RT polling window, it incorrectly assumed the cgroup privilege check was still missing and added it to the cgroup open(), mirroring what we used to do for proc files in the past. As a result, unprivileged poll requests that would be supported now get rejected when opening the cgroup pressure file for writing. Remove the cgroup open() check. psi_trigger_create() handles it. Fixes: 519fabc7aaba ("psi: remove 500ms min window size limitation for triggers") Reported-by: Luca Boccassi Signed-off-by: Johannes Weiner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Luca Boccassi Acked-by: Suren Baghdasaryan Cc: stable@vger.kernel.org # 6.5+ Link: https://lore.kernel.org/r/20231026164114.2488682-1-hannes@cmpxchg.org Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1fb7f562289d..518725b57200 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3867,14 +3867,6 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); } -static int cgroup_pressure_open(struct kernfs_open_file *of) -{ - if (of->file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE)) - return -EPERM; - - return 0; -} - static void cgroup_pressure_release(struct kernfs_open_file *of) { struct cgroup_file_ctx *ctx = of->priv; @@ -5275,7 +5267,6 @@ static struct cftype cgroup_psi_files[] = { { .name = "io.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_IO]), - .open = cgroup_pressure_open, .seq_show = cgroup_io_pressure_show, .write = cgroup_io_pressure_write, .poll = cgroup_pressure_poll, @@ -5284,7 +5275,6 @@ static struct cftype cgroup_psi_files[] = { { .name = "memory.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_MEM]), - .open = cgroup_pressure_open, .seq_show = cgroup_memory_pressure_show, .write = cgroup_memory_pressure_write, .poll = cgroup_pressure_poll, @@ -5293,7 +5283,6 @@ static struct cftype cgroup_psi_files[] = { { .name = "cpu.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_CPU]), - .open = cgroup_pressure_open, .seq_show = cgroup_cpu_pressure_show, .write = cgroup_cpu_pressure_write, .poll = cgroup_pressure_poll, @@ -5303,7 +5292,6 @@ static struct cftype cgroup_psi_files[] = { { .name = "irq.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_IRQ]), - .open = cgroup_pressure_open, .seq_show = cgroup_irq_pressure_show, .write = cgroup_irq_pressure_write, .poll = cgroup_pressure_poll, From a2a2a2a51b6788bc044458bc3c331a6c2795b94d Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 9 Oct 2023 13:18:49 -0400 Subject: [PATCH 0261/1397] audit: don't take task_lock() in audit_exe_compare() code path commit 47846d51348dd62e5231a83be040981b17c955fa upstream. The get_task_exe_file() function locks the given task with task_lock() which when used inside audit_exe_compare() can cause deadlocks on systems that generate audit records when the task_lock() is held. We resolve this problem with two changes: ignoring those cases where the task being audited is not the current task, and changing our approach to obtaining the executable file struct to not require task_lock(). With the intent of the audit exe filter being to filter on audit events generated by processes started by the specified executable, it makes sense that we would only want to use the exe filter on audit records associated with the currently executing process, e.g. @current. If we are asked to filter records using a non-@current task_struct we can safely ignore the exe filter without negatively impacting the admin's expectations for the exe filter. Knowing that we only have to worry about filtering the currently executing task in audit_exe_compare() we can do away with the task_lock() and call get_mm_exe_file() with @current->mm directly. Cc: Fixes: 5efc244346f9 ("audit: fix exe_file access in audit_exe_compare") Reported-by: Andreas Steinmetz Reviewed-by: John Johansen Reviewed-by: Mateusz Guzik Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- kernel/audit_watch.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 65075f1e4ac8..91e82e34b51e 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -527,11 +527,18 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark) unsigned long ino; dev_t dev; - exe_file = get_task_exe_file(tsk); + /* only do exe filtering if we are recording @current events/records */ + if (tsk != current) + return 0; + + if (WARN_ON_ONCE(!current->mm)) + return 0; + exe_file = get_mm_exe_file(current->mm); if (!exe_file) return 0; ino = file_inode(exe_file)->i_ino; dev = file_inode(exe_file)->i_sb->s_dev; fput(exe_file); + return audit_mark_compare(mark, ino, dev); } From d567eb7366073d33c4b4254f0e2b28a7f13f1d1f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 14 Nov 2023 17:25:48 -0500 Subject: [PATCH 0262/1397] audit: don't WARN_ON_ONCE(!current->mm) in audit_exe_compare() commit 969d90ec212bae4b45bf9d21d7daa30aa6cf055e upstream. eBPF can end up calling into the audit code from some odd places, and some of these places don't have @current set properly so we end up tripping the `WARN_ON_ONCE(!current->mm)` near the top of `audit_exe_compare()`. While the basic `!current->mm` check is good, the `WARN_ON_ONCE()` results in some scary console messages so let's drop that and just do the regular `!current->mm` check to avoid problems. Cc: Fixes: 47846d51348d ("audit: don't take task_lock() in audit_exe_compare() code path") Reported-by: Artem Savkov Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- kernel/audit_watch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 91e82e34b51e..7a98cd176a12 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -531,7 +531,7 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark) if (tsk != current) return 0; - if (WARN_ON_ONCE(!current->mm)) + if (!current->mm) return 0; exe_file = get_mm_exe_file(current->mm); if (!exe_file) From 2b1ee516b16a4174a906010d2d42bae5ce78ba04 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Fri, 27 Oct 2023 14:46:40 -0700 Subject: [PATCH 0263/1397] proc: sysctl: prevent aliased sysctls from getting passed to init commit 8001f49394e353f035306a45bcf504f06fca6355 upstream. The code that checks for unknown boot options is unaware of the sysctl alias facility, which maps bootparams to sysctl values. If a user sets an old value that has a valid alias, a message about an invalid parameter will be printed during boot, and the parameter will get passed to init. Fix by checking for the existence of aliased parameters in the unknown boot parameter code. If an alias exists, don't return an error or pass the value to init. Signed-off-by: Krister Johansen Cc: stable@vger.kernel.org Fixes: 0a477e1ae21b ("kernel/sysctl: support handling command line aliases") Signed-off-by: Luis Chamberlain Signed-off-by: Greg Kroah-Hartman --- fs/proc/proc_sysctl.c | 7 +++++++ include/linux/sysctl.h | 6 ++++++ init/main.c | 4 ++++ 3 files changed, 17 insertions(+) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index c88854df0b62..1c9635dddb70 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1592,6 +1592,13 @@ static const char *sysctl_find_alias(char *param) return NULL; } +bool sysctl_is_alias(char *param) +{ + const char *alias = sysctl_find_alias(param); + + return alias != NULL; +} + /* Set sysctl value passed on kernel command line. */ static int process_sysctl_arg(char *param, char *val, const char *unused, void *arg) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 09d7429d67c0..61b40ea81f4d 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -242,6 +242,7 @@ extern void __register_sysctl_init(const char *path, struct ctl_table *table, extern struct ctl_table_header *register_sysctl_mount_point(const char *path); void do_sysctl_args(void); +bool sysctl_is_alias(char *param); int do_proc_douintvec(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, @@ -287,6 +288,11 @@ static inline void setup_sysctl_set(struct ctl_table_set *p, static inline void do_sysctl_args(void) { } + +static inline bool sysctl_is_alias(char *param) +{ + return false; +} #endif /* CONFIG_SYSCTL */ int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, diff --git a/init/main.c b/init/main.c index 436d73261810..e24b0780fdff 100644 --- a/init/main.c +++ b/init/main.c @@ -530,6 +530,10 @@ static int __init unknown_bootoption(char *param, char *val, { size_t len = strlen(param); + /* Handle params aliased to sysctls */ + if (sysctl_is_alias(param)) + return 0; + repair_env_string(param, val); /* Handle obsolete-style parameters */ From aa267cfcfcdb0c2bbf541fcada4e0d7378ff1ec2 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Mon, 9 Oct 2023 21:20:20 +0500 Subject: [PATCH 0264/1397] tty/sysrq: replace smp_processor_id() with get_cpu() commit dd976a97d15b47656991e185a94ef42a0fa5cfd4 upstream. The smp_processor_id() shouldn't be called from preemptible code. Instead use get_cpu() and put_cpu() which disables preemption in addition to getting the processor id. Enable preemption back after calling schedule_work() to make sure that the work gets scheduled on all cores other than the current core. We want to avoid a scenario where current core's stack trace is printed multiple times and one core's stack trace isn't printed because of scheduling of current task. This fixes the following bug: [ 119.143590] sysrq: Show backtrace of all active CPUs [ 119.143902] BUG: using smp_processor_id() in preemptible [00000000] code: bash/873 [ 119.144586] caller is debug_smp_processor_id+0x20/0x30 [ 119.144827] CPU: 6 PID: 873 Comm: bash Not tainted 5.10.124-dirty #3 [ 119.144861] Hardware name: QEMU QEMU Virtual Machine, BIOS 2023.05-1 07/22/2023 [ 119.145053] Call trace: [ 119.145093] dump_backtrace+0x0/0x1a0 [ 119.145122] show_stack+0x18/0x70 [ 119.145141] dump_stack+0xc4/0x11c [ 119.145159] check_preemption_disabled+0x100/0x110 [ 119.145175] debug_smp_processor_id+0x20/0x30 [ 119.145195] sysrq_handle_showallcpus+0x20/0xc0 [ 119.145211] __handle_sysrq+0x8c/0x1a0 [ 119.145227] write_sysrq_trigger+0x94/0x12c [ 119.145247] proc_reg_write+0xa8/0xe4 [ 119.145266] vfs_write+0xec/0x280 [ 119.145282] ksys_write+0x6c/0x100 [ 119.145298] __arm64_sys_write+0x20/0x30 [ 119.145315] el0_svc_common.constprop.0+0x78/0x1e4 [ 119.145332] do_el0_svc+0x24/0x8c [ 119.145348] el0_svc+0x10/0x20 [ 119.145364] el0_sync_handler+0x134/0x140 [ 119.145381] el0_sync+0x180/0x1c0 Cc: jirislaby@kernel.org Cc: stable@vger.kernel.org Fixes: 47cab6a722d4 ("debug lockups: Improve lockup detection, fix generic arch fallback") Signed-off-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20231009162021.3607632-1-usama.anjum@collabora.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/sysrq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 23198e3f1461..6b4a28bcf2f5 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -262,13 +262,14 @@ static void sysrq_handle_showallcpus(u8 key) if (in_hardirq()) regs = get_irq_regs(); - pr_info("CPU%d:\n", smp_processor_id()); + pr_info("CPU%d:\n", get_cpu()); if (regs) show_regs(regs); else show_stack(NULL, NULL, KERN_INFO); schedule_work(&sysrq_showallcpus); + put_cpu(); } } From 065ae2ec2b8af7c959fad1533620b83835d71c20 Mon Sep 17 00:00:00 2001 From: Pavel Krasavin Date: Sat, 14 Oct 2023 11:39:26 +0000 Subject: [PATCH 0265/1397] tty: serial: meson: fix hard LOCKUP on crtscts mode commit 2a1d728f20edeee7f26dc307ed9df4e0d23947ab upstream. There might be hard lockup if we set crtscts mode on port without RTS/CTS configured: # stty -F /dev/ttyAML6 crtscts; echo 1 > /dev/ttyAML6; echo 2 > /dev/ttyAML6 [ 95.890386] rcu: INFO: rcu_preempt detected stalls on CPUs/tasks: [ 95.890857] rcu: 3-...0: (201 ticks this GP) idle=e33c/1/0x4000000000000000 softirq=5844/5846 fqs=4984 [ 95.900212] rcu: (detected by 2, t=21016 jiffies, g=7753, q=296 ncpus=4) [ 95.906972] Task dump for CPU 3: [ 95.910178] task:bash state:R running task stack:0 pid:205 ppid:1 flags:0x00000202 [ 95.920059] Call trace: [ 95.922485] __switch_to+0xe4/0x168 [ 95.925951] 0xffffff8003477508 [ 95.974379] watchdog: Watchdog detected hard LOCKUP on cpu 3 [ 95.974424] Modules linked in: 88x2cs(O) rtc_meson_vrtc Possible solution would be to not allow to setup crtscts on such port. Tested on S905X3 based board. Fixes: ff7693d079e5 ("ARM: meson: serial: add MesonX SoC on-chip uart driver") Cc: stable@vger.kernel.org Signed-off-by: Pavel Krasavin Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Rokosov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/meson_uart.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/meson_uart.c b/drivers/tty/serial/meson_uart.c index 790d910dafa5..9388b9ddea3b 100644 --- a/drivers/tty/serial/meson_uart.c +++ b/drivers/tty/serial/meson_uart.c @@ -380,10 +380,14 @@ static void meson_uart_set_termios(struct uart_port *port, else val |= AML_UART_STOP_BIT_1SB; - if (cflags & CRTSCTS) - val &= ~AML_UART_TWO_WIRE_EN; - else + if (cflags & CRTSCTS) { + if (port->flags & UPF_HARD_FLOW) + val &= ~AML_UART_TWO_WIRE_EN; + else + termios->c_cflag &= ~CRTSCTS; + } else { val |= AML_UART_TWO_WIRE_EN; + } writel(val, port->membase + AML_UART_CONTROL); @@ -705,6 +709,7 @@ static int meson_uart_probe(struct platform_device *pdev) u32 fifosize = 64; /* Default is 64, 128 for EE UART_0 */ int ret = 0; int irq; + bool has_rtscts; if (pdev->dev.of_node) pdev->id = of_alias_get_id(pdev->dev.of_node, "serial"); @@ -732,6 +737,7 @@ static int meson_uart_probe(struct platform_device *pdev) return irq; of_property_read_u32(pdev->dev.of_node, "fifo-size", &fifosize); + has_rtscts = of_property_read_bool(pdev->dev.of_node, "uart-has-rtscts"); if (meson_ports[pdev->id]) { return dev_err_probe(&pdev->dev, -EBUSY, @@ -762,6 +768,8 @@ static int meson_uart_probe(struct platform_device *pdev) port->mapsize = resource_size(res_mem); port->irq = irq; port->flags = UPF_BOOT_AUTOCONF | UPF_LOW_LATENCY; + if (has_rtscts) + port->flags |= UPF_HARD_FLOW; port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_MESON_CONSOLE); port->dev = &pdev->dev; port->line = pdev->id; From 32ca78deedce610f28a85174a86429d66e1e0e02 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 8 Nov 2023 16:25:15 -0500 Subject: [PATCH 0266/1397] acpi/processor: sanitize _OSC/_PDC capabilities for Xen dom0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bfa993b355d33a438a746523e7129391c8664e8a upstream. The Processor capability bits notify ACPI of the OS capabilities, and so ACPI can adjust the return of other Processor methods taking the OS capabilities into account. When Linux is running as a Xen dom0, the hypervisor is the entity in charge of processor power management, and hence Xen needs to make sure the capabilities reported by _OSC/_PDC match the capabilities of the driver in Xen. Introduce a small helper to sanitize the buffer when running as Xen dom0. When Xen supports HWP, this serves as the equivalent of commit a21211672c9a ("ACPI / processor: Request native thermal interrupt handling via _OSC") to avoid SMM crashes. Xen will set bit ACPI_PROC_CAP_COLLAB_PROC_PERF (bit 12) in the capability bits and the _OSC/_PDC call will apply it. [ jandryuk: Mention Xen HWP's need. Support _OSC & _PDC ] Signed-off-by: Roger Pau Monné Cc: stable@vger.kernel.org Signed-off-by: Jason Andryuk Reviewed-by: Michal Wilczynski Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20231108212517.72279-1-jandryuk@gmail.com Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/acpi.h | 14 ++++++++++++++ arch/x86/include/asm/xen/hypervisor.h | 9 +++++++++ drivers/xen/pcpu.c | 22 ++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index c8a7fc23f63c..f896eed4516c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -16,6 +16,9 @@ #include #include #include +#include + +#include #ifdef CONFIG_ACPI_APEI # include @@ -127,6 +130,17 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap) if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_option_idle_override == IDLE_NOMWAIT) *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH); + + if (xen_initial_domain()) { + /* + * When Linux is running as Xen dom0, the hypervisor is the + * entity in charge of the processor power management, and so + * Xen needs to check the OS capabilities reported in the + * processor capabilities buffer matches what the hypervisor + * driver supports. + */ + xen_sanitize_proc_cap_bits(cap); + } } static inline bool acpi_has_cpu_in_madt(void) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 7048dfacc04b..a9088250770f 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -100,4 +100,13 @@ static inline void leave_lazy(enum xen_lazy_mode mode) enum xen_lazy_mode xen_get_lazy_mode(void); +#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI) +void xen_sanitize_proc_cap_bits(uint32_t *buf); +#else +static inline void xen_sanitize_proc_cap_bits(uint32_t *buf) +{ + BUG(); +} +#endif + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index b3e3d1bb37f3..508655273145 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -47,6 +47,9 @@ #include #include +#ifdef CONFIG_ACPI +#include +#endif /* * @cpu_id: Xen physical cpu logic number @@ -400,4 +403,23 @@ bool __init xen_processor_present(uint32_t acpi_id) return online; } + +void xen_sanitize_proc_cap_bits(uint32_t *cap) +{ + struct xen_platform_op op = { + .cmd = XENPF_set_processor_pminfo, + .u.set_pminfo.id = -1, + .u.set_pminfo.type = XEN_PM_PDC, + }; + u32 buf[3] = { ACPI_PDC_REVISION_ID, 1, *cap }; + int ret; + + set_xen_guest_handle(op.u.set_pminfo.pdc, buf); + ret = HYPERVISOR_platform_op(&op); + if (ret) + pr_err("sanitize of _PDC buffer bits from Xen failed: %d\n", + ret); + else + *cap = buf[2]; +} #endif From 11e5dac750de7fbaccd2023f144627c59897aead Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 20 Oct 2023 17:15:29 +0100 Subject: [PATCH 0267/1397] hvc/xen: fix console unplug commit a30badfd7c13fc8763a9e10c5a12ba7f81515a55 upstream. On unplug of a Xen console, xencons_disconnect_backend() unconditionally calls free_irq() via unbind_from_irqhandler(), causing a warning of freeing an already-free IRQ: (qemu) device_del con1 [ 32.050919] ------------[ cut here ]------------ [ 32.050942] Trying to free already-free IRQ 33 [ 32.050990] WARNING: CPU: 0 PID: 51 at kernel/irq/manage.c:1895 __free_irq+0x1d4/0x330 It should be using evtchn_put() to tear down the event channel binding, and let the Linux IRQ side of it be handled by notifier_del_irq() through the HVC code. On which topic... xencons_disconnect_backend() should call hvc_remove() *first*, rather than tearing down the event channel and grant mapping while they are in use. And then the IRQ is guaranteed to be freed by the time it's torn down by evtchn_put(). Since evtchn_put() also closes the actual event channel, avoid calling xenbus_free_evtchn() except in the failure path where the IRQ was not successfully set up. However, calling hvc_remove() at the start of xencons_disconnect_backend() still isn't early enough. An unplug request is indicated by the backend setting its state to XenbusStateClosing, which triggers a notification to xencons_backend_changed(), which... does nothing except set its own frontend state directly to XenbusStateClosed without *actually* tearing down the HVC device or, you know, making sure it isn't actively in use. So the backend sees the guest frontend set its state to XenbusStateClosed and stops servicing the interrupt... and the guest spins for ever in the domU_write_console() function waiting for the ring to drain. Fix that one by calling hvc_remove() from xencons_backend_changed() before signalling to the backend that it's OK to proceed with the removal. Tested with 'dd if=/dev/zero of=/dev/hvc1' while telling Qemu to remove the console device. Signed-off-by: David Woodhouse Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231020161529.355083-4-dwmw2@infradead.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvc_xen.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 98764e740c07..c3cee11ed7d9 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -377,18 +377,21 @@ void xen_console_resume(void) #ifdef CONFIG_HVC_XEN_FRONTEND static void xencons_disconnect_backend(struct xencons_info *info) { - if (info->irq > 0) - unbind_from_irqhandler(info->irq, NULL); - info->irq = 0; + if (info->hvc != NULL) + hvc_remove(info->hvc); + info->hvc = NULL; + if (info->irq > 0) { + evtchn_put(info->evtchn); + info->irq = 0; + info->evtchn = 0; + } + /* evtchn_put() will also close it so this is only an error path */ if (info->evtchn > 0) xenbus_free_evtchn(info->xbdev, info->evtchn); info->evtchn = 0; if (info->gntref > 0) gnttab_free_grant_references(info->gntref); info->gntref = 0; - if (info->hvc != NULL) - hvc_remove(info->hvc); - info->hvc = NULL; } static void xencons_free(struct xencons_info *info) @@ -553,10 +556,23 @@ static void xencons_backend_changed(struct xenbus_device *dev, if (dev->state == XenbusStateClosed) break; fallthrough; /* Missed the backend's CLOSING state */ - case XenbusStateClosing: + case XenbusStateClosing: { + struct xencons_info *info = dev_get_drvdata(&dev->dev);; + + /* + * Don't tear down the evtchn and grant ref before the other + * end has disconnected, but do stop userspace from trying + * to use the device before we allow the backend to close. + */ + if (info->hvc) { + hvc_remove(info->hvc); + info->hvc = NULL; + } + xenbus_frontend_closed(dev); break; } + } } static const struct xenbus_device_id xencons_ids[] = { @@ -616,7 +632,7 @@ static int __init xen_hvc_init(void) list_del(&info->list); spin_unlock_irqrestore(&xencons_lock, flags); if (info->irq) - unbind_from_irqhandler(info->irq, NULL); + evtchn_put(info->evtchn); kfree(info); return r; } From 202f4d78d16b5fa920f0d28c22ed2c07f189879a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 20 Oct 2023 17:15:28 +0100 Subject: [PATCH 0268/1397] hvc/xen: fix error path in xen_hvc_init() to always register frontend driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2704c9a5593f4a47620c12dad78838ca62b52f48 upstream. The xen_hvc_init() function should always register the frontend driver, even when there's no primary console — as there may be secondary consoles. (Qemu can always add secondary consoles, but only the toolstack can add the primary because it's special.) Signed-off-by: David Woodhouse Reviewed-by: Juergen Gross Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231020161529.355083-3-dwmw2@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvc_xen.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index c3cee11ed7d9..81627442027a 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -604,7 +604,7 @@ static int __init xen_hvc_init(void) ops = &dom0_hvc_ops; r = xen_initial_domain_console_init(); if (r < 0) - return r; + goto register_fe; info = vtermno_to_xencons(HVC_COOKIE); } else { ops = &domU_hvc_ops; @@ -613,7 +613,7 @@ static int __init xen_hvc_init(void) else r = xen_pv_console_init(); if (r < 0) - return r; + goto register_fe; info = vtermno_to_xencons(HVC_COOKIE); info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn); @@ -638,6 +638,7 @@ static int __init xen_hvc_init(void) } r = 0; + register_fe: #ifdef CONFIG_HVC_XEN_FRONTEND r = xenbus_register_frontend(&xencons_driver); #endif From 192e6eba4e72655571907dddcbf2ad61a373cea7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 20 Oct 2023 17:15:27 +0100 Subject: [PATCH 0269/1397] hvc/xen: fix event channel handling for secondary consoles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ef5dd8ec88ac11e8e353164407d55b73c988b369 upstream. The xencons_connect_backend() function allocates a local interdomain event channel with xenbus_alloc_evtchn(), then calls bind_interdomain_evtchn_to_irq_lateeoi() to bind to that port# on the *remote* domain. That doesn't work very well: (qemu) device_add xen-console,id=con1,chardev=pty0 [ 44.323872] xenconsole console-1: 2 xenbus_dev_probe on device/console/1 [ 44.323995] xenconsole: probe of console-1 failed with error -2 Fix it to use bind_evtchn_to_irq_lateeoi(), which does the right thing by just binding that *local* event channel to an irq. The backend will do the interdomain binding. This didn't affect the primary console because the setup for that is special — the toolstack allocates the guest event channel and the guest discovers it with HVMOP_get_param. Fixes: fe415186b43d ("xen/console: harden hvc_xen against event channel storms") Signed-off-by: David Woodhouse Reviewed-by: Juergen Gross Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231020161529.355083-2-dwmw2@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/hvc_xen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 81627442027a..34c01874f45b 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -436,7 +436,7 @@ static int xencons_connect_backend(struct xenbus_device *dev, if (ret) return ret; info->evtchn = evtchn; - irq = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn); + irq = bind_evtchn_to_irq_lateeoi(evtchn); if (irq < 0) return irq; info->irq = irq; From 0763bcef474d2a520def66900455e950127b4eff Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 18 Sep 2023 14:48:01 +0200 Subject: [PATCH 0270/1397] PCI/sysfs: Protect driver's D3cold preference from user space commit 70b70a4307cccebe91388337b1c85735ce4de6ff upstream. struct pci_dev contains two flags which govern whether the device may suspend to D3cold: * no_d3cold provides an opt-out for drivers (e.g. if a device is known to not wake from D3cold) * d3cold_allowed provides an opt-out for user space (default is true, user space may set to false) Since commit 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend"), the user space setting overwrites the driver setting. Essentially user space is trusted to know better than the driver whether D3cold is working. That feels unsafe and wrong. Assume that the change was introduced inadvertently and do not overwrite no_d3cold when d3cold_allowed is modified. Instead, consider d3cold_allowed in addition to no_d3cold when choosing a suspend state for the device. That way, user space may opt out of D3cold if the driver hasn't, but it may no longer force an opt in if the driver has opted out. Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend") Link: https://lore.kernel.org/r/b8a7f4af2b73f6b506ad8ddee59d747cbf834606.1695025365.git.lukas@wunner.de Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg Reviewed-by: Mario Limonciello Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-acpi.c | 2 +- drivers/pci/pci-sysfs.c | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index a05350a4e49c..05b7357bd258 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -911,7 +911,7 @@ pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) { int acpi_state, d_max; - if (pdev->no_d3cold) + if (pdev->no_d3cold || !pdev->d3cold_allowed) d_max = ACPI_STATE_D3_HOT; else d_max = ACPI_STATE_D3_COLD; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 5a6241044c3c..3317b9354716 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -529,10 +529,7 @@ static ssize_t d3cold_allowed_store(struct device *dev, return -EINVAL; pdev->d3cold_allowed = !!val; - if (pdev->d3cold_allowed) - pci_d3cold_enable(pdev); - else - pci_d3cold_disable(pdev); + pci_bridge_d3_update(pdev); pm_runtime_resume(dev); From 1f8d1e4796a9e9e605495c0725a1a748af578b40 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 22 Oct 2023 21:07:33 +0000 Subject: [PATCH 0271/1397] mm/damon/sysfs: remove requested targets when online-commit inputs commit 19467a950b49432a84bf6dbadbbb17bdf89418b7 upstream. damon_sysfs_set_targets(), which updates the targets of the context for online commitment, do not remove targets that removed from the corresponding sysfs files. As a result, more than intended targets of the context can exist and hence consume memory and monitoring CPU resource more than expected. Fix it by removing all targets of the context and fill up again using the user input. This could cause unnecessary memory dealloc and realloc operations, but this is not a hot code path. Also, note that damon_target is stateless, and hence no data is lost. [sj@kernel.org: fix unnecessary monitoring results removal] Link: https://lkml.kernel.org/r/20231028213353.45397-1-sj@kernel.org Link: https://lkml.kernel.org/r/20231022210735.46409-2-sj@kernel.org Fixes: da87878010e5 ("mm/damon/sysfs: support online inputs update") Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: [5.19.x] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/sysfs.c | 70 +++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index f60e56150feb..02973273a20d 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1150,58 +1150,60 @@ static int damon_sysfs_add_target(struct damon_sysfs_target *sys_target, return err; } -/* - * Search a target in a context that corresponds to the sysfs target input. - * - * Return: pointer to the target if found, NULL if not found, or negative - * error code if the search failed. - */ -static struct damon_target *damon_sysfs_existing_target( - struct damon_sysfs_target *sys_target, struct damon_ctx *ctx) +static int damon_sysfs_update_target(struct damon_target *target, + struct damon_ctx *ctx, + struct damon_sysfs_target *sys_target) { struct pid *pid; - struct damon_target *t; + struct damon_region *r, *next; - if (!damon_target_has_pid(ctx)) { - /* Up to only one target for paddr could exist */ - damon_for_each_target(t, ctx) - return t; - return NULL; - } + if (!damon_target_has_pid(ctx)) + return 0; - /* ops.id should be DAMON_OPS_VADDR or DAMON_OPS_FVADDR */ pid = find_get_pid(sys_target->pid); if (!pid) - return ERR_PTR(-EINVAL); - damon_for_each_target(t, ctx) { - if (t->pid == pid) { - put_pid(pid); - return t; - } + return -EINVAL; + + /* no change to the target */ + if (pid == target->pid) { + put_pid(pid); + return 0; } - put_pid(pid); - return NULL; + + /* remove old monitoring results and update the target's pid */ + damon_for_each_region_safe(r, next, target) + damon_destroy_region(r, target); + put_pid(target->pid); + target->pid = pid; + return 0; } static int damon_sysfs_set_targets(struct damon_ctx *ctx, struct damon_sysfs_targets *sysfs_targets) { - int i, err; + struct damon_target *t, *next; + int i = 0, err; /* Multiple physical address space monitoring targets makes no sense */ if (ctx->ops.id == DAMON_OPS_PADDR && sysfs_targets->nr > 1) return -EINVAL; - for (i = 0; i < sysfs_targets->nr; i++) { + damon_for_each_target_safe(t, next, ctx) { + if (i < sysfs_targets->nr) { + damon_sysfs_update_target(t, ctx, + sysfs_targets->targets_arr[i]); + } else { + if (damon_target_has_pid(ctx)) + put_pid(t->pid); + damon_destroy_target(t); + } + i++; + } + + for (; i < sysfs_targets->nr; i++) { struct damon_sysfs_target *st = sysfs_targets->targets_arr[i]; - struct damon_target *t = damon_sysfs_existing_target(st, ctx); - - if (IS_ERR(t)) - return PTR_ERR(t); - if (!t) - err = damon_sysfs_add_target(st, ctx); - else - err = damon_sysfs_set_regions(t, st->regions); + + err = damon_sysfs_add_target(st, ctx); if (err) return err; } From 938e5d28842afa842c48b0fdb15f9faff66c730e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 31 Oct 2023 17:01:31 +0000 Subject: [PATCH 0272/1397] mm/damon/sysfs: update monitoring target regions for online input commit commit 9732336006764e2ee61225387e3c70eae9139035 upstream. When user input is committed online, DAMON sysfs interface is ignoring the user input for the monitoring target regions. Such request is valid and useful for fixed monitoring target regions-based monitoring ops like 'paddr' or 'fvaddr'. Update the region boundaries as user specified, too. Note that the monitoring results of the regions that overlap between the latest monitoring target regions and the new target regions are preserved. Treat empty monitoring target regions user request as a request to just make no change to the monitoring target regions. Otherwise, users should set the monitoring target regions same to current one for every online input commit, and it could be challenging for dynamic monitoring target regions update DAMON ops like 'vaddr'. If the user really need to remove all monitoring target regions, they can simply remove the target and then create the target again with empty target regions. Link: https://lkml.kernel.org/r/20231031170131.46972-1-sj@kernel.org Fixes: da87878010e5 ("mm/damon/sysfs: support online inputs update") Signed-off-by: SeongJae Park Cc: [5.19+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/sysfs.c | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 02973273a20d..7e040286cf01 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1150,34 +1150,47 @@ static int damon_sysfs_add_target(struct damon_sysfs_target *sys_target, return err; } -static int damon_sysfs_update_target(struct damon_target *target, - struct damon_ctx *ctx, - struct damon_sysfs_target *sys_target) +static int damon_sysfs_update_target_pid(struct damon_target *target, int pid) { - struct pid *pid; - struct damon_region *r, *next; - - if (!damon_target_has_pid(ctx)) - return 0; + struct pid *pid_new; - pid = find_get_pid(sys_target->pid); - if (!pid) + pid_new = find_get_pid(pid); + if (!pid_new) return -EINVAL; - /* no change to the target */ - if (pid == target->pid) { - put_pid(pid); + if (pid_new == target->pid) { + put_pid(pid_new); return 0; } - /* remove old monitoring results and update the target's pid */ - damon_for_each_region_safe(r, next, target) - damon_destroy_region(r, target); put_pid(target->pid); - target->pid = pid; + target->pid = pid_new; return 0; } +static int damon_sysfs_update_target(struct damon_target *target, + struct damon_ctx *ctx, + struct damon_sysfs_target *sys_target) +{ + int err; + + if (damon_target_has_pid(ctx)) { + err = damon_sysfs_update_target_pid(target, sys_target->pid); + if (err) + return err; + } + + /* + * Do monitoring target region boundary update only if one or more + * regions are set by the user. This is for keeping current monitoring + * target results and range easier, especially for dynamic monitoring + * target regions update ops like 'vaddr'. + */ + if (sys_target->regions->nr) + err = damon_sysfs_set_regions(target, sys_target->regions); + return err; +} + static int damon_sysfs_set_targets(struct damon_ctx *ctx, struct damon_sysfs_targets *sysfs_targets) { From dbfbac0f94a113611a8b2016eb126ceeabaa60c5 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Fri, 27 Oct 2023 14:46:53 -0700 Subject: [PATCH 0273/1397] watchdog: move softlockup_panic back to early_param commit 8b793bcda61f6c3ed4f5b2ded7530ef6749580cb upstream. Setting softlockup_panic from do_sysctl_args() causes it to take effect later in boot. The lockup detector is enabled before SMP is brought online, but do_sysctl_args runs afterwards. If a user wants to set softlockup_panic on boot and have it trigger should a softlockup occur during onlining of the non-boot processors, they could do this prior to commit f117955a2255 ("kernel/watchdog.c: convert {soft/hard}lockup boot parameters to sysctl aliases"). However, after this commit the value of softlockup_panic is set too late to be of help for this type of problem. Restore the prior behavior. Signed-off-by: Krister Johansen Cc: stable@vger.kernel.org Fixes: f117955a2255 ("kernel/watchdog.c: convert {soft/hard}lockup boot parameters to sysctl aliases") Signed-off-by: Luis Chamberlain Signed-off-by: Greg Kroah-Hartman --- fs/proc/proc_sysctl.c | 1 - kernel/watchdog.c | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 1c9635dddb70..de484195f49f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1576,7 +1576,6 @@ static const struct sysctl_alias sysctl_aliases[] = { {"hung_task_panic", "kernel.hung_task_panic" }, {"numa_zonelist_order", "vm.numa_zonelist_order" }, {"softlockup_all_cpu_backtrace", "kernel.softlockup_all_cpu_backtrace" }, - {"softlockup_panic", "kernel.softlockup_panic" }, { } }; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d145305d95fe..5cd6d4e26915 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -283,6 +283,13 @@ static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(bool, softlockup_touch_sync); static unsigned long soft_lockup_nmi_warn; +static int __init softlockup_panic_setup(char *str) +{ + softlockup_panic = simple_strtoul(str, NULL, 0); + return 1; +} +__setup("softlockup_panic=", softlockup_panic_setup); + static int __init nowatchdog_setup(char *str) { watchdog_user_enabled = 0; From fcb32111f01ddf3cbd04644cde1773428e31de6a Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Sat, 28 Oct 2023 01:29:42 +0900 Subject: [PATCH 0274/1397] iommufd: Fix missing update of domains_itree after splitting iopt_area commit e7250ab7ca4998fe026f2149805b03e09dc32498 upstream. In iopt_area_split(), if the original iopt_area has filled a domain and is linked to domains_itree, pages_nodes have to be properly reinserted. Otherwise the domains_itree becomes corrupted and we will UAF. Fixes: 51fe6141f0f6 ("iommufd: Data structure to provide IOVA to PFN mapping") Link: https://lore.kernel.org/r/20231027162941.2864615-2-den@valinux.co.jp Cc: stable@vger.kernel.org Signed-off-by: Koichiro Den Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iommufd/io_pagetable.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index a4da1817e19d..117a39ae2e4a 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -1060,6 +1060,16 @@ static int iopt_area_split(struct iopt_area *area, unsigned long iova) if (WARN_ON(rc)) goto err_remove_lhs; + /* + * If the original area has filled a domain, domains_itree has to be + * updated. + */ + if (area->storage_domain) { + interval_tree_remove(&area->pages_node, &pages->domains_itree); + interval_tree_insert(&lhs->pages_node, &pages->domains_itree); + interval_tree_insert(&rhs->pages_node, &pages->domains_itree); + } + lhs->storage_domain = area->storage_domain; lhs->pages = area->pages; rhs->storage_domain = area->storage_domain; From 5cadd780883275cd7c1f968284e1a8cde4eb9426 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 27 Oct 2023 13:36:48 +0200 Subject: [PATCH 0275/1397] fbdev: stifb: Make the STI next font pointer a 32-bit signed offset commit 8a32aa17c1cd48df1ddaa78e45abcb8c7a2220d6 upstream. The pointer to the next STI font is actually a signed 32-bit offset. With this change the 64-bit kernel will correctly subract the (signed 32-bit) offset instead of adding a (unsigned 32-bit) offset. It has no effect on 32-bit kernels. This fixes the stifb driver with a 64-bit kernel on qemu. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- include/video/sticore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/video/sticore.h b/include/video/sticore.h index 945ad60463a1..012b5b46ad7d 100644 --- a/include/video/sticore.h +++ b/include/video/sticore.h @@ -232,7 +232,7 @@ struct sti_rom_font { u8 height; u8 font_type; /* language type */ u8 bytes_per_char; - u32 next_font; + s32 next_font; /* note: signed int */ u8 underline_height; u8 underline_pos; u8 res008[2]; From 0b1f2f895470a4a663b471054ba25ce88ba86d09 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 31 Oct 2023 19:12:54 +0100 Subject: [PATCH 0276/1397] dm crypt: account large pages in cc->n_allocated_pages commit 9793c269da6cd339757de6ba5b2c8681b54c99af upstream. The commit 5054e778fcd9c ("dm crypt: allocate compound pages if possible") changed dm-crypt to use compound pages to improve performance. Unfortunately, there was an oversight: the allocation of compound pages was not accounted at all. Normal pages are accounted in a percpu counter cc->n_allocated_pages and dm-crypt is limited to allocate at most 2% of memory. Because compound pages were not accounted at all, dm-crypt could allocate memory over the 2% limit. Fix this by adding the accounting of compound pages, so that memory consumption of dm-crypt is properly limited. Signed-off-by: Mikulas Patocka Fixes: 5054e778fcd9c ("dm crypt: allocate compound pages if possible") Cc: stable@vger.kernel.org # v6.5+ Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5315fd261c23..cef9353370b2 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1699,11 +1699,17 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size) order = min(order, remaining_order); while (order > 0) { + if (unlikely(percpu_counter_read_positive(&cc->n_allocated_pages) + + (1 << order) > dm_crypt_pages_per_client)) + goto decrease_order; pages = alloc_pages(gfp_mask | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | __GFP_COMP, order); - if (likely(pages != NULL)) + if (likely(pages != NULL)) { + percpu_counter_add(&cc->n_allocated_pages, 1 << order); goto have_pages; + } +decrease_order: order--; } @@ -1741,10 +1747,13 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) if (clone->bi_vcnt > 0) { /* bio_for_each_folio_all crashes with an empty bio */ bio_for_each_folio_all(fi, clone) { - if (folio_test_large(fi.folio)) + if (folio_test_large(fi.folio)) { + percpu_counter_sub(&cc->n_allocated_pages, + 1 << folio_order(fi.folio)); folio_put(fi.folio); - else + } else { mempool_free(&fi.folio->page, &cc->page_pool); + } } } } From 9e4b80adf560d2649a48c785ba8bbb9dac2492d2 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 19 Oct 2023 19:49:23 +0000 Subject: [PATCH 0277/1397] mm/damon/lru_sort: avoid divide-by-zero in hot threshold calculation commit 44063f125af4bb4efd1d500d8091fa33a98af325 upstream. When calculating the hotness threshold for lru_prio scheme of DAMON_LRU_SORT, the module divides some values by the maximum nr_accesses. However, due to the type of the related variables, simple division-based calculation of the divisor can return zero. As a result, divide-by-zero is possible. Fix it by using damon_max_nr_accesses(), which handles the case. Link: https://lkml.kernel.org/r/20231019194924.100347-5-sj@kernel.org Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting") Signed-off-by: SeongJae Park Reported-by: Jakub Acs Cc: [6.0+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/lru_sort.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 7b8fce2f67a8..3071e08e8b8f 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -193,9 +193,7 @@ static int damon_lru_sort_apply_parameters(void) if (err) return err; - /* aggr_interval / sample_interval is the maximum nr_accesses */ - hot_thres = damon_lru_sort_mon_attrs.aggr_interval / - damon_lru_sort_mon_attrs.sample_interval * + hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) * hot_thres_access_freq / 1000; scheme = damon_lru_sort_new_hot_scheme(hot_thres); if (!scheme) From 10186f29f4f97955954b9e6a45639c5d12a4a11d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 19 Oct 2023 19:49:22 +0000 Subject: [PATCH 0278/1397] mm/damon/ops-common: avoid divide-by-zero during region hotness calculation commit 3bafc47d3c4a2fc4d3b382aeb3c087f8fc84d9fd upstream. When calculating the hotness of each region for the under-quota regions prioritization, DAMON divides some values by the maximum nr_accesses. However, due to the type of the related variables, simple division-based calculation of the divisor can return zero. As a result, divide-by-zero is possible. Fix it by using damon_max_nr_accesses(), which handles the case. Link: https://lkml.kernel.org/r/20231019194924.100347-4-sj@kernel.org Fixes: 198f0f4c58b9 ("mm/damon/vaddr,paddr: support pageout prioritization") Signed-off-by: SeongJae Park Reported-by: Jakub Acs Cc: [5.16+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/ops-common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c index ac1c3fa80f98..d25d99cb5f2b 100644 --- a/mm/damon/ops-common.c +++ b/mm/damon/ops-common.c @@ -73,7 +73,6 @@ void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr int damon_hot_score(struct damon_ctx *c, struct damon_region *r, struct damos *s) { - unsigned int max_nr_accesses; int freq_subscore; unsigned int age_in_sec; int age_in_log, age_subscore; @@ -81,8 +80,8 @@ int damon_hot_score(struct damon_ctx *c, struct damon_region *r, unsigned int age_weight = s->quota.weight_age; int hotness; - max_nr_accesses = c->attrs.aggr_interval / c->attrs.sample_interval; - freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / max_nr_accesses; + freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / + damon_max_nr_accesses(&c->attrs); age_in_sec = (unsigned long)r->age * c->attrs.aggr_interval / 1000000; for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec; From 557547ad87599ad79536ac3216e1529d84ec21eb Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 19 Oct 2023 19:49:20 +0000 Subject: [PATCH 0279/1397] mm/damon: implement a function for max nr_accesses safe calculation commit 35f5d94187a6a3a8df2cba54beccca1c2379edb8 upstream. Patch series "avoid divide-by-zero due to max_nr_accesses overflow". The maximum nr_accesses of given DAMON context can be calculated by dividing the aggregation interval by the sampling interval. Some logics in DAMON uses the maximum nr_accesses as a divisor. Hence, the value shouldn't be zero. Such case is avoided since DAMON avoids setting the agregation interval as samller than the sampling interval. However, since nr_accesses is unsigned int while the intervals are unsigned long, the maximum nr_accesses could be zero while casting. Avoid the divide-by-zero by implementing a function that handles the corner case (first patch), and replaces the vulnerable direct max nr_accesses calculations (remaining patches). Note that the patches for the replacements are divided for broken commits, to make backporting on required tres easier. Especially, the last patch is for a patch that not yet merged into the mainline but in mm tree. This patch (of 4): The maximum nr_accesses of given DAMON context can be calculated by dividing the aggregation interval by the sampling interval. Some logics in DAMON uses the maximum nr_accesses as a divisor. Hence, the value shouldn't be zero. Such case is avoided since DAMON avoids setting the agregation interval as samller than the sampling interval. However, since nr_accesses is unsigned int while the intervals are unsigned long, the maximum nr_accesses could be zero while casting. Implement a function that handles the corner case. Note that this commit is not fixing the real issue since this is only introducing the safe function that will replaces the problematic divisions. The replacements will be made by followup commits, to make backporting on stable series easier. Link: https://lkml.kernel.org/r/20231019194924.100347-1-sj@kernel.org Link: https://lkml.kernel.org/r/20231019194924.100347-2-sj@kernel.org Fixes: 198f0f4c58b9 ("mm/damon/vaddr,paddr: support pageout prioritization") Signed-off-by: SeongJae Park Reported-by: Jakub Acs Cc: [5.16+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/damon.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index ae2664d1d5f1..c70cca8a839f 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -642,6 +642,13 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx) return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR; } +static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs) +{ + /* {aggr,sample}_interval are unsigned long, hence could overflow */ + return min(attrs->aggr_interval / attrs->sample_interval, + (unsigned long)UINT_MAX); +} + int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); From 74ed10979060d089d813f65dc91b7a9f0a30fa06 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 19 Oct 2023 19:49:21 +0000 Subject: [PATCH 0280/1397] mm/damon/core: avoid divide-by-zero during monitoring results update commit d35963bfb05877455228ecec6b194f624489f96a upstream. When monitoring attributes are changed, DAMON updates access rate of the monitoring results accordingly. For that, it divides some values by the maximum nr_accesses. However, due to the type of the related variables, simple division-based calculation of the divisor can return zero. As a result, divide-by-zero is possible. Fix it by using damon_max_nr_accesses(), which handles the case. Link: https://lkml.kernel.org/r/20231019194924.100347-3-sj@kernel.org Fixes: 2f5bef5a590b ("mm/damon/core: update monitoring results for new monitoring attributes") Signed-off-by: SeongJae Park Reported-by: Jakub Acs Cc: [6.3+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/core.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index bcd2bd9d6c10..42fea05ecde4 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -476,20 +476,14 @@ static unsigned int damon_age_for_new_attrs(unsigned int age, static unsigned int damon_accesses_bp_to_nr_accesses( unsigned int accesses_bp, struct damon_attrs *attrs) { - unsigned int max_nr_accesses = - attrs->aggr_interval / attrs->sample_interval; - - return accesses_bp * max_nr_accesses / 10000; + return accesses_bp * damon_max_nr_accesses(attrs) / 10000; } /* convert nr_accesses to access ratio in bp (per 10,000) */ static unsigned int damon_nr_accesses_to_accesses_bp( unsigned int nr_accesses, struct damon_attrs *attrs) { - unsigned int max_nr_accesses = - attrs->aggr_interval / attrs->sample_interval; - - return nr_accesses * 10000 / max_nr_accesses; + return nr_accesses * 10000 / damon_max_nr_accesses(attrs); } static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses, From 0bf5055d5d33133c3a7cc5e4032eeb0ba7832c44 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 6 Nov 2023 23:34:08 +0000 Subject: [PATCH 0281/1397] mm/damon/sysfs-schemes: handle tried region directory allocation failure commit ae636ae2bbfd9279f5681dbf320d1da817e52b68 upstream. DAMON sysfs interface's before_damos_apply callback (damon_sysfs_before_damos_apply()), which creates the DAMOS tried regions for each DAMOS action applied region, is not handling the allocation failure for the sysfs directory data. As a result, NULL pointer derefeence is possible. Fix it by handling the case. Link: https://lkml.kernel.org/r/20231106233408.51159-4-sj@kernel.org Fixes: f1d13cacabe1 ("mm/damon/sysfs: implement DAMOS tried regions update command") Signed-off-by: SeongJae Park Cc: [6.2+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/sysfs-schemes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 527e7d17eb3b..13e29fe065ab 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -1752,6 +1752,8 @@ static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx, return 0; region = damon_sysfs_scheme_region_alloc(r); + if (!region) + return 0; list_add_tail(®ion->list, &sysfs_regions->regions_list); sysfs_regions->nr_regions++; if (kobject_init_and_add(®ion->kobj, From 76801f32d963e5854ea69fb9821d2d86b62790b9 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 6 Nov 2023 23:34:07 +0000 Subject: [PATCH 0282/1397] mm/damon/sysfs-schemes: handle tried regions sysfs directory allocation failure commit 84055688b6bc075c92a88e2d6c3ad26ab93919f9 upstream. DAMOS tried regions sysfs directory allocation function (damon_sysfs_scheme_regions_alloc()) is not handling the memory allocation failure. In the case, the code will dereference NULL pointer. Handle the failure to avoid such invalid access. Link: https://lkml.kernel.org/r/20231106233408.51159-3-sj@kernel.org Fixes: 9277d0367ba1 ("mm/damon/sysfs-schemes: implement scheme region directory") Signed-off-by: SeongJae Park Cc: [6.2+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/sysfs-schemes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 13e29fe065ab..36dcd881a19c 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -126,6 +126,9 @@ damon_sysfs_scheme_regions_alloc(void) struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions), GFP_KERNEL); + if (!regions) + return NULL; + regions->kobj = (struct kobject){}; INIT_LIST_HEAD(®ions->regions_list); regions->nr_regions = 0; From 1777bb5c740d6d8837dbae61b4694ef90f363221 Mon Sep 17 00:00:00 2001 From: Hyeongtak Ji Date: Fri, 10 Nov 2023 14:37:09 +0900 Subject: [PATCH 0283/1397] mm/damon/core.c: avoid unintentional filtering out of schemes commit 13b2a4b22e98ff80b888a160a2acd92d81b05925 upstream. The function '__damos_filter_out()' causes DAMON to always filter out schemes whose filter type is anon or memcg if its matching value is set to false. This commit addresses the issue by ensuring that '__damos_filter_out()' no longer applies to filters whose type is 'anon' or 'memcg'. Link: https://lkml.kernel.org/r/1699594629-3816-1-git-send-email-hyeongtak.ji@gmail.com Fixes: ab9bda001b681 ("mm/damon/core: introduce address range type damos filter") Signed-off-by: Hyeongtak Ji Reviewed-by: SeongJae Park Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 42fea05ecde4..fd5be73f699f 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -914,7 +914,7 @@ static bool __damos_filter_out(struct damon_ctx *ctx, struct damon_target *t, matched = true; break; default: - break; + return false; } return matched == filter->matching; From 33edac1f26a4ae186aa4236608d4c20f7b19176d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 6 Nov 2023 23:34:06 +0000 Subject: [PATCH 0284/1397] mm/damon/sysfs: check error from damon_sysfs_update_target() commit b4936b544b08ed44949055b92bd25f77759ebafc upstream. Patch series "mm/damon/sysfs: fix unhandled return values". Some of DAMON sysfs interface code is not handling return values from some functions. As a result, confusing user input handling or NULL-dereference is possible. Check those properly. This patch (of 3): damon_sysfs_update_target() returns error code for failures, but its caller, damon_sysfs_set_targets() is ignoring that. The update function seems making no critical change in case of such failures, but the behavior will look like DAMON sysfs is silently ignoring or only partially accepting the user input. Fix it. Link: https://lkml.kernel.org/r/20231106233408.51159-1-sj@kernel.org Link: https://lkml.kernel.org/r/20231106233408.51159-2-sj@kernel.org Fixes: 19467a950b49 ("mm/damon/sysfs: remove requested targets when online-commit inputs") Signed-off-by: SeongJae Park Cc: [5.19+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/sysfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 7e040286cf01..faaef5098e26 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1203,8 +1203,10 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx, damon_for_each_target_safe(t, next, ctx) { if (i < sysfs_targets->nr) { - damon_sysfs_update_target(t, ctx, + err = damon_sysfs_update_target(t, ctx, sysfs_targets->targets_arr[i]); + if (err) + return err; } else { if (damon_target_has_pid(ctx)) put_pid(t->pid); From c3b5552b3a02215eaefab6b85e161cb25fe01c18 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 20 Oct 2023 20:49:07 +0000 Subject: [PATCH 0285/1397] parisc: Add nop instructions after TLB inserts commit ad4aa06e1d92b06ed56c7240252927bd60632efe upstream. An excerpt from the PA8800 ERS states: * The PA8800 violates the seven instruction pipeline rule when performing TLB inserts or PxTLBE instructions with the PSW C bit on. The instruction will take effect by the 12th instruction after the insert or purge. I believe we have a problem with handling TLB misses. We don't fill the pipeline following TLB inserts. As a result, we likely fault again after returning from the interruption. The above statement indicates that we need at least seven instructions after the insert on pre PA8800 processors and we need 12 instructions on PA8800/PA8900 processors. Here we add macros and code to provide the required number instructions after a TLB insert. Signed-off-by: John David Anglin Suggested-by: Helge Deller Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/entry.S | 81 ++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 29 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index ae03b8679696..cab1ec23e0d7 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -36,6 +36,24 @@ .level 2.0 #endif +/* + * We need seven instructions after a TLB insert for it to take effect. + * The PA8800/PA8900 processors are an exception and need 12 instructions. + * The RFI changes both IAOQ_Back and IAOQ_Front, so it counts as one. + */ +#ifdef CONFIG_64BIT +#define NUM_PIPELINE_INSNS 12 +#else +#define NUM_PIPELINE_INSNS 7 +#endif + + /* Insert num nops */ + .macro insert_nops num + .rept \num + nop + .endr + .endm + /* Get aligned page_table_lock address for this mm from cr28/tr4 */ .macro get_ptl reg mfctl %cr28,\reg @@ -415,24 +433,20 @@ 3: .endm - /* Release page_table_lock without reloading lock address. - We use an ordered store to ensure all prior accesses are - performed prior to releasing the lock. */ - .macro ptl_unlock0 spc,tmp,tmp2 + /* Release page_table_lock if for user space. We use an ordered + store to ensure all prior accesses are performed prior to + releasing the lock. Note stw may not be executed, so we + provide one extra nop when CONFIG_TLB_PTLOCK is defined. */ + .macro ptl_unlock spc,tmp,tmp2 #ifdef CONFIG_TLB_PTLOCK -98: ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmp2 +98: get_ptl \tmp + ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmp2 or,COND(=) %r0,\spc,%r0 stw,ma \tmp2,0(\tmp) 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) -#endif - .endm - - /* Release page_table_lock. */ - .macro ptl_unlock1 spc,tmp,tmp2 -#ifdef CONFIG_TLB_PTLOCK -98: get_ptl \tmp - ptl_unlock0 \spc,\tmp,\tmp2 -99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) + insert_nops NUM_PIPELINE_INSNS - 4 +#else + insert_nops NUM_PIPELINE_INSNS - 1 #endif .endm @@ -1124,7 +1138,7 @@ dtlb_miss_20w: idtlbt pte,prot - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1133,6 +1147,7 @@ dtlb_check_alias_20w: idtlbt pte,prot + insert_nops NUM_PIPELINE_INSNS - 1 rfir nop @@ -1150,7 +1165,7 @@ nadtlb_miss_20w: idtlbt pte,prot - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1159,6 +1174,7 @@ nadtlb_check_alias_20w: idtlbt pte,prot + insert_nops NUM_PIPELINE_INSNS - 1 rfir nop @@ -1184,7 +1200,7 @@ dtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1194,6 +1210,7 @@ dtlb_check_alias_11: idtlba pte,(va) idtlbp prot,(va) + insert_nops NUM_PIPELINE_INSNS - 1 rfir nop @@ -1217,7 +1234,7 @@ nadtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1227,6 +1244,7 @@ nadtlb_check_alias_11: idtlba pte,(va) idtlbp prot,(va) + insert_nops NUM_PIPELINE_INSNS - 1 rfir nop @@ -1246,7 +1264,7 @@ dtlb_miss_20: idtlbt pte,prot - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1255,6 +1273,7 @@ dtlb_check_alias_20: idtlbt pte,prot + insert_nops NUM_PIPELINE_INSNS - 1 rfir nop @@ -1274,7 +1293,7 @@ nadtlb_miss_20: idtlbt pte,prot - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1283,6 +1302,7 @@ nadtlb_check_alias_20: idtlbt pte,prot + insert_nops NUM_PIPELINE_INSNS - 1 rfir nop @@ -1319,7 +1339,7 @@ itlb_miss_20w: iitlbt pte,prot - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1343,7 +1363,7 @@ naitlb_miss_20w: iitlbt pte,prot - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1352,6 +1372,7 @@ naitlb_check_alias_20w: iitlbt pte,prot + insert_nops NUM_PIPELINE_INSNS - 1 rfir nop @@ -1377,7 +1398,7 @@ itlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1401,7 +1422,7 @@ naitlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1411,6 +1432,7 @@ naitlb_check_alias_11: iitlba pte,(%sr0, va) iitlbp prot,(%sr0, va) + insert_nops NUM_PIPELINE_INSNS - 1 rfir nop @@ -1431,7 +1453,7 @@ itlb_miss_20: iitlbt pte,prot - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1451,7 +1473,7 @@ naitlb_miss_20: iitlbt pte,prot - ptl_unlock1 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1460,6 +1482,7 @@ naitlb_check_alias_20: iitlbt pte,prot + insert_nops NUM_PIPELINE_INSNS - 1 rfir nop @@ -1481,7 +1504,7 @@ dbit_trap_20w: idtlbt pte,prot - ptl_unlock0 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop #else @@ -1507,7 +1530,7 @@ dbit_trap_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock0 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop @@ -1527,7 +1550,7 @@ dbit_trap_20: idtlbt pte,prot - ptl_unlock0 spc,t0,t1 + ptl_unlock spc,t0,t1 rfir nop #endif From 52c96f442c192c4e1614ea3a801d7659af3a126b Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Mon, 16 Oct 2023 18:08:28 +0200 Subject: [PATCH 0286/1397] ACPI: resource: Do IRQ override on TongFang GMxXGxx commit 0da9eccde3270b832c059ad618bf66e510c75d33 upstream. The TongFang GMxXGxx/TUXEDO Stellaris/Pollaris Gen5 needs IRQ overriding for the keyboard to work. Adding an entry for this laptop to the override_table makes the internal keyboard functional. Signed-off-by: Werner Sembach Cc: All applicable Reviewed-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/resource.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 297a88587031..80fbd385e8b4 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -495,6 +495,18 @@ static const struct dmi_system_id maingear_laptop[] = { DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-15A3070T"), } }, + { + /* TongFang GMxXGxx/TUXEDO Polaris 15 Gen5 AMD */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GMxXGxx"), + }, + }, + { + /* TongFang GM6XGxX/TUXEDO Stellaris 16 Gen5 AMD */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GM6XGxX"), + }, + }, { .ident = "MAINGEAR Vector Pro 2 17", .matches = { From 1985fab7f8520e4629c09eba8d40ebf57a862851 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 26 Oct 2023 16:49:19 +0100 Subject: [PATCH 0287/1397] regmap: Ensure range selector registers are updated after cache sync commit 0ec7731655de196bc1e4af99e495b38778109d22 upstream. When we sync the register cache we do so with the cache bypassed in order to avoid overhead from writing the synced values back into the cache. If the regmap has ranges and the selector register for those ranges is in a register which is cached this has the unfortunate side effect of meaning that the physical and cached copies of the selector register can be out of sync after a cache sync. The cache will have whatever the selector was when the sync started and the hardware will have the selector for the register that was synced last. Fix this by rewriting all cached selector registers after every sync, ensuring that the hardware and cache have the same content. This will result in extra writes that wouldn't otherwise be needed but is simple so hopefully robust. We don't read from the hardware since not all devices have physical read support. Given that nobody noticed this until now it is likely that we are rarely if ever hitting this case. Reported-by: Hector Martin Cc: stable@vger.kernel.org Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20231026-regmap-fix-selector-sync-v1-1-633ded82770d@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regcache.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index c5d151e9c481..92592f944a3d 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -334,6 +334,11 @@ static int regcache_default_sync(struct regmap *map, unsigned int min, return 0; } +static int rbtree_all(const void *key, const struct rb_node *node) +{ + return 0; +} + /** * regcache_sync - Sync the register cache with the hardware. * @@ -351,6 +356,7 @@ int regcache_sync(struct regmap *map) unsigned int i; const char *name; bool bypass; + struct rb_node *node; if (WARN_ON(map->cache_type == REGCACHE_NONE)) return -EINVAL; @@ -392,6 +398,30 @@ int regcache_sync(struct regmap *map) /* Restore the bypass state */ map->cache_bypass = bypass; map->no_sync_defaults = false; + + /* + * If we did any paging with cache bypassed and a cached + * paging register then the register and cache state might + * have gone out of sync, force writes of all the paging + * registers. + */ + rb_for_each(node, 0, &map->range_tree, rbtree_all) { + struct regmap_range_node *this = + rb_entry(node, struct regmap_range_node, node); + + /* If there's nothing in the cache there's nothing to sync */ + ret = regcache_read(map, this->selector_reg, &i); + if (ret != 0) + continue; + + ret = _regmap_write(map, this->selector_reg, i); + if (ret != 0) { + dev_err(map->dev, "Failed to write %x = %x: %d\n", + this->selector_reg, i, ret); + break; + } + } + map->unlock(map->lock_arg); regmap_async_complete(map); From c5b914528e55dd22fd616d1f7cb96a877aa54284 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 19 Oct 2023 17:31:14 +0200 Subject: [PATCH 0288/1397] wifi: ath11k: fix temperature event locking commit 1a5352a81b4720ba43d9c899974e3bddf7ce0ce8 upstream. The ath11k active pdevs are protected by RCU but the temperature event handling code calling ath11k_mac_get_ar_by_pdev_id() was not marked as a read-side critical section as reported by RCU lockdep: ============================= WARNING: suspicious RCU usage 6.6.0-rc6 #7 Not tainted ----------------------------- drivers/net/wireless/ath/ath11k/mac.c:638 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 no locks held by swapper/0/0. ... Call trace: ... lockdep_rcu_suspicious+0x16c/0x22c ath11k_mac_get_ar_by_pdev_id+0x194/0x1b0 [ath11k] ath11k_wmi_tlv_op_rx+0xa84/0x2c1c [ath11k] ath11k_htc_rx_completion_handler+0x388/0x510 [ath11k] Mark the code in question as an RCU read-side critical section to avoid any potential use-after-free issues. Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.23 Fixes: a41d10348b01 ("ath11k: add thermal sensor device support") Cc: stable@vger.kernel.org # 5.7 Signed-off-by: Johan Hovold Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231019153115.26401-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath11k/wmi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 23ad6825e5be..71e3cda4219b 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8383,15 +8383,19 @@ ath11k_wmi_pdev_temperature_event(struct ath11k_base *ab, ath11k_dbg(ab, ATH11K_DBG_WMI, "event pdev temperature ev temp %d pdev_id %d\n", ev->temp, ev->pdev_id); + rcu_read_lock(); + ar = ath11k_mac_get_ar_by_pdev_id(ab, ev->pdev_id); if (!ar) { ath11k_warn(ab, "invalid pdev id in pdev temperature ev %d", ev->pdev_id); - kfree(tb); - return; + goto exit; } ath11k_thermal_event_temperature(ar, ev->temp); +exit: + rcu_read_unlock(); + kfree(tb); } From 21ebb0aba580d347e12f01ce5f6e75044427b3d5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 19 Oct 2023 17:31:15 +0200 Subject: [PATCH 0289/1397] wifi: ath11k: fix dfs radar event locking commit 3b6c14833165f689cc5928574ebafe52bbce5f1e upstream. The ath11k active pdevs are protected by RCU but the DFS radar event handling code calling ath11k_mac_get_ar_by_pdev_id() was not marked as a read-side critical section. Mark the code in question as an RCU read-side critical section to avoid any potential use-after-free issues. Compile tested only. Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Cc: stable@vger.kernel.org # 5.6 Acked-by: Jeff Johnson Signed-off-by: Johan Hovold Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231019153115.26401-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath11k/wmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 71e3cda4219b..333aff56f440 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8337,6 +8337,8 @@ ath11k_wmi_pdev_dfs_radar_detected_event(struct ath11k_base *ab, struct sk_buff ev->detector_id, ev->segment_id, ev->timestamp, ev->is_chirp, ev->freq_offset, ev->sidx); + rcu_read_lock(); + ar = ath11k_mac_get_ar_by_pdev_id(ab, ev->pdev_id); if (!ar) { @@ -8354,6 +8356,8 @@ ath11k_wmi_pdev_dfs_radar_detected_event(struct ath11k_base *ab, struct sk_buff ieee80211_radar_detected(ar->hw); exit: + rcu_read_unlock(); + kfree(tb); } From 69cede2a5a5f60e3f5602b901b52cb64edd2ea6c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 19 Oct 2023 13:25:21 +0200 Subject: [PATCH 0290/1397] wifi: ath11k: fix htt pktlog locking commit 3f77c7d605b29df277d77e9ee75d96e7ad145d2d upstream. The ath11k active pdevs are protected by RCU but the htt pktlog handling code calling ath11k_mac_get_ar_by_pdev_id() was not marked as a read-side critical section. Mark the code in question as an RCU read-side critical section to avoid any potential use-after-free issues. Compile tested only. Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Cc: stable@vger.kernel.org # 5.6 Signed-off-by: Johan Hovold Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231019112521.2071-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath11k/dp_rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 62bc98852f0f..a993e74bbae8 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -1621,14 +1621,20 @@ static void ath11k_htt_pktlog(struct ath11k_base *ab, struct sk_buff *skb) u8 pdev_id; pdev_id = FIELD_GET(HTT_T2H_PPDU_STATS_INFO_PDEV_ID, data->hdr); + + rcu_read_lock(); + ar = ath11k_mac_get_ar_by_pdev_id(ab, pdev_id); if (!ar) { ath11k_warn(ab, "invalid pdev id %d on htt pktlog\n", pdev_id); - return; + goto out; } trace_ath11k_htt_pktlog(ar, data->payload, hdr->size, ar->ab->pktlog_defs_checksum); + +out: + rcu_read_unlock(); } static void ath11k_htt_backpressure_event_handler(struct ath11k_base *ab, From e83246ecd3b193f8d91fce778e8a5ba747fc7d8a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 19 Oct 2023 17:53:42 +0200 Subject: [PATCH 0291/1397] wifi: ath11k: fix gtk offload status event locking commit 1dea3c0720a146bd7193969f2847ccfed5be2221 upstream. The ath11k active pdevs are protected by RCU but the gtk offload status event handling code calling ath11k_mac_get_arvif_by_vdev_id() was not marked as a read-side critical section. Mark the code in question as an RCU read-side critical section to avoid any potential use-after-free issues. Compile tested only. Fixes: a16d9b50cfba ("ath11k: support GTK rekey offload") Cc: stable@vger.kernel.org # 5.18 Cc: Carl Huang Signed-off-by: Johan Hovold Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231019155342.31631-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath11k/wmi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 333aff56f440..1c07f55c25e6 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8619,12 +8619,13 @@ static void ath11k_wmi_gtk_offload_status_event(struct ath11k_base *ab, return; } + rcu_read_lock(); + arvif = ath11k_mac_get_arvif_by_vdev_id(ab, ev->vdev_id); if (!arvif) { ath11k_warn(ab, "failed to get arvif for vdev_id:%d\n", ev->vdev_id); - kfree(tb); - return; + goto exit; } ath11k_dbg(ab, ATH11K_DBG_WMI, "event gtk offload refresh_cnt %d\n", @@ -8641,6 +8642,8 @@ static void ath11k_wmi_gtk_offload_status_event(struct ath11k_base *ab, ieee80211_gtk_rekey_notify(arvif->vif, arvif->bssid, (void *)&replay_ctr_be, GFP_ATOMIC); +exit: + rcu_read_unlock(); kfree(tb); } From afd3425bd69610f318403084fe491e24a1357fb9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 19 Oct 2023 13:36:50 +0200 Subject: [PATCH 0292/1397] wifi: ath12k: fix htt mlo-offset event locking commit 6afc57ea315e0f660b1f870a681737bb7b71faef upstream. The ath12k active pdevs are protected by RCU but the htt mlo-offset event handling code calling ath12k_mac_get_ar_by_pdev_id() was not marked as a read-side critical section. Mark the code in question as an RCU read-side critical section to avoid any potential use-after-free issues. Compile tested only. Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Cc: stable@vger.kernel.org # v6.2 Signed-off-by: Johan Hovold Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231019113650.9060-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath12k/dp_rx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index e1c84fc97460..dbcbe7e0cd2a 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1658,11 +1658,12 @@ static void ath12k_htt_mlo_offset_event_handler(struct ath12k_base *ab, msg = (struct ath12k_htt_mlo_offset_msg *)skb->data; pdev_id = u32_get_bits(__le32_to_cpu(msg->info), HTT_T2H_MLO_OFFSET_INFO_PDEV_ID); - ar = ath12k_mac_get_ar_by_pdev_id(ab, pdev_id); + rcu_read_lock(); + ar = ath12k_mac_get_ar_by_pdev_id(ab, pdev_id); if (!ar) { ath12k_warn(ab, "invalid pdev id %d on htt mlo offset\n", pdev_id); - return; + goto exit; } spin_lock_bh(&ar->data_lock); @@ -1678,6 +1679,8 @@ static void ath12k_htt_mlo_offset_event_handler(struct ath12k_base *ab, pdev->timestamp.mlo_comp_timer = __le32_to_cpu(msg->mlo_comp_timer); spin_unlock_bh(&ar->data_lock); +exit: + rcu_read_unlock(); } void ath12k_dp_htt_htc_t2h_msg_handler(struct ath12k_base *ab, From d7a5f7f76568e48869916d769e28b9f3ca70c78e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 19 Oct 2023 13:36:49 +0200 Subject: [PATCH 0293/1397] wifi: ath12k: fix dfs-radar and temperature event locking commit 69bd216e049349886405b1c87a55dce3d35d1ba7 upstream. The ath12k active pdevs are protected by RCU but the DFS-radar and temperature event handling code calling ath12k_mac_get_ar_by_pdev_id() was not marked as a read-side critical section. Mark the code in question as RCU read-side critical sections to avoid any potential use-after-free issues. Note that the temperature event handler looks like a place holder currently but would still trigger an RCU lockdep splat. Compile tested only. Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Cc: stable@vger.kernel.org # v6.2 Signed-off-by: Johan Hovold Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231019113650.9060-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath12k/wmi.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index bddf4571d50c..d217b70a7a8f 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -6482,6 +6482,8 @@ ath12k_wmi_pdev_dfs_radar_detected_event(struct ath12k_base *ab, struct sk_buff ev->detector_id, ev->segment_id, ev->timestamp, ev->is_chirp, ev->freq_offset, ev->sidx); + rcu_read_lock(); + ar = ath12k_mac_get_ar_by_pdev_id(ab, le32_to_cpu(ev->pdev_id)); if (!ar) { @@ -6499,6 +6501,8 @@ ath12k_wmi_pdev_dfs_radar_detected_event(struct ath12k_base *ab, struct sk_buff ieee80211_radar_detected(ar->hw); exit: + rcu_read_unlock(); + kfree(tb); } @@ -6517,11 +6521,16 @@ ath12k_wmi_pdev_temperature_event(struct ath12k_base *ab, ath12k_dbg(ab, ATH12K_DBG_WMI, "pdev temperature ev temp %d pdev_id %d\n", ev.temp, ev.pdev_id); + rcu_read_lock(); + ar = ath12k_mac_get_ar_by_pdev_id(ab, le32_to_cpu(ev.pdev_id)); if (!ar) { ath12k_warn(ab, "invalid pdev id in pdev temperature ev %d", ev.pdev_id); - return; + goto exit; } + +exit: + rcu_read_unlock(); } static void ath12k_fils_discovery_event(struct ath12k_base *ab, From d5b648567bfb6a1737fb5dda8a87bfde4916dd8c Mon Sep 17 00:00:00 2001 From: Rong Chen Date: Thu, 26 Oct 2023 15:31:56 +0800 Subject: [PATCH 0294/1397] mmc: meson-gx: Remove setting of CMD_CFG_ERROR commit 57925e16c9f7d18012bcf45bfa658f92c087981a upstream. For the t7 and older SoC families, the CMD_CFG_ERROR has no effect. Starting from SoC family C3, setting this bit without SG LINK data address will cause the controller to generate an IRQ and stop working. To fix it, don't set the bit CMD_CFG_ERROR anymore. Fixes: 18f92bc02f17 ("mmc: meson-gx: make sure the descriptor is stopped on errors") Signed-off-by: Rong Chen Reviewed-by: Jerome Brunet Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231026073156.2868310-1-rong.chen@amlogic.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/meson-gx-mmc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 9837dab096e6..c7c067b9415a 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -801,7 +801,6 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) cmd_cfg |= FIELD_PREP(CMD_CFG_CMD_INDEX_MASK, cmd->opcode); cmd_cfg |= CMD_CFG_OWNER; /* owned by CPU */ - cmd_cfg |= CMD_CFG_ERROR; /* stop in case of error */ meson_mmc_set_response_bits(cmd, &cmd_cfg); From c9400a9dba346177ba1d34ef0981fa8b1ee51001 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Tue, 24 Oct 2023 17:03:35 +0200 Subject: [PATCH 0295/1397] genirq/generic_chip: Make irq_remove_generic_chip() irqdomain aware commit 5e7afb2eb7b2a7c81e9f608cbdf74a07606fd1b5 upstream. irq_remove_generic_chip() calculates the Linux interrupt number for removing the handler and interrupt chip based on gc::irq_base as a linear function of the bit positions of set bits in the @msk argument. When the generic chip is present in an irq domain, i.e. created with a call to irq_alloc_domain_generic_chips(), gc::irq_base contains not the base Linux interrupt number. It contains the base hardware interrupt for this chip. It is set to 0 for the first chip in the domain, 0 + N for the next chip, where $N is the number of hardware interrupts per chip. That means the Linux interrupt number cannot be calculated based on gc::irq_base for irqdomain based chips without a domain map lookup, which is currently missing. Rework the code to take the irqdomain case into account and calculate the Linux interrupt number by a irqdomain lookup of the domain specific hardware interrupt number. [ tglx: Massage changelog. Reshuffle the logic and add a proper comment. ] Fixes: cfefd21e693d ("genirq: Add chip suspend and resume callbacks") Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231024150335.322282-1-herve.codina@bootlin.com Signed-off-by: Greg Kroah-Hartman --- kernel/irq/generic-chip.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index c653cd31548d..5a452b94b643 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -544,21 +544,34 @@ EXPORT_SYMBOL_GPL(irq_setup_alt_chip); void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, unsigned int clr, unsigned int set) { - unsigned int i = gc->irq_base; + unsigned int i, virq; raw_spin_lock(&gc_lock); list_del(&gc->list); raw_spin_unlock(&gc_lock); - for (; msk; msk >>= 1, i++) { + for (i = 0; msk; msk >>= 1, i++) { if (!(msk & 0x01)) continue; + /* + * Interrupt domain based chips store the base hardware + * interrupt number in gc::irq_base. Otherwise gc::irq_base + * contains the base Linux interrupt number. + */ + if (gc->domain) { + virq = irq_find_mapping(gc->domain, gc->irq_base + i); + if (!virq) + continue; + } else { + virq = gc->irq_base + i; + } + /* Remove handler first. That will mask the irq line */ - irq_set_handler(i, NULL); - irq_set_chip(i, &no_irq_chip); - irq_set_chip_data(i, NULL); - irq_modify_status(i, clr, set); + irq_set_handler(virq, NULL); + irq_set_chip(virq, &no_irq_chip); + irq_set_chip_data(virq, NULL); + irq_modify_status(virq, clr, set); } } EXPORT_SYMBOL_GPL(irq_remove_generic_chip); From bf5dd542721a4f5145b241cf51e92f3809855d27 Mon Sep 17 00:00:00 2001 From: Hao Jia Date: Thu, 12 Oct 2023 17:00:03 +0800 Subject: [PATCH 0296/1397] sched/core: Fix RQCF_ACT_SKIP leak commit 5ebde09d91707a4a9bec1e3d213e3c12ffde348f upstream. Igor Raits and Bagas Sanjaya report a RQCF_ACT_SKIP leak warning. This warning may be triggered in the following situations: CPU0 CPU1 __schedule() *rq->clock_update_flags <<= 1;* unregister_fair_sched_group() pick_next_task_fair+0x4a/0x410 destroy_cfs_bandwidth() newidle_balance+0x115/0x3e0 for_each_possible_cpu(i) *i=0* rq_unpin_lock(this_rq, rf) __cfsb_csd_unthrottle() raw_spin_rq_unlock(this_rq) rq_lock(*CPU0_rq*, &rf) rq_clock_start_loop_update() rq->clock_update_flags & RQCF_ACT_SKIP <-- raw_spin_rq_lock(this_rq) The purpose of RQCF_ACT_SKIP is to skip the update rq clock, but the update is very early in __schedule(), but we clear RQCF_*_SKIP very late, causing it to span that gap above and triggering this warning. In __schedule() we can clear the RQCF_*_SKIP flag immediately after update_rq_clock() to avoid this RQCF_ACT_SKIP leak warning. And set rq->clock_update_flags to RQCF_UPDATED to avoid rq->clock_update_flags < RQCF_ACT_SKIP warning that may be triggered later. Fixes: ebb83d84e49b ("sched/core: Avoid multiple calling update_rq_clock() in __cfsb_csd_unthrottle()") Closes: https://lore.kernel.org/all/20230913082424.73252-1-jiahao.os@bytedance.com Reported-by: Igor Raits Reported-by: Bagas Sanjaya Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Hao Jia Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/a5dd536d-041a-2ce9-f4b7-64d8d85c86dc@gmail.com Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9e9a45a3394f..a854b71836dd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5378,8 +5378,6 @@ context_switch(struct rq *rq, struct task_struct *prev, /* switch_mm_cid() requires the memory barriers above. */ switch_mm_cid(rq, prev, next); - rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); - prepare_lock_switch(rq, next, rf); /* Here we just switch the register state and the stack. */ @@ -6619,6 +6617,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) /* Promote REQ to ACT */ rq->clock_update_flags <<= 1; update_rq_clock(rq); + rq->clock_update_flags = RQCF_UPDATED; switch_count = &prev->nivcsw; @@ -6698,8 +6697,6 @@ static void __sched notrace __schedule(unsigned int sched_mode) /* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); } else { - rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); - rq_unpin_lock(rq, &rf); __balance_callbacks(rq); raw_spin_rq_unlock_irq(rq); From fc6e70a26516cdef5bdd0abcef9582d6c95b42a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Tue, 24 Oct 2023 07:10:40 -0300 Subject: [PATCH 0297/1397] pmdomain: bcm: bcm2835-power: check if the ASB register is equal to enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2e75396f1df61e1f1d26d0d703fc7292c4ae4371 upstream. The commit c494a447c14e ("soc: bcm: bcm2835-power: Refactor ASB control") refactored the ASB control by using a general function to handle both the enable and disable. But this patch introduced a subtle regression: we need to check if !!(readl(base + reg) & ASB_ACK) == enable, not just check if (readl(base + reg) & ASB_ACK) == true. Currently, this is causing an invalid register state in V3D when unloading and loading the driver, because `bcm2835_asb_disable()` will return -ETIMEDOUT and `bcm2835_asb_power_off()` will fail to disable the ASB slave for V3D. Fixes: c494a447c14e ("soc: bcm: bcm2835-power: Refactor ASB control") Signed-off-by: Maíra Canal Reviewed-by: Florian Fainelli Reviewed-by: Stefan Wahren Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231024101251.6357-2-mcanal@igalia.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/pmdomain/bcm/bcm2835-power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pmdomain/bcm/bcm2835-power.c b/drivers/pmdomain/bcm/bcm2835-power.c index 1a179d4e011c..d2f0233cb620 100644 --- a/drivers/pmdomain/bcm/bcm2835-power.c +++ b/drivers/pmdomain/bcm/bcm2835-power.c @@ -175,7 +175,7 @@ static int bcm2835_asb_control(struct bcm2835_power *power, u32 reg, bool enable } writel(PM_PASSWORD | val, base + reg); - while (readl(base + reg) & ASB_ACK) { + while (!!(readl(base + reg) & ASB_ACK) == enable) { cpu_relax(); if (ktime_get_ns() - start >= 1000) return -ETIMEDOUT; From fe822ce87cb88b4af838506542c9e2080ac090f4 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Tue, 22 Aug 2023 16:59:33 +0530 Subject: [PATCH 0298/1397] KEYS: trusted: tee: Refactor register SHM usage commit c745cd1718b7825d69315fe7127e2e289e617598 upstream. The OP-TEE driver using the old SMC based ABI permits overlapping shared buffers, but with the new FF-A based ABI each physical page may only be registered once. As the key and blob buffer are allocated adjancently, there is no need for redundant register shared memory invocation. Also, it is incompatibile with FF-A based ABI limitation. So refactor register shared memory implementation to use only single invocation to register both key and blob buffers. [jarkko: Added cc to stable.] Cc: stable@vger.kernel.org # v5.16+ Fixes: 4615e5a34b95 ("optee: add FF-A support") Reported-by: Jens Wiklander Signed-off-by: Sumit Garg Tested-by: Jens Wiklander Reviewed-by: Jens Wiklander Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- security/keys/trusted-keys/trusted_tee.c | 64 ++++++++---------------- 1 file changed, 20 insertions(+), 44 deletions(-) diff --git a/security/keys/trusted-keys/trusted_tee.c b/security/keys/trusted-keys/trusted_tee.c index ac3e270ade69..aa3d477de6db 100644 --- a/security/keys/trusted-keys/trusted_tee.c +++ b/security/keys/trusted-keys/trusted_tee.c @@ -65,24 +65,16 @@ static int trusted_tee_seal(struct trusted_key_payload *p, char *datablob) int ret; struct tee_ioctl_invoke_arg inv_arg; struct tee_param param[4]; - struct tee_shm *reg_shm_in = NULL, *reg_shm_out = NULL; + struct tee_shm *reg_shm = NULL; memset(&inv_arg, 0, sizeof(inv_arg)); memset(¶m, 0, sizeof(param)); - reg_shm_in = tee_shm_register_kernel_buf(pvt_data.ctx, p->key, - p->key_len); - if (IS_ERR(reg_shm_in)) { - dev_err(pvt_data.dev, "key shm register failed\n"); - return PTR_ERR(reg_shm_in); - } - - reg_shm_out = tee_shm_register_kernel_buf(pvt_data.ctx, p->blob, - sizeof(p->blob)); - if (IS_ERR(reg_shm_out)) { - dev_err(pvt_data.dev, "blob shm register failed\n"); - ret = PTR_ERR(reg_shm_out); - goto out; + reg_shm = tee_shm_register_kernel_buf(pvt_data.ctx, p->key, + sizeof(p->key) + sizeof(p->blob)); + if (IS_ERR(reg_shm)) { + dev_err(pvt_data.dev, "shm register failed\n"); + return PTR_ERR(reg_shm); } inv_arg.func = TA_CMD_SEAL; @@ -90,13 +82,13 @@ static int trusted_tee_seal(struct trusted_key_payload *p, char *datablob) inv_arg.num_params = 4; param[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT; - param[0].u.memref.shm = reg_shm_in; + param[0].u.memref.shm = reg_shm; param[0].u.memref.size = p->key_len; param[0].u.memref.shm_offs = 0; param[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT; - param[1].u.memref.shm = reg_shm_out; + param[1].u.memref.shm = reg_shm; param[1].u.memref.size = sizeof(p->blob); - param[1].u.memref.shm_offs = 0; + param[1].u.memref.shm_offs = sizeof(p->key); ret = tee_client_invoke_func(pvt_data.ctx, &inv_arg, param); if ((ret < 0) || (inv_arg.ret != 0)) { @@ -107,11 +99,7 @@ static int trusted_tee_seal(struct trusted_key_payload *p, char *datablob) p->blob_len = param[1].u.memref.size; } -out: - if (reg_shm_out) - tee_shm_free(reg_shm_out); - if (reg_shm_in) - tee_shm_free(reg_shm_in); + tee_shm_free(reg_shm); return ret; } @@ -124,24 +112,16 @@ static int trusted_tee_unseal(struct trusted_key_payload *p, char *datablob) int ret; struct tee_ioctl_invoke_arg inv_arg; struct tee_param param[4]; - struct tee_shm *reg_shm_in = NULL, *reg_shm_out = NULL; + struct tee_shm *reg_shm = NULL; memset(&inv_arg, 0, sizeof(inv_arg)); memset(¶m, 0, sizeof(param)); - reg_shm_in = tee_shm_register_kernel_buf(pvt_data.ctx, p->blob, - p->blob_len); - if (IS_ERR(reg_shm_in)) { - dev_err(pvt_data.dev, "blob shm register failed\n"); - return PTR_ERR(reg_shm_in); - } - - reg_shm_out = tee_shm_register_kernel_buf(pvt_data.ctx, p->key, - sizeof(p->key)); - if (IS_ERR(reg_shm_out)) { - dev_err(pvt_data.dev, "key shm register failed\n"); - ret = PTR_ERR(reg_shm_out); - goto out; + reg_shm = tee_shm_register_kernel_buf(pvt_data.ctx, p->key, + sizeof(p->key) + sizeof(p->blob)); + if (IS_ERR(reg_shm)) { + dev_err(pvt_data.dev, "shm register failed\n"); + return PTR_ERR(reg_shm); } inv_arg.func = TA_CMD_UNSEAL; @@ -149,11 +129,11 @@ static int trusted_tee_unseal(struct trusted_key_payload *p, char *datablob) inv_arg.num_params = 4; param[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT; - param[0].u.memref.shm = reg_shm_in; + param[0].u.memref.shm = reg_shm; param[0].u.memref.size = p->blob_len; - param[0].u.memref.shm_offs = 0; + param[0].u.memref.shm_offs = sizeof(p->key); param[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT; - param[1].u.memref.shm = reg_shm_out; + param[1].u.memref.shm = reg_shm; param[1].u.memref.size = sizeof(p->key); param[1].u.memref.shm_offs = 0; @@ -166,11 +146,7 @@ static int trusted_tee_unseal(struct trusted_key_payload *p, char *datablob) p->key_len = param[1].u.memref.size; } -out: - if (reg_shm_out) - tee_shm_free(reg_shm_out); - if (reg_shm_in) - tee_shm_free(reg_shm_in); + tee_shm_free(reg_shm); return ret; } From f009347417a3d1188206b00991a78b1ac3736a6e Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 11 Oct 2023 02:08:25 +0300 Subject: [PATCH 0299/1397] KEYS: trusted: Rollback init_trusted() consistently commit 31de287345f41bbfaec36a5c8cbdba035cf76442 upstream. Do bind neither static calls nor trusted_key_exit() before a successful init, in order to maintain a consistent state. In addition, depart the init_trusted() in the case of a real error (i.e. getting back something else than -ENODEV). Reported-by: Linus Torvalds Closes: https://lore.kernel.org/linux-integrity/CAHk-=whOPoLaWM8S8GgoOPT7a2+nMH5h3TLKtn=R_3w4R1_Uvg@mail.gmail.com/ Cc: stable@vger.kernel.org # v5.13+ Fixes: 5d0682be3189 ("KEYS: trusted: Add generic trusted keys framework") Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- security/keys/trusted-keys/trusted_core.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/security/keys/trusted-keys/trusted_core.c b/security/keys/trusted-keys/trusted_core.c index 85fb5c22529a..fee1ab2c734d 100644 --- a/security/keys/trusted-keys/trusted_core.c +++ b/security/keys/trusted-keys/trusted_core.c @@ -358,17 +358,17 @@ static int __init init_trusted(void) if (!get_random) get_random = kernel_get_random; - static_call_update(trusted_key_seal, - trusted_key_sources[i].ops->seal); - static_call_update(trusted_key_unseal, - trusted_key_sources[i].ops->unseal); - static_call_update(trusted_key_get_random, - get_random); - trusted_key_exit = trusted_key_sources[i].ops->exit; - migratable = trusted_key_sources[i].ops->migratable; - ret = trusted_key_sources[i].ops->init(); - if (!ret) + if (!ret) { + static_call_update(trusted_key_seal, trusted_key_sources[i].ops->seal); + static_call_update(trusted_key_unseal, trusted_key_sources[i].ops->unseal); + static_call_update(trusted_key_get_random, get_random); + + trusted_key_exit = trusted_key_sources[i].ops->exit; + migratable = trusted_key_sources[i].ops->migratable; + } + + if (!ret || ret != -ENODEV) break; } From 10bec413b75f0e70f622a1b16c088c15a263b9ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 1 Oct 2023 19:02:53 +0200 Subject: [PATCH 0300/1397] PCI: keystone: Don't discard .remove() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 200bddbb3f5202bbce96444fdc416305de14f547 upstream. With CONFIG_PCIE_KEYSTONE=y and ks_pcie_remove() marked with __exit, the function is discarded from the driver. In this case a bound device can still get unbound, e.g via sysfs. Then no cleanup code is run resulting in resource leaks or worse. The right thing to do is do always have the remove callback available. Note that this driver cannot be compiled as a module, so ks_pcie_remove() was always discarded before this change and modpost couldn't warn about this issue. Furthermore the __ref annotation also prevents a warning. Fixes: 0c4ffcfe1fbc ("PCI: keystone: Add TI Keystone PCIe driver") Link: https://lore.kernel.org/r/20231001170254.2506508-4-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pci-keystone.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c index 49aea6ce3e87..eb3fa17b243f 100644 --- a/drivers/pci/controller/dwc/pci-keystone.c +++ b/drivers/pci/controller/dwc/pci-keystone.c @@ -1302,7 +1302,7 @@ static int __init ks_pcie_probe(struct platform_device *pdev) return ret; } -static int __exit ks_pcie_remove(struct platform_device *pdev) +static int ks_pcie_remove(struct platform_device *pdev) { struct keystone_pcie *ks_pcie = platform_get_drvdata(pdev); struct device_link **link = ks_pcie->link; @@ -1320,7 +1320,7 @@ static int __exit ks_pcie_remove(struct platform_device *pdev) static struct platform_driver ks_pcie_driver __refdata = { .probe = ks_pcie_probe, - .remove = __exit_p(ks_pcie_remove), + .remove = ks_pcie_remove, .driver = { .name = "keystone-pcie", .of_match_table = ks_pcie_of_match, From b99a36afa2d93327cc3d7b8df6e99145ee234ae6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 1 Oct 2023 19:02:54 +0200 Subject: [PATCH 0301/1397] PCI: keystone: Don't discard .probe() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7994db905c0fd692cf04c527585f08a91b560144 upstream. The __init annotation makes the ks_pcie_probe() function disappear after booting completes. However a device can also be bound later. In that case, we try to call ks_pcie_probe(), but the backing memory is likely already overwritten. The right thing to do is do always have the probe callback available. Note that the (wrong) __refdata annotation prevented this issue to be noticed by modpost. Fixes: 0c4ffcfe1fbc ("PCI: keystone: Add TI Keystone PCIe driver") Link: https://lore.kernel.org/r/20231001170254.2506508-5-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pci-keystone.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c index eb3fa17b243f..0def919f89fa 100644 --- a/drivers/pci/controller/dwc/pci-keystone.c +++ b/drivers/pci/controller/dwc/pci-keystone.c @@ -1100,7 +1100,7 @@ static const struct of_device_id ks_pcie_of_match[] = { { }, }; -static int __init ks_pcie_probe(struct platform_device *pdev) +static int ks_pcie_probe(struct platform_device *pdev) { const struct dw_pcie_host_ops *host_ops; const struct dw_pcie_ep_ops *ep_ops; @@ -1318,7 +1318,7 @@ static int ks_pcie_remove(struct platform_device *pdev) return 0; } -static struct platform_driver ks_pcie_driver __refdata = { +static struct platform_driver ks_pcie_driver = { .probe = ks_pcie_probe, .remove = ks_pcie_remove, .driver = { From fc14784cc5ba069f41aa16023dc10cab929935a2 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Mon, 16 Oct 2023 10:02:04 +0200 Subject: [PATCH 0302/1397] pmdomain: amlogic: Fix mask for the second NNA mem PD domain commit b131329b9bfbd1b4c0c5e088cb0c6ec03a12930f upstream. Without this change, the NPU hangs when the 8th NN core is used. It matches what the out-of-tree driver does. Signed-off-by: Tomeu Vizoso Fixes: 9a217b7e8953 ("soc: amlogic: meson-pwrc: Add NNA power domain for A311D") Acked-by: Neil Armstrong Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231016080205.41982-2-tomeu@tomeuvizoso.net Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/pmdomain/amlogic/meson-ee-pwrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pmdomain/amlogic/meson-ee-pwrc.c b/drivers/pmdomain/amlogic/meson-ee-pwrc.c index cfb796d40d9d..0dd71cd814c5 100644 --- a/drivers/pmdomain/amlogic/meson-ee-pwrc.c +++ b/drivers/pmdomain/amlogic/meson-ee-pwrc.c @@ -228,7 +228,7 @@ static struct meson_ee_pwrc_mem_domain sm1_pwrc_mem_audio[] = { static struct meson_ee_pwrc_mem_domain g12a_pwrc_mem_nna[] = { { G12A_HHI_NANOQ_MEM_PD_REG0, GENMASK(31, 0) }, - { G12A_HHI_NANOQ_MEM_PD_REG1, GENMASK(23, 0) }, + { G12A_HHI_NANOQ_MEM_PD_REG1, GENMASK(31, 0) }, }; #define VPU_PD(__name, __top_pd, __mem, __is_pwr_off, __resets, __clks) \ From 69e619d2fd056fe1f5d0adf01584f2da669e0d28 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 25 Oct 2023 10:21:28 -0700 Subject: [PATCH 0303/1397] arm64: Restrict CPU_BIG_ENDIAN to GNU as or LLVM IAS 15.x or newer commit 146a15b873353f8ac28dc281c139ff611a3c4848 upstream. Prior to LLVM 15.0.0, LLVM's integrated assembler would incorrectly byte-swap NOP when compiling for big-endian, and the resulting series of bytes happened to match the encoding of FNMADD S21, S30, S0, S0. This went unnoticed until commit: 34f66c4c4d5518c1 ("arm64: Use a positive cpucap for FP/SIMD") Prior to that commit, the kernel would always enable the use of FPSIMD early in boot when __cpu_setup() initialized CPACR_EL1, and so usage of FNMADD within the kernel was not detected, but could result in the corruption of user or kernel FPSIMD state. After that commit, the instructions happen to trap during boot prior to FPSIMD being detected and enabled, e.g. | Unhandled 64-bit el1h sync exception on CPU0, ESR 0x000000001fe00000 -- ASIMD | CPU: 0 PID: 0 Comm: swapper Not tainted 6.6.0-rc3-00013-g34f66c4c4d55 #1 | Hardware name: linux,dummy-virt (DT) | pstate: 400000c9 (nZcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : __pi_strcmp+0x1c/0x150 | lr : populate_properties+0xe4/0x254 | sp : ffffd014173d3ad0 | x29: ffffd014173d3af0 x28: fffffbfffddffcb8 x27: 0000000000000000 | x26: 0000000000000058 x25: fffffbfffddfe054 x24: 0000000000000008 | x23: fffffbfffddfe000 x22: fffffbfffddfe000 x21: fffffbfffddfe044 | x20: ffffd014173d3b70 x19: 0000000000000001 x18: 0000000000000005 | x17: 0000000000000010 x16: 0000000000000000 x15: 00000000413e7000 | x14: 0000000000000000 x13: 0000000000001bcc x12: 0000000000000000 | x11: 00000000d00dfeed x10: ffffd414193f2cd0 x9 : 0000000000000000 | x8 : 0101010101010101 x7 : ffffffffffffffc0 x6 : 0000000000000000 | x5 : 0000000000000000 x4 : 0101010101010101 x3 : 000000000000002a | x2 : 0000000000000001 x1 : ffffd014171f2988 x0 : fffffbfffddffcb8 | Kernel panic - not syncing: Unhandled exception | CPU: 0 PID: 0 Comm: swapper Not tainted 6.6.0-rc3-00013-g34f66c4c4d55 #1 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0xec/0x108 | show_stack+0x18/0x2c | dump_stack_lvl+0x50/0x68 | dump_stack+0x18/0x24 | panic+0x13c/0x340 | el1t_64_irq_handler+0x0/0x1c | el1_abort+0x0/0x5c | el1h_64_sync+0x64/0x68 | __pi_strcmp+0x1c/0x150 | unflatten_dt_nodes+0x1e8/0x2d8 | __unflatten_device_tree+0x5c/0x15c | unflatten_device_tree+0x38/0x50 | setup_arch+0x164/0x1e0 | start_kernel+0x64/0x38c | __primary_switched+0xbc/0xc4 Restrict CONFIG_CPU_BIG_ENDIAN to a known good assembler, which is either GNU as or LLVM's IAS 15.0.0 and newer, which contains the linked commit. Closes: https://github.com/ClangBuiltLinux/linux/issues/1948 Link: https://github.com/llvm/llvm-project/commit/1379b150991f70a5782e9a143c2ba5308da1161c Signed-off-by: Nathan Chancellor Cc: stable@vger.kernel.org Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20231025-disable-arm64-be-ias-b4-llvm-15-v1-1-b25263ed8b23@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 78f20e632712..6062a52a084f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1368,6 +1368,8 @@ choice config CPU_BIG_ENDIAN bool "Build big-endian kernel" depends on !LD_IS_LLD || LLD_VERSION >= 130000 + # https://github.com/llvm/llvm-project/commit/1379b150991f70a5782e9a143c2ba5308da1161c + depends on AS_IS_GNU || AS_VERSION >= 150000 help Say Y if you plan on running a kernel with a big-endian userspace. From e7c5f137e5aa8081b2d2c608da521b2f48a6dbc1 Mon Sep 17 00:00:00 2001 From: Maria Yu Date: Tue, 24 Oct 2023 09:09:54 +0800 Subject: [PATCH 0304/1397] arm64: module: Fix PLT counting when CONFIG_RANDOMIZE_BASE=n commit d35686444fc80950c731e33a2f6ad4a55822be9b upstream. The counting of module PLTs has been broken when CONFIG_RANDOMIZE_BASE=n since commit: 3e35d303ab7d22c4 ("arm64: module: rework module VA range selection") Prior to that commit, when CONFIG_RANDOMIZE_BASE=n, the kernel image and all modules were placed within a 128M region, and no PLTs were necessary for B or BL. Hence count_plts() and partition_branch_plt_relas() skipped handling B and BL when CONFIG_RANDOMIZE_BASE=n. After that commit, modules can be placed anywhere within a 2G window regardless of CONFIG_RANDOMIZE_BASE, and hence PLTs may be necessary for B and BL even when CONFIG_RANDOMIZE_BASE=n. Unfortunately that commit failed to update count_plts() and partition_branch_plt_relas() accordingly. Due to this, module_emit_plt_entry() may fail if an insufficient number of PLT entries have been reserved, resulting in modules failing to load with -ENOEXEC. Fix this by counting PLTs regardless of CONFIG_RANDOMIZE_BASE in count_plts() and partition_branch_plt_relas(). Fixes: 3e35d303ab7d ("arm64: module: rework module VA range selection") Signed-off-by: Maria Yu Cc: # 6.5.x Acked-by: Ard Biesheuvel Fixes: 3e35d303ab7d ("arm64: module: rework module VA range selection") Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20231024010954.6768-1-quic_aiquny@quicinc.com Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/module-plts.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index bd69a4e7cd60..79200f21e123 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -167,9 +167,6 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, switch (ELF64_R_TYPE(rela[i].r_info)) { case R_AARCH64_JUMP26: case R_AARCH64_CALL26: - if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - break; - /* * We only have to consider branch targets that resolve * to symbols that are defined in a different section. @@ -269,9 +266,6 @@ static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela, { int i = 0, j = numrels - 1; - if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - return 0; - while (i < j) { if (branch_rela_needs_plt(syms, &rela[i], dstidx)) i++; From 2e9ae2196d7ed5f9887e312af18c36611a001da1 Mon Sep 17 00:00:00 2001 From: Pengfei Li Date: Sat, 21 Oct 2023 02:59:49 +0800 Subject: [PATCH 0305/1397] pmdomain: imx: Make imx pgc power domain also set the fwnode commit 374de39d38f97b0e58cfee88da590b2d056ccf7f upstream. Currently, The imx pgc power domain doesn't set the fwnode pointer, which results in supply regulator device can't get consumer imx pgc power domain device from fwnode when creating a link. This causes the driver core to instead try to create a link between the parent gpc device of imx pgc power domain device and supply regulator device. However, at this point, the gpc device has already been bound, and the link creation will fail. So adding the fwnode pointer to the imx pgc power domain device will fix this issue. Signed-off-by: Pengfei Li Tested-by: Emil Kronborg Fixes: 3fb16866b51d ("driver core: fw_devlink: Make cycle detection more robust") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231020185949.537083-1-pengfei.li_1@nxp.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/pmdomain/imx/gpc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pmdomain/imx/gpc.c b/drivers/pmdomain/imx/gpc.c index 90a8b2c0676f..419ed15cc10c 100644 --- a/drivers/pmdomain/imx/gpc.c +++ b/drivers/pmdomain/imx/gpc.c @@ -498,6 +498,7 @@ static int imx_gpc_probe(struct platform_device *pdev) pd_pdev->dev.parent = &pdev->dev; pd_pdev->dev.of_node = np; + pd_pdev->dev.fwnode = of_fwnode_handle(np); ret = platform_device_add(pd_pdev); if (ret) { From cb07cb22f9792b47ac28a5cfed973b0129566203 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 18 Oct 2023 19:24:14 +0200 Subject: [PATCH 0306/1397] parisc/agp: Use 64-bit LE values in SBA IOMMU PDIR table commit 86bb854d134f4429feb35d2e05f55c6e036770d2 upstream. The PDIR table of the System Bus Adapter (SBA) I/O MMU uses 64-bit little-endian pointers. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/char/agp/parisc-agp.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index c6f181702b9a..edbc4d338117 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c @@ -38,7 +38,7 @@ static struct _parisc_agp_info { int lba_cap_offset; - u64 *gatt; + __le64 *gatt; u64 gatt_entries; u64 gart_base; @@ -104,7 +104,7 @@ parisc_agp_create_gatt_table(struct agp_bridge_data *bridge) int i; for (i = 0; i < info->gatt_entries; i++) { - info->gatt[i] = (unsigned long)agp_bridge->scratch_page; + info->gatt[i] = cpu_to_le64(agp_bridge->scratch_page); } return 0; @@ -158,9 +158,9 @@ parisc_agp_insert_memory(struct agp_memory *mem, off_t pg_start, int type) for (k = 0; k < info->io_pages_per_kpage; k++, j++, paddr += info->io_page_size) { - info->gatt[j] = + info->gatt[j] = cpu_to_le64( parisc_agp_mask_memory(agp_bridge, - paddr, type); + paddr, type)); asm_io_fdc(&info->gatt[j]); } } @@ -184,7 +184,7 @@ parisc_agp_remove_memory(struct agp_memory *mem, off_t pg_start, int type) io_pg_start = info->io_pages_per_kpage * pg_start; io_pg_count = info->io_pages_per_kpage * mem->page_count; for (i = io_pg_start; i < io_pg_count + io_pg_start; i++) { - info->gatt[i] = agp_bridge->scratch_page; + info->gatt[i] = cpu_to_le64(agp_bridge->scratch_page); } agp_bridge->driver->tlb_flush(mem); @@ -204,7 +204,8 @@ parisc_agp_mask_memory(struct agp_bridge_data *bridge, dma_addr_t addr, pa |= (ci >> PAGE_SHIFT) & 0xff;/* move CI (8 bits) into lowest byte */ pa |= SBA_PDIR_VALID_BIT; /* set "valid" bit */ - return cpu_to_le64(pa); + /* return native (big-endian) PDIR entry */ + return pa; } static void @@ -251,7 +252,8 @@ static int __init agp_ioc_init(void __iomem *ioc_regs) { struct _parisc_agp_info *info = &parisc_agp_info; - u64 iova_base, *io_pdir, io_tlb_ps; + u64 iova_base, io_tlb_ps; + __le64 *io_pdir; int io_tlb_shift; printk(KERN_INFO DRVPFX "IO PDIR shared with sba_iommu\n"); From 1b017510bcaf7c4bdc2014ee21f7d028c1e6e3aa Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 22 Oct 2023 11:48:11 +0200 Subject: [PATCH 0307/1397] parisc/pdc: Add width field to struct pdc_model commit 6240553b52c475d9fc9674de0521b77e692f3764 upstream. PDC2.0 specifies the additional PSW-bit field. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/uapi/asm/pdc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/include/uapi/asm/pdc.h b/arch/parisc/include/uapi/asm/pdc.h index 7a90070136e8..8e38a86996fc 100644 --- a/arch/parisc/include/uapi/asm/pdc.h +++ b/arch/parisc/include/uapi/asm/pdc.h @@ -472,6 +472,7 @@ struct pdc_model { /* for PDC_MODEL */ unsigned long arch_rev; unsigned long pot_key; unsigned long curr_key; + unsigned long width; /* default of PSW_W bit (1=enabled) */ }; struct pdc_cache_cf { /* for PDC_CACHE (I/D-caches) */ From 47b816649a186064b5c51bb1db2364a0e3afe5ed Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 17 Oct 2023 22:19:53 +0200 Subject: [PATCH 0308/1397] parisc/power: Add power soft-off when running on qemu commit d0c219472980d15f5cbc5c8aec736848bda3f235 upstream. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/power.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index 6f5e5f0230d3..6ee0717e34ec 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -197,6 +197,14 @@ static struct notifier_block parisc_panic_block = { .priority = INT_MAX, }; +/* qemu soft power-off function */ +static int qemu_power_off(struct sys_off_data *data) +{ + /* this turns the system off via SeaBIOS */ + *(int *)data->cb_data = 0; + pdc_soft_power_button(1); + return NOTIFY_DONE; +} static int __init power_init(void) { @@ -226,7 +234,13 @@ static int __init power_init(void) soft_power_reg); } - power_task = kthread_run(kpowerswd, (void*)soft_power_reg, KTHREAD_NAME); + power_task = NULL; + if (running_on_qemu && soft_power_reg) + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, SYS_OFF_PRIO_DEFAULT, + qemu_power_off, (void *)soft_power_reg); + else + power_task = kthread_run(kpowerswd, (void*)soft_power_reg, + KTHREAD_NAME); if (IS_ERR(power_task)) { printk(KERN_ERR DRIVER_NAME ": thread creation failed. Driver not loaded.\n"); pdc_soft_power_button(0); From 22d4c2a841d7d912a8e047a080790da580f8ce56 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 24 Oct 2023 20:30:14 +0200 Subject: [PATCH 0309/1397] cpufreq: stats: Fix buffer overflow detection in trans_stats() commit ea167a7fc2426f7685c3735e104921c1a20a6d3f upstream. Commit 3c0897c180c6 ("cpufreq: Use scnprintf() for avoiding potential buffer overflow") switched from snprintf to the more secure scnprintf but never updated the exit condition for PAGE_SIZE. As the commit say and as scnprintf document, what scnprintf returns what is actually written not counting the '\0' end char. This results in the case of len exceeding the size, len set to PAGE_SIZE - 1, as it can be written at max PAGE_SIZE - 1 (as '\0' is not counted) Because of len is never set to PAGE_SIZE, the function never break early, never prints the warning and never return -EFBIG. Fix this by changing the condition to PAGE_SIZE - 1 to correctly trigger the error. Cc: 5.10+ # 5.10+ Fixes: 3c0897c180c6 ("cpufreq: Use scnprintf() for avoiding potential buffer overflow") Signed-off-by: Christian Marangi [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq_stats.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index a33df3c66c88..40a9ff18da06 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -131,23 +131,23 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) len += sysfs_emit_at(buf, len, " From : To\n"); len += sysfs_emit_at(buf, len, " : "); for (i = 0; i < stats->state_num; i++) { - if (len >= PAGE_SIZE) + if (len >= PAGE_SIZE - 1) break; len += sysfs_emit_at(buf, len, "%9u ", stats->freq_table[i]); } - if (len >= PAGE_SIZE) - return PAGE_SIZE; + if (len >= PAGE_SIZE - 1) + return PAGE_SIZE - 1; len += sysfs_emit_at(buf, len, "\n"); for (i = 0; i < stats->state_num; i++) { - if (len >= PAGE_SIZE) + if (len >= PAGE_SIZE - 1) break; len += sysfs_emit_at(buf, len, "%9u: ", stats->freq_table[i]); for (j = 0; j < stats->state_num; j++) { - if (len >= PAGE_SIZE) + if (len >= PAGE_SIZE - 1) break; if (pending) @@ -157,12 +157,12 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) len += sysfs_emit_at(buf, len, "%9u ", count); } - if (len >= PAGE_SIZE) + if (len >= PAGE_SIZE - 1) break; len += sysfs_emit_at(buf, len, "\n"); } - if (len >= PAGE_SIZE) { + if (len >= PAGE_SIZE - 1) { pr_warn_once("cpufreq transition table exceeds PAGE_SIZE. Disabling\n"); return -EFBIG; } From 6806b24ebd3bcf65a463e535b35c499f784cbb5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Oct 2023 22:17:19 +0300 Subject: [PATCH 0310/1397] powercap: intel_rapl: Downgrade BIOS locked limits pr_warn() to pr_debug() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a60ec4485f1c72dfece365cf95e6de82bdd74300 upstream. Before the refactoring the pr_warn() only triggered when someone explicitly tried to write to a BIOS locked limit. After the refactoring the warning is also triggering during system resume. The user can't do anything about this so printing scary warnings doesn't make sense Keep the printk but make it pr_debug() instead of pr_warn() to make it clear it's not a serious issue. Fixes: 9050a9cd5e4c ("powercap: intel_rapl: Cleanup Power Limits support") Signed-off-by: Ville Syrjälä Cc: 6.5+ # 6.5+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/powercap/intel_rapl_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 40a2cc649c79..2feed036c1cd 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -892,7 +892,7 @@ static int rapl_write_pl_data(struct rapl_domain *rd, int pl, return -EINVAL; if (rd->rpl[pl].locked) { - pr_warn("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]); + pr_debug("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]); return -EACCES; } From b0a660d46d40ad122d200cfdfbdba81a6346f4cb Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 23 Oct 2023 21:30:52 -0600 Subject: [PATCH 0311/1397] clk: socfpga: Fix undefined behavior bug in struct stratix10_clock_data commit d761bb01c85b22d5b44abe283eb89019693f6595 upstream. `struct clk_hw_onecell_data` is a flexible structure, which means that it contains flexible-array member at the bottom, in this case array `hws`: include/linux/clk-provider.h: 1380 struct clk_hw_onecell_data { 1381 unsigned int num; 1382 struct clk_hw *hws[] __counted_by(num); 1383 }; This could potentially lead to an overwrite of the objects following `clk_data` in `struct stratix10_clock_data`, in this case `void __iomem *base;` at run-time: drivers/clk/socfpga/stratix10-clk.h: 9 struct stratix10_clock_data { 10 struct clk_hw_onecell_data clk_data; 11 void __iomem *base; 12 }; There are currently three different places where memory is allocated for `struct stratix10_clock_data`, including the flex-array `hws` in `struct clk_hw_onecell_data`: drivers/clk/socfpga/clk-agilex.c: 469 clk_data = devm_kzalloc(dev, struct_size(clk_data, clk_data.hws, 470 num_clks), GFP_KERNEL); drivers/clk/socfpga/clk-agilex.c: 509 clk_data = devm_kzalloc(dev, struct_size(clk_data, clk_data.hws, 510 num_clks), GFP_KERNEL); drivers/clk/socfpga/clk-s10.c: 400 clk_data = devm_kzalloc(dev, struct_size(clk_data, clk_data.hws, 401 num_clks), GFP_KERNEL); I'll use just one of them to describe the issue. See below. Notice that a total of 440 bytes are allocated for flexible-array member `hws` at line 469: include/dt-bindings/clock/agilex-clock.h: 70 #define AGILEX_NUM_CLKS 55 drivers/clk/socfpga/clk-agilex.c: 459 struct stratix10_clock_data *clk_data; 460 void __iomem *base; ... 466 467 num_clks = AGILEX_NUM_CLKS; 468 469 clk_data = devm_kzalloc(dev, struct_size(clk_data, clk_data.hws, 470 num_clks), GFP_KERNEL); `struct_size(clk_data, clk_data.hws, num_clks)` above translates to sizeof(struct stratix10_clock_data) + sizeof(struct clk_hw *) * 55 == 16 + 8 * 55 == 16 + 440 ^^^ | allocated bytes for flex-array `hws` 474 for (i = 0; i < num_clks; i++) 475 clk_data->clk_data.hws[i] = ERR_PTR(-ENOENT); 476 477 clk_data->base = base; and then some data is written into both `hws` and `base` objects. Fix this by placing the declaration of object `clk_data` at the end of `struct stratix10_clock_data`. Also, add a comment to make it clear that this object must always be last in the structure. -Wflex-array-member-not-at-end is coming in GCC-14, and we are getting ready to enable it globally. Fixes: ba7e258425ac ("clk: socfpga: Convert to s10/agilex/n5x to use clk_hw") Cc: stable@vger.kernel.org Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/1da736106d8e0806aeafa6e471a13ced490eae22.1698117815.git.gustavoars@kernel.org Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/socfpga/stratix10-clk.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/socfpga/stratix10-clk.h b/drivers/clk/socfpga/stratix10-clk.h index 75234e0783e1..83fe4eb3133c 100644 --- a/drivers/clk/socfpga/stratix10-clk.h +++ b/drivers/clk/socfpga/stratix10-clk.h @@ -7,8 +7,10 @@ #define __STRATIX10_CLK_H struct stratix10_clock_data { - struct clk_hw_onecell_data clk_data; void __iomem *base; + + /* Must be last */ + struct clk_hw_onecell_data clk_data; }; struct stratix10_pll_clock { From c21a0913da18574d71553622d70ec52ac6fb99a9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 16 Oct 2023 16:05:27 -0600 Subject: [PATCH 0312/1397] clk: visconti: Fix undefined behavior bug in struct visconti_pll_provider commit 5ad1e217a2b23aa046b241183bd9452d259d70d0 upstream. `struct clk_hw_onecell_data` is a flexible structure, which means that it contains flexible-array member at the bottom, in this case array `hws`: include/linux/clk-provider.h: 1380 struct clk_hw_onecell_data { 1381 unsigned int num; 1382 struct clk_hw *hws[] __counted_by(num); 1383 }; This could potentially lead to an overwrite of the objects following `clk_data` in `struct visconti_pll_provider`, in this case `struct device_node *node;`, at run-time: drivers/clk/visconti/pll.h: 16 struct visconti_pll_provider { 17 void __iomem *reg_base; 18 struct clk_hw_onecell_data clk_data; 19 struct device_node *node; 20 }; Notice that a total of 56 bytes are allocated for flexible-array `hws` at line 328. See below: include/dt-bindings/clock/toshiba,tmpv770x.h: 14 #define TMPV770X_NR_PLL 7 drivers/clk/visconti/pll-tmpv770x.c: 69 ctx = visconti_init_pll(np, reg_base, TMPV770X_NR_PLL); drivers/clk/visconti/pll.c: 321 struct visconti_pll_provider * __init visconti_init_pll(struct device_node *np, 322 void __iomem *base, 323 unsigned long nr_plls) 324 { 325 struct visconti_pll_provider *ctx; ... 328 ctx = kzalloc(struct_size(ctx, clk_data.hws, nr_plls), GFP_KERNEL); `struct_size(ctx, clk_data.hws, nr_plls)` above translates to sizeof(struct visconti_pll_provider) + sizeof(struct clk_hw *) * 7 == 24 + 8 * 7 == 24 + 56 ^^^^ | allocated bytes for flex array `hws` $ pahole -C visconti_pll_provider drivers/clk/visconti/pll.o struct visconti_pll_provider { void * reg_base; /* 0 8 */ struct clk_hw_onecell_data clk_data; /* 8 8 */ struct device_node * node; /* 16 8 */ /* size: 24, cachelines: 1, members: 3 */ /* last cacheline: 24 bytes */ }; And then, after the allocation, some data is written into all members of `struct visconti_pll_provider`: 332 for (i = 0; i < nr_plls; ++i) 333 ctx->clk_data.hws[i] = ERR_PTR(-ENOENT); 334 335 ctx->node = np; 336 ctx->reg_base = base; 337 ctx->clk_data.num = nr_plls; Fix all these by placing the declaration of object `clk_data` at the end of `struct visconti_pll_provider`. Also, add a comment to make it clear that this object must always be last in the structure, and prevent this bug from being introduced again in the future. -Wflex-array-member-not-at-end is coming in GCC-14, and we are getting ready to enable it globally. Fixes: b4cbe606dc36 ("clk: visconti: Add support common clock driver and reset driver") Cc: stable@vger.kernel.org Reviewed-by: Kees Cook Acked-by: Nobuhiro Iwamatsu Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/57a831d94ee2b3889b11525d4ad500356f89576f.1697492890.git.gustavoars@kernel.org Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/visconti/pll.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/visconti/pll.h b/drivers/clk/visconti/pll.h index 01d07f1bf01b..c4bd40676da4 100644 --- a/drivers/clk/visconti/pll.h +++ b/drivers/clk/visconti/pll.h @@ -15,8 +15,10 @@ struct visconti_pll_provider { void __iomem *reg_base; - struct clk_hw_onecell_data clk_data; struct device_node *node; + + /* Must be last */ + struct clk_hw_onecell_data clk_data; }; #define VISCONTI_PLL_RATE(_rate, _dacen, _dsmen, \ From 3293de84bf3e041242b9826bb2a439a1f4d3da7d Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Thu, 7 Sep 2023 18:52:19 +0200 Subject: [PATCH 0313/1397] integrity: powerpc: Do not select CA_MACHINE_KEYRING commit 3edc22655647378dea01900f7b04e017ff96bda9 upstream. No other platform needs CA_MACHINE_KEYRING, either. This is policy that should be decided by the administrator, not Kconfig dependencies. Cc: stable@vger.kernel.org # v6.6+ Fixes: d7d91c4743c4 ("integrity: PowerVM machine keyring enablement") Signed-off-by: Michal Suchanek Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- security/integrity/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig index 232191ee09e3..b6e074ac0227 100644 --- a/security/integrity/Kconfig +++ b/security/integrity/Kconfig @@ -68,8 +68,6 @@ config INTEGRITY_MACHINE_KEYRING depends on INTEGRITY_ASYMMETRIC_KEYS depends on SYSTEM_BLACKLIST_KEYRING depends on LOAD_UEFI_KEYS || LOAD_PPC_KEYS - select INTEGRITY_CA_MACHINE_KEYRING if LOAD_PPC_KEYS - select INTEGRITY_CA_MACHINE_KEYRING_MAX if LOAD_PPC_KEYS help If set, provide a keyring to which Machine Owner Keys (MOK) may be added. This keyring shall contain just MOK keys. Unlike keys From 9466443cd66e739673b716774d0bf4035d166e10 Mon Sep 17 00:00:00 2001 From: Kathiravan Thirumoorthy Date: Thu, 14 Sep 2023 12:29:51 +0530 Subject: [PATCH 0314/1397] clk: qcom: ipq8074: drop the CLK_SET_RATE_PARENT flag from PLL clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e641a070137dd959932c7c222e000d9d941167a2 upstream. GPLL, NSS crypto PLL clock rates are fixed and shouldn't be scaled based on the request from dependent clocks. Doing so will result in the unexpected behaviour. So drop the CLK_SET_RATE_PARENT flag from the PLL clocks. Cc: stable@vger.kernel.org Fixes: b8e7e519625f ("clk: qcom: ipq8074: add remaining PLL’s") Signed-off-by: Kathiravan Thirumoorthy Link: https://lore.kernel.org/r/20230913-gpll_cleanup-v2-1-c8ceb1a37680@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/gcc-ipq8074.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/clk/qcom/gcc-ipq8074.c b/drivers/clk/qcom/gcc-ipq8074.c index 63ac2ced76bb..b7faf12a511a 100644 --- a/drivers/clk/qcom/gcc-ipq8074.c +++ b/drivers/clk/qcom/gcc-ipq8074.c @@ -75,7 +75,6 @@ static struct clk_fixed_factor gpll0_out_main_div2 = { &gpll0_main.clkr.hw }, .num_parents = 1, .ops = &clk_fixed_factor_ops, - .flags = CLK_SET_RATE_PARENT, }, }; @@ -121,7 +120,6 @@ static struct clk_alpha_pll_postdiv gpll2 = { &gpll2_main.clkr.hw }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_ro_ops, - .flags = CLK_SET_RATE_PARENT, }, }; @@ -154,7 +152,6 @@ static struct clk_alpha_pll_postdiv gpll4 = { &gpll4_main.clkr.hw }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_ro_ops, - .flags = CLK_SET_RATE_PARENT, }, }; @@ -188,7 +185,6 @@ static struct clk_alpha_pll_postdiv gpll6 = { &gpll6_main.clkr.hw }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_ro_ops, - .flags = CLK_SET_RATE_PARENT, }, }; @@ -201,7 +197,6 @@ static struct clk_fixed_factor gpll6_out_main_div2 = { &gpll6_main.clkr.hw }, .num_parents = 1, .ops = &clk_fixed_factor_ops, - .flags = CLK_SET_RATE_PARENT, }, }; @@ -266,7 +261,6 @@ static struct clk_alpha_pll_postdiv nss_crypto_pll = { &nss_crypto_pll_main.clkr.hw }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_ro_ops, - .flags = CLK_SET_RATE_PARENT, }, }; From 7d7ba4b888b09e03829509f0371225b75deb4b67 Mon Sep 17 00:00:00 2001 From: Kathiravan Thirumoorthy Date: Thu, 14 Sep 2023 12:29:52 +0530 Subject: [PATCH 0315/1397] clk: qcom: ipq6018: drop the CLK_SET_RATE_PARENT flag from PLL clocks commit 99cd4935cb972d0aafb16838bb2aeadbcaf196ce upstream. GPLL, NSS crypto PLL clock rates are fixed and shouldn't be scaled based on the request from dependent clocks. Doing so will result in the unexpected behaviour. So drop the CLK_SET_RATE_PARENT flag from the PLL clocks. Cc: stable@vger.kernel.org Fixes: d9db07f088af ("clk: qcom: Add ipq6018 Global Clock Controller support") Signed-off-by: Kathiravan Thirumoorthy Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230913-gpll_cleanup-v2-2-c8ceb1a37680@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/gcc-ipq6018.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/clk/qcom/gcc-ipq6018.c b/drivers/clk/qcom/gcc-ipq6018.c index 6120fbbc5de0..f9494fa1b871 100644 --- a/drivers/clk/qcom/gcc-ipq6018.c +++ b/drivers/clk/qcom/gcc-ipq6018.c @@ -72,7 +72,6 @@ static struct clk_fixed_factor gpll0_out_main_div2 = { &gpll0_main.clkr.hw }, .num_parents = 1, .ops = &clk_fixed_factor_ops, - .flags = CLK_SET_RATE_PARENT, }, }; @@ -86,7 +85,6 @@ static struct clk_alpha_pll_postdiv gpll0 = { &gpll0_main.clkr.hw }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_ro_ops, - .flags = CLK_SET_RATE_PARENT, }, }; @@ -161,7 +159,6 @@ static struct clk_alpha_pll_postdiv gpll6 = { &gpll6_main.clkr.hw }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_ro_ops, - .flags = CLK_SET_RATE_PARENT, }, }; @@ -192,7 +189,6 @@ static struct clk_alpha_pll_postdiv gpll4 = { &gpll4_main.clkr.hw }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_ro_ops, - .flags = CLK_SET_RATE_PARENT, }, }; @@ -243,7 +239,6 @@ static struct clk_alpha_pll_postdiv gpll2 = { &gpll2_main.clkr.hw }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_ro_ops, - .flags = CLK_SET_RATE_PARENT, }, }; @@ -274,7 +269,6 @@ static struct clk_alpha_pll_postdiv nss_crypto_pll = { &nss_crypto_pll_main.clkr.hw }, .num_parents = 1, .ops = &clk_alpha_pll_postdiv_ro_ops, - .flags = CLK_SET_RATE_PARENT, }, }; From c012fba75ac884825f45a4e4960c09f50896f439 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Sat, 14 Oct 2023 12:48:25 +0900 Subject: [PATCH 0316/1397] ksmbd: fix recursive locking in vfs helpers commit 807252f028c59b9a3bac4d62ad84761548c10f11 upstream. Running smb2.rename test from Samba smbtorture suite against a kernel built with lockdep triggers a "possible recursive locking detected" warning. This is because mnt_want_write() is called twice with no mnt_drop_write() in between: -> ksmbd_vfs_mkdir() -> ksmbd_vfs_kern_path_create() -> kern_path_create() -> filename_create() -> mnt_want_write() -> mnt_want_write() Fix this by removing the mnt_want_write/mnt_drop_write calls from vfs helpers that call kern_path_create(). Full lockdep trace below: ============================================ WARNING: possible recursive locking detected 6.6.0-rc5 #775 Not tainted -------------------------------------------- kworker/1:1/32 is trying to acquire lock: ffff888005ac83f8 (sb_writers#5){.+.+}-{0:0}, at: ksmbd_vfs_mkdir+0xe1/0x410 but task is already holding lock: ffff888005ac83f8 (sb_writers#5){.+.+}-{0:0}, at: filename_create+0xb6/0x260 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(sb_writers#5); lock(sb_writers#5); *** DEADLOCK *** May be due to missing lock nesting notation 4 locks held by kworker/1:1/32: #0: ffff8880064e4138 ((wq_completion)ksmbd-io){+.+.}-{0:0}, at: process_one_work+0x40e/0x980 #1: ffff888005b0fdd0 ((work_completion)(&work->work)){+.+.}-{0:0}, at: process_one_work+0x40e/0x980 #2: ffff888005ac83f8 (sb_writers#5){.+.+}-{0:0}, at: filename_create+0xb6/0x260 #3: ffff8880057ce760 (&type->i_mutex_dir_key#3/1){+.+.}-{3:3}, at: filename_create+0x123/0x260 Cc: stable@vger.kernel.org Fixes: 40b268d384a2 ("ksmbd: add mnt_want_write to ksmbd vfs functions") Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/vfs.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index b5a5e50fc9ca..9919c07035d8 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -173,10 +173,6 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) return err; } - err = mnt_want_write(path.mnt); - if (err) - goto out_err; - mode |= S_IFREG; err = vfs_create(mnt_idmap(path.mnt), d_inode(path.dentry), dentry, mode, true); @@ -186,9 +182,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) } else { pr_err("File(%s): creation failed (err:%d)\n", name, err); } - mnt_drop_write(path.mnt); -out_err: done_path_create(&path, dentry); return err; } @@ -219,10 +213,6 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) return err; } - err = mnt_want_write(path.mnt); - if (err) - goto out_err2; - idmap = mnt_idmap(path.mnt); mode |= S_IFDIR; err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); @@ -233,21 +223,19 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) dentry->d_name.len); if (IS_ERR(d)) { err = PTR_ERR(d); - goto out_err1; + goto out_err; } if (unlikely(d_is_negative(d))) { dput(d); err = -ENOENT; - goto out_err1; + goto out_err; } ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d)); dput(d); } -out_err1: - mnt_drop_write(path.mnt); -out_err2: +out_err: done_path_create(&path, dentry); if (err) pr_err("mkdir(%s): creation failed (err:%d)\n", name, err); @@ -665,16 +653,11 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname, goto out3; } - err = mnt_want_write(newpath.mnt); - if (err) - goto out3; - err = vfs_link(oldpath.dentry, mnt_idmap(newpath.mnt), d_inode(newpath.dentry), dentry, NULL); if (err) ksmbd_debug(VFS, "vfs_link failed err %d\n", err); - mnt_drop_write(newpath.mnt); out3: done_path_create(&newpath, dentry); From f4f863a0e901131aff9b7be77590b637acc6f8b1 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 7 Nov 2023 21:04:31 +0900 Subject: [PATCH 0317/1397] ksmbd: handle malformed smb1 message commit 5a5409d90bd05f87fe5623a749ccfbf3f7c7d400 upstream. If set_smb1_rsp_status() is not implemented, It will cause NULL pointer dereferece error when client send malformed smb1 message. This patch add set_smb1_rsp_status() to ignore malformed smb1 message. Cc: stable@vger.kernel.org Reported-by: Robert Morris Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb_common.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index e6ba1e9b8589..6691ae68af0c 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -366,11 +366,22 @@ static int smb1_allocate_rsp_buf(struct ksmbd_work *work) return 0; } +/** + * set_smb1_rsp_status() - set error type in smb response header + * @work: smb work containing smb response header + * @err: error code to set in response + */ +static void set_smb1_rsp_status(struct ksmbd_work *work, __le32 err) +{ + work->send_no_response = 1; +} + static struct smb_version_ops smb1_server_ops = { .get_cmd_val = get_smb1_cmd_val, .init_rsp_hdr = init_smb1_rsp_hdr, .allocate_rsp_buf = smb1_allocate_rsp_buf, .check_user_session = smb1_check_user_session, + .set_rsp_status = set_smb1_rsp_status, }; static int smb1_negotiate(struct ksmbd_work *work) From 712e01f32e577e7e48ab0adb5fe550646a3d93cb Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 5 Nov 2023 12:46:24 +0900 Subject: [PATCH 0318/1397] ksmbd: fix slab out of bounds write in smb_inherit_dacl() commit eebff19acaa35820cb09ce2ccb3d21bee2156ffb upstream. slab out-of-bounds write is caused by that offsets is bigger than pntsd allocation size. This patch add the check to validate 3 offsets using allocation size. Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-22271 Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smbacl.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 6c0305be895e..51b8bfab7481 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1107,6 +1107,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct smb_acl *pdacl; struct smb_sid *powner_sid = NULL, *pgroup_sid = NULL; int powner_sid_size = 0, pgroup_sid_size = 0, pntsd_size; + int pntsd_alloc_size; if (parent_pntsd->osidoffset) { powner_sid = (struct smb_sid *)((char *)parent_pntsd + @@ -1119,9 +1120,10 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, pgroup_sid_size = 1 + 1 + 6 + (pgroup_sid->num_subauth * 4); } - pntsd = kzalloc(sizeof(struct smb_ntsd) + powner_sid_size + - pgroup_sid_size + sizeof(struct smb_acl) + - nt_size, GFP_KERNEL); + pntsd_alloc_size = sizeof(struct smb_ntsd) + powner_sid_size + + pgroup_sid_size + sizeof(struct smb_acl) + nt_size; + + pntsd = kzalloc(pntsd_alloc_size, GFP_KERNEL); if (!pntsd) { rc = -ENOMEM; goto free_aces_base; @@ -1136,6 +1138,27 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, pntsd->gsidoffset = parent_pntsd->gsidoffset; pntsd->dacloffset = parent_pntsd->dacloffset; + if ((u64)le32_to_cpu(pntsd->osidoffset) + powner_sid_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + + if ((u64)le32_to_cpu(pntsd->gsidoffset) + pgroup_sid_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + + if ((u64)le32_to_cpu(pntsd->dacloffset) + sizeof(struct smb_acl) + nt_size > + pntsd_alloc_size) { + rc = -EINVAL; + kfree(pntsd); + goto free_aces_base; + } + if (pntsd->osidoffset) { struct smb_sid *owner_sid = (struct smb_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); From 053809f40e6f36292c9c1367ff549c9a082457bd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 2 Nov 2023 10:51:06 +0300 Subject: [PATCH 0319/1397] mmc: vub300: fix an error code commit b44f9da81783fda72632ef9b0d05ea3f3ca447a5 upstream. This error path should return -EINVAL instead of success. Fixes: 88095e7b473a ("mmc: Add new VUB300 USB-to-SD/SDIO/MMC driver") Signed-off-by: Dan Carpenter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/0769d30c-ad80-421b-bf5d-7d6f5d85604e@moroto.mountain Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/vub300.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index 9ec593d52f0f..cef0e716ad16 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -2309,6 +2309,7 @@ static int vub300_probe(struct usb_interface *interface, vub300->read_only = (0x0010 & vub300->system_port_status.port_flags) ? 1 : 0; } else { + retval = -EINVAL; goto error5; } usb_set_intfdata(interface, vub300); From 582208858304ffc382fa9d921c8b03cf1f31c1c0 Mon Sep 17 00:00:00 2001 From: Nitin Yadav Date: Thu, 26 Oct 2023 11:44:58 +0530 Subject: [PATCH 0320/1397] mmc: sdhci_am654: fix start loop index for TAP value parsing commit 71956d0cb56c1e5f9feeb4819db87a076418e930 upstream. ti,otap-del-sel-legacy/ti,itap-del-sel-legacy passed from DT are currently ignored for all SD/MMC and eMMC modes. Fix this by making start loop index to MMC_TIMING_LEGACY. Fixes: 8ee5fc0e0b3b ("mmc: sdhci_am654: Update OTAPDLY writes") Signed-off-by: Nitin Yadav Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231026061458.1116276-1-n-yadav@ti.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci_am654.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index c125485ba80e..967bd2dfcda1 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -598,7 +598,7 @@ static int sdhci_am654_get_otap_delay(struct sdhci_host *host, return 0; } - for (i = MMC_TIMING_MMC_HS; i <= MMC_TIMING_MMC_HS400; i++) { + for (i = MMC_TIMING_LEGACY; i <= MMC_TIMING_MMC_HS400; i++) { ret = device_property_read_u32(dev, td[i].otap_binding, &sdhci_am654->otap_del_sel[i]); From bb94f1ad8cadb2b9c017da02ae4e941cee01a65a Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Mon, 30 Oct 2023 23:48:09 +0100 Subject: [PATCH 0321/1397] mmc: Add quirk MMC_QUIRK_BROKEN_CACHE_FLUSH for Micron eMMC Q2J54A commit ed9009ad300c0f15a3ecfe9613547b1962bde02c upstream. Micron MTFC4GACAJCN eMMC supports cache but requires that flush cache operation be allowed only after a write has occurred. Otherwise, the cache flush command or subsequent commands will time out. Signed-off-by: Bean Huo Signed-off-by: Rafael Beims Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231030224809.59245-1-beanhuo@iokpp.de Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 4 +++- drivers/mmc/core/card.h | 4 ++++ drivers/mmc/core/mmc.c | 8 ++++++-- drivers/mmc/core/quirks.h | 7 ++++--- include/linux/mmc/card.h | 2 ++ 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 3a8f27c3e310..152dfe593c43 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2381,8 +2381,10 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req) } ret = mmc_blk_cqe_issue_flush(mq, req); break; - case REQ_OP_READ: case REQ_OP_WRITE: + card->written_flag = true; + fallthrough; + case REQ_OP_READ: if (host->cqe_enabled) ret = mmc_blk_cqe_issue_rw_rq(mq, req); else diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index 4edf9057fa79..b7754a1b8d97 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -280,4 +280,8 @@ static inline int mmc_card_broken_sd_cache(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BROKEN_SD_CACHE; } +static inline int mmc_card_broken_cache_flush(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_BROKEN_CACHE_FLUSH; +} #endif diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 89cd48fcec79..a46ce0868fe1 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -2081,13 +2081,17 @@ static int _mmc_flush_cache(struct mmc_host *host) { int err = 0; + if (mmc_card_broken_cache_flush(host->card) && !host->card->written_flag) + return 0; + if (_mmc_cache_enabled(host)) { err = mmc_switch(host->card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_FLUSH_CACHE, 1, CACHE_FLUSH_TIMEOUT_MS); if (err) - pr_err("%s: cache flush error %d\n", - mmc_hostname(host), err); + pr_err("%s: cache flush error %d\n", mmc_hostname(host), err); + else + host->card->written_flag = false; } return err; diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 32b64b564fb1..cca71867bc4a 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -110,11 +110,12 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_QUIRK_TRIM_BROKEN), /* - * Micron MTFC4GACAJCN-1M advertises TRIM but it does not seems to - * support being used to offload WRITE_ZEROES. + * Micron MTFC4GACAJCN-1M supports TRIM but does not appear to support + * WRITE_ZEROES offloading. It also supports caching, but the cache can + * only be flushed after a write has occurred. */ MMC_FIXUP("Q2J54A", CID_MANFID_MICRON, 0x014e, add_quirk_mmc, - MMC_QUIRK_TRIM_BROKEN), + MMC_QUIRK_TRIM_BROKEN | MMC_QUIRK_BROKEN_CACHE_FLUSH), /* * Kingston EMMC04G-M627 advertises TRIM but it does not seems to diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index daa2f40d9ce6..7b12eebc5586 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -295,7 +295,9 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ #define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ +#define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ + bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ unsigned int erase_size; /* erase size in sectors */ From bf9a8870ad507c42dbf5568a27c889d79e4a50de Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 25 Oct 2023 18:30:29 +0530 Subject: [PATCH 0322/1397] PCI: qcom-ep: Add dedicated callback for writing to DBI2 registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a07d2497ed657eb2efeb967af47e22f573dcd1d6 upstream. The DWC core driver exposes the write_dbi2() callback for writing to the DBI2 registers in a vendor-specific way. On the Qcom EP platforms, the DBI_CS2 bit in the ELBI region needs to be asserted before writing to any DBI2 registers and deasserted once done. So, let's implement the callback for the Qcom PCIe EP driver so that the DBI2 writes are correctly handled in the hardware. Without this callback, the DBI2 register writes like BAR size won't go through and as a result, the default BAR size is set for all BARs. [kwilczynski: commit log, renamed function to match the DWC convention] Fixes: f55fee56a631 ("PCI: qcom-ep: Add Qualcomm PCIe Endpoint controller driver") Suggested-by: Serge Semin Link: https://lore.kernel.org/linux-pci/20231025130029.74693-2-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Reviewed-by: Serge Semin Cc: stable@vger.kernel.org # 5.16+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-qcom-ep.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c index 8bd8107690a6..9b62ee6992f0 100644 --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c @@ -123,6 +123,7 @@ /* ELBI registers */ #define ELBI_SYS_STTS 0x08 +#define ELBI_CS2_ENABLE 0xa4 /* DBI registers */ #define DBI_CON_STATUS 0x44 @@ -263,6 +264,21 @@ static void qcom_pcie_dw_stop_link(struct dw_pcie *pci) disable_irq(pcie_ep->perst_irq); } +static void qcom_pcie_dw_write_dbi2(struct dw_pcie *pci, void __iomem *base, + u32 reg, size_t size, u32 val) +{ + struct qcom_pcie_ep *pcie_ep = to_pcie_ep(pci); + int ret; + + writel(1, pcie_ep->elbi + ELBI_CS2_ENABLE); + + ret = dw_pcie_write(pci->dbi_base2 + reg, size, val); + if (ret) + dev_err(pci->dev, "Failed to write DBI2 register (0x%x): %d\n", reg, ret); + + writel(0, pcie_ep->elbi + ELBI_CS2_ENABLE); +} + static void qcom_pcie_ep_icc_update(struct qcom_pcie_ep *pcie_ep) { struct dw_pcie *pci = &pcie_ep->pci; @@ -519,6 +535,7 @@ static const struct dw_pcie_ops pci_ops = { .link_up = qcom_pcie_dw_link_up, .start_link = qcom_pcie_dw_start_link, .stop_link = qcom_pcie_dw_stop_link, + .write_dbi2 = qcom_pcie_dw_write_dbi2, }; static int qcom_pcie_ep_get_io_resources(struct platform_device *pdev, From 7fce228c561c1fe6d68390f470247e25b70e16ab Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 11 Oct 2023 09:46:45 +0200 Subject: [PATCH 0323/1397] PCI/ASPM: Fix L1 substate handling in aspm_attr_store_common() commit 8e37372ad0bea4c9b4712d9943f6ae96cff9491f upstream. aspm_attr_store_common(), which handles sysfs control of ASPM, has the same problem as fb097dcd5a28 ("PCI/ASPM: Disable only ASPM_STATE_L1 when driver disables L1"): disabling L1 adds only ASPM_L1 (but not any of the L1.x substates) to the "aspm_disable" mask. Enabling one substate, e.g., L1.1, via sysfs removes ASPM_L1 from the disable mask. Since disabling L1 via sysfs doesn't add any of the substates to the disable mask, enabling L1.1 actually enables *all* the substates. In this scenario: - Write 0 to "l1_aspm" to disable L1 - Write 1 to "l1_1_aspm" to enable L1.1 the intention is to disable L1 and all L1.x substates, then enable just L1.1, but in fact, *all* L1.x substates are enabled. Fix this by explicitly disabling all the L1.x substates when disabling L1. Fixes: 72ea91afbfb0 ("PCI/ASPM: Add sysfs attributes for controlling ASPM link states") Link: https://lore.kernel.org/r/6ba7dd79-9cfe-4ed0-a002-d99cb842f361@gmail.com Signed-off-by: Heiner Kallweit [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aspm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 530c3bb5708c..fc18e42f0a6e 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1248,6 +1248,8 @@ static ssize_t aspm_attr_store_common(struct device *dev, link->aspm_disable &= ~ASPM_STATE_L1; } else { link->aspm_disable |= state; + if (state & ASPM_STATE_L1) + link->aspm_disable |= ASPM_STATE_L1SS; } pcie_config_aspm_link(link, policy_to_aspm_state(link)); From 5274f925e7ae0d674107e4f31a77a1ba509fc4f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 1 Oct 2023 19:02:52 +0200 Subject: [PATCH 0324/1397] PCI: kirin: Don't discard .remove() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3064ef2e88c1629c1e67a77d7bc20020b35846f2 upstream. With CONFIG_PCIE_KIRIN=y and kirin_pcie_remove() marked with __exit, the function is discarded from the driver. In this case a bound device can still get unbound, e.g via sysfs. Then no cleanup code is run resulting in resource leaks or worse. The right thing to do is do always have the remove callback available. This fixes the following warning by modpost: drivers/pci/controller/dwc/pcie-kirin: section mismatch in reference: kirin_pcie_driver+0x8 (section: .data) -> kirin_pcie_remove (section: .exit.text) (with ARCH=x86_64 W=1 allmodconfig). Fixes: 000f60db784b ("PCI: kirin: Add support for a PHY layer") Link: https://lore.kernel.org/r/20231001170254.2506508-3-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-kirin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-kirin.c b/drivers/pci/controller/dwc/pcie-kirin.c index d93bc2906950..2ee146767971 100644 --- a/drivers/pci/controller/dwc/pcie-kirin.c +++ b/drivers/pci/controller/dwc/pcie-kirin.c @@ -741,7 +741,7 @@ static int kirin_pcie_power_on(struct platform_device *pdev, return ret; } -static int __exit kirin_pcie_remove(struct platform_device *pdev) +static int kirin_pcie_remove(struct platform_device *pdev) { struct kirin_pcie *kirin_pcie = platform_get_drvdata(pdev); @@ -818,7 +818,7 @@ static int kirin_pcie_probe(struct platform_device *pdev) static struct platform_driver kirin_pcie_driver = { .probe = kirin_pcie_probe, - .remove = __exit_p(kirin_pcie_remove), + .remove = kirin_pcie_remove, .driver = { .name = "kirin-pcie", .of_match_table = kirin_pcie_match, From acfc0791bd138e3af9cb1f24464797b845bab8e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 1 Oct 2023 19:02:51 +0200 Subject: [PATCH 0325/1397] PCI: exynos: Don't discard .remove() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 83a939f0fdc208ff3639dd3d42ac9b3c35607fd2 upstream. With CONFIG_PCI_EXYNOS=y and exynos_pcie_remove() marked with __exit, the function is discarded from the driver. In this case a bound device can still get unbound, e.g via sysfs. Then no cleanup code is run resulting in resource leaks or worse. The right thing to do is do always have the remove callback available. This fixes the following warning by modpost: WARNING: modpost: drivers/pci/controller/dwc/pci-exynos: section mismatch in reference: exynos_pcie_driver+0x8 (section: .data) -> exynos_pcie_remove (section: .exit.text) (with ARCH=x86_64 W=1 allmodconfig). Fixes: 340cba6092c2 ("pci: Add PCIe driver for Samsung Exynos") Link: https://lore.kernel.org/r/20231001170254.2506508-2-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Bjorn Helgaas Reviewed-by: Alim Akhtar Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pci-exynos.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-exynos.c b/drivers/pci/controller/dwc/pci-exynos.c index 6319082301d6..c6bede346932 100644 --- a/drivers/pci/controller/dwc/pci-exynos.c +++ b/drivers/pci/controller/dwc/pci-exynos.c @@ -375,7 +375,7 @@ static int exynos_pcie_probe(struct platform_device *pdev) return ret; } -static int __exit exynos_pcie_remove(struct platform_device *pdev) +static int exynos_pcie_remove(struct platform_device *pdev) { struct exynos_pcie *ep = platform_get_drvdata(pdev); @@ -431,7 +431,7 @@ static const struct of_device_id exynos_pcie_of_match[] = { static struct platform_driver exynos_pcie_driver = { .probe = exynos_pcie_probe, - .remove = __exit_p(exynos_pcie_remove), + .remove = exynos_pcie_remove, .driver = { .name = "exynos-pcie", .of_match_table = exynos_pcie_of_match, From 0b496b19ab669caf7739ec3f13730021e69730ec Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 21 Sep 2023 16:23:34 +0200 Subject: [PATCH 0326/1397] PCI: Lengthen reset delay for VideoPropulsion Torrent QN16e card commit c9260693aa0c1e029ed23693cfd4d7814eee6624 upstream. Commit ac91e6980563 ("PCI: Unify delay handling for reset and resume") shortened an unconditional 1 sec delay after a Secondary Bus Reset to 100 msec for PCIe (per PCIe r6.1 sec 6.6.1). The 1 sec delay is only required for Conventional PCI. But it turns out that there are PCIe devices which require a longer delay than prescribed before first config space access after reset recovery or resume from D3cold: Chad reports that a "VideoPropulsion Torrent QN16e" MPEG QAM Modulator "raises a PCI system error (PERR), as reported by the IPMI event log, and the hardware itself would suffer a catastrophic event, cycling the server" unless the longer delay is observed. The card is specified to conform to PCIe r1.0 and indeed only supports Gen1 speed (2.5 GT/s) according to lspci. PCIe r1.0 sec 7.6 prescribes the same 100 msec delay as PCIe r6.1 sec 6.6.1: To allow components to perform internal initialization, system software must wait for at least 100 ms from the end of a reset (cold/warm/hot) before it is permitted to issue Configuration Requests The behavior of the Torrent QN16e card thus appears to be a quirk. Treat it as such and lengthen the reset delay for this specific device. Fixes: ac91e6980563 ("PCI: Unify delay handling for reset and resume") Link: https://lore.kernel.org/r/47727e792c7f0282dc144e3ec8ce8eb6e713394e.1695304512.git.lukas@wunner.de Reported-by: Chad Schroeder Closes: https://lore.kernel.org/linux-pci/DM6PR16MB2844903E34CAB910082DF019B1FAA@DM6PR16MB2844.namprd16.prod.outlook.com/ Tested-by: Chad Schroeder Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 3da1a044827d..ae95d0950772 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -6211,3 +6211,15 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5020, of_pci_make_dev_node); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5021, of_pci_make_dev_node); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REDHAT, 0x0005, of_pci_make_dev_node); + +/* + * Devices known to require a longer delay before first config space access + * after reset recovery or resume from D3cold: + * + * VideoPropulsion (aka Genroco) Torrent QN16e MPEG QAM Modulator + */ +static void pci_fixup_d3cold_delay_1sec(struct pci_dev *pdev) +{ + pdev->d3cold_delay = 1000; +} +DECLARE_PCI_FIXUP_FINAL(0x5555, 0x0004, pci_fixup_d3cold_delay_1sec); From 3ce1c2c3999b232258f7aabab311d47dda75605c Mon Sep 17 00:00:00 2001 From: Ajay Singh Date: Tue, 17 Oct 2023 10:43:38 +0200 Subject: [PATCH 0327/1397] wifi: wilc1000: use vmm_table as array in wilc struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 05ac1a198a63ad66bf5ae8b7321407c102d40ef3 upstream. Enabling KASAN and running some iperf tests raises some memory issues with vmm_table: BUG: KASAN: slab-out-of-bounds in wilc_wlan_handle_txq+0x6ac/0xdb4 Write of size 4 at addr c3a61540 by task wlan0-tx/95 KASAN detects that we are writing data beyond range allocated to vmm_table. There is indeed a mismatch between the size passed to allocator in wilc_wlan_init, and the range of possible indexes used later: allocation size is missing a multiplication by sizeof(u32) Fixes: 40b717bfcefa ("wifi: wilc1000: fix DMA on stack objects") Cc: stable@vger.kernel.org Signed-off-by: Ajay Singh Signed-off-by: Alexis Lothoré Reviewed-by: Michael Walle Reviewed-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231017-wilc1000_tx_oops-v3-1-b2155f1f7bee@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/microchip/wilc1000/wlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c index 58bbf50081e4..9eb115c79c90 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan.c +++ b/drivers/net/wireless/microchip/wilc1000/wlan.c @@ -1492,7 +1492,7 @@ int wilc_wlan_init(struct net_device *dev) } if (!wilc->vmm_table) - wilc->vmm_table = kzalloc(WILC_VMM_TBL_SIZE, GFP_KERNEL); + wilc->vmm_table = kcalloc(WILC_VMM_TBL_SIZE, sizeof(u32), GFP_KERNEL); if (!wilc->vmm_table) { ret = -ENOBUFS; From be739f32a45931c2cfd0f260bb7e11b33a94cd07 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 10 Oct 2023 13:23:41 -0400 Subject: [PATCH 0328/1397] svcrdma: Drop connection after an RDMA Read error commit 197115ebf358cb440c73e868b2a0a5ef728decc6 upstream. When an RPC Call message cannot be pulled from the client, that is a message loss, by definition. Close the connection to trigger the client to resend. Cc: Reviewed-by: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 85c8bcaebb80..3b05f90a3e50 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -852,7 +852,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) if (ret == -EINVAL) svc_rdma_send_error(rdma_xprt, ctxt, ret); svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); - return ret; + svc_xprt_deferred_close(xprt); + return -ENOTCONN; out_backchannel: svc_rdma_handle_bc_reply(rqstp, ctxt); From 60f9dd96da9384e7c8990f83202ed6583e2c1cd8 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Tue, 5 Sep 2023 00:02:11 +0000 Subject: [PATCH 0329/1397] rcu/tree: Defer setting of jiffies during stall reset commit b96e7a5fa0ba9cda32888e04f8f4bac42d49a7f8 upstream. There are instances where rcu_cpu_stall_reset() is called when jiffies did not get a chance to update for a long time. Before jiffies is updated, the CPU stall detector can go off triggering false-positives where a just-started grace period appears to be ages old. In the past, we disabled stall detection in rcu_cpu_stall_reset() however this got changed [1]. This is resulting in false-positives in KGDB usecase [2]. Fix this by deferring the update of jiffies to the third run of the FQS loop. This is more robust, as, even if rcu_cpu_stall_reset() is called just before jiffies is read, we would end up pushing out the jiffies read by 3 more FQS loops. Meanwhile the CPU stall detection will be delayed and we will not get any false positives. [1] https://lore.kernel.org/all/20210521155624.174524-2-senozhatsky@chromium.org/ [2] https://lore.kernel.org/all/20230814020045.51950-2-chenhuacai@loongson.cn/ Tested with rcutorture.cpu_stall option as well to verify stall behavior with/without patch. Tested-by: Huacai Chen Reported-by: Binbin Zhou Closes: https://lore.kernel.org/all/20230814020045.51950-2-chenhuacai@loongson.cn/ Suggested-by: Paul McKenney Cc: Sergey Senozhatsky Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: a80be428fbc1 ("rcu: Do not disable GP stall detection in rcu_cpu_stall_reset()") Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tree.c | 12 ++++++++++++ kernel/rcu/tree.h | 4 ++++ kernel/rcu/tree_stall.h | 20 ++++++++++++++++++-- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index cb1caefa8bd0..d85779f67aea 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1556,10 +1556,22 @@ static bool rcu_gp_fqs_check_wake(int *gfp) */ static void rcu_gp_fqs(bool first_time) { + int nr_fqs = READ_ONCE(rcu_state.nr_fqs_jiffies_stall); struct rcu_node *rnp = rcu_get_root(); WRITE_ONCE(rcu_state.gp_activity, jiffies); WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1); + + WARN_ON_ONCE(nr_fqs > 3); + /* Only countdown nr_fqs for stall purposes if jiffies moves. */ + if (nr_fqs) { + if (nr_fqs == 1) { + WRITE_ONCE(rcu_state.jiffies_stall, + jiffies + rcu_jiffies_till_stall_check()); + } + WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, --nr_fqs); + } + if (first_time) { /* Collect dyntick-idle snapshots. */ force_qs_rnp(dyntick_save_progress_counter); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 192536916f9a..e9821a8422db 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -386,6 +386,10 @@ struct rcu_state { /* in jiffies. */ unsigned long jiffies_stall; /* Time at which to check */ /* for CPU stalls. */ + int nr_fqs_jiffies_stall; /* Number of fqs loops after + * which read jiffies and set + * jiffies_stall. Stall + * warnings disabled if !0. */ unsigned long jiffies_resched; /* Time at which to resched */ /* a reluctant CPU. */ unsigned long n_force_qs_gpstart; /* Snapshot of n_force_qs at */ diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 6f06dc12904a..e09f4f624261 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -149,12 +149,17 @@ static void panic_on_rcu_stall(void) /** * rcu_cpu_stall_reset - restart stall-warning timeout for current grace period * + * To perform the reset request from the caller, disable stall detection until + * 3 fqs loops have passed. This is required to ensure a fresh jiffies is + * loaded. It should be safe to do from the fqs loop as enough timer + * interrupts and context switches should have passed. + * * The caller must disable hard irqs. */ void rcu_cpu_stall_reset(void) { - WRITE_ONCE(rcu_state.jiffies_stall, - jiffies + rcu_jiffies_till_stall_check()); + WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 3); + WRITE_ONCE(rcu_state.jiffies_stall, ULONG_MAX); } ////////////////////////////////////////////////////////////////////////////// @@ -170,6 +175,7 @@ static void record_gp_stall_check_time(void) WRITE_ONCE(rcu_state.gp_start, j); j1 = rcu_jiffies_till_stall_check(); smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq. + WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 0); WRITE_ONCE(rcu_state.jiffies_stall, j + j1); rcu_state.jiffies_resched = j + j1 / 2; rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); @@ -725,6 +731,16 @@ static void check_cpu_stall(struct rcu_data *rdp) !rcu_gp_in_progress()) return; rcu_stall_kick_kthreads(); + + /* + * Check if it was requested (via rcu_cpu_stall_reset()) that the FQS + * loop has to set jiffies to ensure a non-stale jiffies value. This + * is required to have good jiffies value after coming out of long + * breaks of jiffies updates. Not doing so can cause false positives. + */ + if (READ_ONCE(rcu_state.nr_fqs_jiffies_stall) > 0) + return; + j = jiffies; /* From aeefe80c525afa2a61de2456eb6230b3187ac651 Mon Sep 17 00:00:00 2001 From: Vignesh Viswanathan Date: Mon, 4 Sep 2023 22:55:13 +0530 Subject: [PATCH 0330/1397] arm64: dts: qcom: ipq6018: Fix hwlock index for SMEM commit 95d97b111e1e184b0c8656137033ed64f2cf21e4 upstream. SMEM uses lock index 3 of the TCSR Mutex hwlock for allocations in SMEM region shared by the Host and FW. Fix the SMEM hwlock index to 3 for IPQ6018. Cc: stable@vger.kernel.org Fixes: 5bf635621245 ("arm64: dts: ipq6018: Add a few device nodes") Signed-off-by: Vignesh Viswanathan Acked-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230904172516.479866-3-quic_viswanat@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/ipq6018.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index 47b8b1d6730a..8c285f987a9c 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -211,7 +211,7 @@ smem { compatible = "qcom,smem"; memory-region = <&smem_region>; - hwlocks = <&tcsr_mutex 0>; + hwlocks = <&tcsr_mutex 3>; }; soc: soc@0 { From 7d305a4804f3848f4a304addabf34e7b0156be57 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 27 Jul 2023 09:18:44 +0100 Subject: [PATCH 0331/1397] dt-bindings: timer: renesas,rz-mtu3: Fix overflow/underflow interrupt names commit b7a8f1f7a8a25e09aaefebb6251a77f44cda638b upstream. As per R01UH0914EJ0130 Rev.1.30 HW manual the MTU3 overflow/underflow interrupt names starts with 'tci' instead of 'tgi'. Fix this documentation issue by replacing below overflow/underflow interrupt names: - tgiv0->tciv0 - tgiv1->tciv1 - tgiu1->tciu1 - tgiv2->tciv2 - tgiu2->tciu2 - tgiv3->tciv3 - tgiv4->tciv4 - tgiv6->tciv6 - tgiv7->tciv7 - tgiv8->tciv8 - tgiu8->tciu8 Fixes: 0a9d6b54297e ("dt-bindings: timer: Document RZ/G2L MTU3a bindings") Cc: stable@kernel.org Signed-off-by: Biju Das Acked-by: Conor Dooley Reviewed-by: Geert Uytterhoeven Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230727081848.100834-2-biju.das.jz@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- .../bindings/timer/renesas,rz-mtu3.yaml | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml b/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml index bffdab0b0185..fbac40b958dd 100644 --- a/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml +++ b/Documentation/devicetree/bindings/timer/renesas,rz-mtu3.yaml @@ -169,27 +169,27 @@ properties: - const: tgib0 - const: tgic0 - const: tgid0 - - const: tgiv0 + - const: tciv0 - const: tgie0 - const: tgif0 - const: tgia1 - const: tgib1 - - const: tgiv1 - - const: tgiu1 + - const: tciv1 + - const: tciu1 - const: tgia2 - const: tgib2 - - const: tgiv2 - - const: tgiu2 + - const: tciv2 + - const: tciu2 - const: tgia3 - const: tgib3 - const: tgic3 - const: tgid3 - - const: tgiv3 + - const: tciv3 - const: tgia4 - const: tgib4 - const: tgic4 - const: tgid4 - - const: tgiv4 + - const: tciv4 - const: tgiu5 - const: tgiv5 - const: tgiw5 @@ -197,18 +197,18 @@ properties: - const: tgib6 - const: tgic6 - const: tgid6 - - const: tgiv6 + - const: tciv6 - const: tgia7 - const: tgib7 - const: tgic7 - const: tgid7 - - const: tgiv7 + - const: tciv7 - const: tgia8 - const: tgib8 - const: tgic8 - const: tgid8 - - const: tgiv8 - - const: tgiu8 + - const: tciv8 + - const: tciu8 clocks: maxItems: 1 @@ -285,16 +285,16 @@ examples: , , ; - interrupt-names = "tgia0", "tgib0", "tgic0", "tgid0", "tgiv0", "tgie0", + interrupt-names = "tgia0", "tgib0", "tgic0", "tgid0", "tciv0", "tgie0", "tgif0", - "tgia1", "tgib1", "tgiv1", "tgiu1", - "tgia2", "tgib2", "tgiv2", "tgiu2", - "tgia3", "tgib3", "tgic3", "tgid3", "tgiv3", - "tgia4", "tgib4", "tgic4", "tgid4", "tgiv4", + "tgia1", "tgib1", "tciv1", "tciu1", + "tgia2", "tgib2", "tciv2", "tciu2", + "tgia3", "tgib3", "tgic3", "tgid3", "tciv3", + "tgia4", "tgib4", "tgic4", "tgid4", "tciv4", "tgiu5", "tgiv5", "tgiw5", - "tgia6", "tgib6", "tgic6", "tgid6", "tgiv6", - "tgia7", "tgib7", "tgic7", "tgid7", "tgiv7", - "tgia8", "tgib8", "tgic8", "tgid8", "tgiv8", "tgiu8"; + "tgia6", "tgib6", "tgic6", "tgid6", "tciv6", + "tgia7", "tgib7", "tgic7", "tgid7", "tciv7", + "tgia8", "tgib8", "tgic8", "tgid8", "tciv8", "tciu8"; clocks = <&cpg CPG_MOD R9A07G044_MTU_X_MCK_MTU3>; power-domains = <&cpg>; resets = <&cpg R9A07G044_MTU_X_PRESET_MTU3>; From 6321330d99e049d155561322c36664fde22ff1bd Mon Sep 17 00:00:00 2001 From: Brian Geffon Date: Thu, 21 Sep 2023 13:00:45 -0400 Subject: [PATCH 0332/1397] PM: hibernate: Use __get_safe_page() rather than touching the list commit f0c7183008b41e92fa676406d87f18773724b48b upstream. We found at least one situation where the safe pages list was empty and get_buffer() would gladly try to use a NULL pointer. Signed-off-by: Brian Geffon Fixes: 8357376d3df2 ("[PATCH] swsusp: Improve handling of highmem") Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/snapshot.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0f12e0a97e43..59ffdff59633 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -2545,8 +2545,9 @@ static void *get_highmem_page_buffer(struct page *page, pbe->copy_page = tmp; } else { /* Copy of the page will be stored in normal memory */ - kaddr = safe_pages_list; - safe_pages_list = safe_pages_list->next; + kaddr = __get_safe_page(ca->gfp_mask); + if (!kaddr) + return ERR_PTR(-ENOMEM); pbe->copy_page = virt_to_page(kaddr); } pbe->next = highmem_pblist; @@ -2750,8 +2751,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) return ERR_PTR(-ENOMEM); } pbe->orig_address = page_address(page); - pbe->address = safe_pages_list; - safe_pages_list = safe_pages_list->next; + pbe->address = __get_safe_page(ca->gfp_mask); + if (!pbe->address) + return ERR_PTR(-ENOMEM); pbe->next = restore_pblist; restore_pblist = pbe; return pbe->address; From 0756504578f28f3814219f414708536a16967ad7 Mon Sep 17 00:00:00 2001 From: Brian Geffon Date: Fri, 22 Sep 2023 12:07:04 -0400 Subject: [PATCH 0333/1397] PM: hibernate: Clean up sync_read handling in snapshot_write_next() commit d08970df1980476f27936e24d452550f3e9e92e1 upstream. In snapshot_write_next(), sync_read is set and unset in three different spots unnecessiarly. As a result there is a subtle bug where the first page after the meta data has been loaded unconditionally sets sync_read to 0. If this first PFN was actually a highmem page, then the returned buffer will be the global "buffer," and the page needs to be loaded synchronously. That is, I'm not sure we can always assume the following to be safe: handle->buffer = get_buffer(&orig_bm, &ca); handle->sync_read = 0; Because get_buffer() can call get_highmem_page_buffer() which can return 'buffer'. The easiest way to address this is just set sync_read before snapshot_write_next() returns if handle->buffer == buffer. Signed-off-by: Brian Geffon Fixes: 8357376d3df2 ("[PATCH] swsusp: Improve handling of highmem") Cc: All applicable [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/snapshot.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 59ffdff59633..50a15408c3fc 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -2785,8 +2785,6 @@ int snapshot_write_next(struct snapshot_handle *handle) if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) return 0; - handle->sync_read = 1; - if (!handle->cur) { if (!buffer) /* This makes the buffer be freed by swsusp_free() */ @@ -2829,7 +2827,6 @@ int snapshot_write_next(struct snapshot_handle *handle) memory_bm_position_reset(&zero_bm); restore_pblist = NULL; handle->buffer = get_buffer(&orig_bm, &ca); - handle->sync_read = 0; if (IS_ERR(handle->buffer)) return PTR_ERR(handle->buffer); } @@ -2839,9 +2836,8 @@ int snapshot_write_next(struct snapshot_handle *handle) handle->buffer = get_buffer(&orig_bm, &ca); if (IS_ERR(handle->buffer)) return PTR_ERR(handle->buffer); - if (handle->buffer != buffer) - handle->sync_read = 0; } + handle->sync_read = (handle->buffer == buffer); handle->cur++; /* Zero pages were not included in the image, memset it and move on. */ From b88cc37a8208d17a0298ec5c0c5bd519435aefee Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Sat, 30 Sep 2023 17:46:56 +0000 Subject: [PATCH 0334/1397] rcu: kmemleak: Ignore kmemleak false positives when RCU-freeing objects commit 5f98fd034ca6fd1ab8c91a3488968a0e9caaabf6 upstream. Since the actual slab freeing is deferred when calling kvfree_rcu(), so is the kmemleak_free() callback informing kmemleak of the object deletion. From the perspective of the kvfree_rcu() caller, the object is freed and it may remove any references to it. Since kmemleak does not scan RCU internal data storing the pointer, it will report such objects as leaks during the grace period. Tell kmemleak to ignore such objects on the kvfree_call_rcu() path. Note that the tiny RCU implementation does not have such issue since the objects can be tracked from the rcu_ctrlblk structure. Signed-off-by: Catalin Marinas Reported-by: Christoph Paasch Closes: https://lore.kernel.org/all/F903A825-F05F-4B77-A2B5-7356282FBA2C@apple.com/ Cc: Tested-by: Christoph Paasch Reviewed-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) Signed-off-by: Frederic Weisbecker Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tree.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d85779f67aea..7b4517dc4657 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -3400,6 +3401,14 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr) success = true; } + /* + * The kvfree_rcu() caller considers the pointer freed at this point + * and likely removes any references to it. Since the actual slab + * freeing (and kmemleak_free()) is deferred, tell kmemleak to ignore + * this object (no scanning or false positives reporting). + */ + kmemleak_ignore(ptr); + // Set timer to drain after KFREE_DRAIN_JIFFIES. if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING) schedule_delayed_monitor_work(krcp); From 85bb1d41d76fe03805f506b475595d3e70197200 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 18 Sep 2023 14:15:33 -0400 Subject: [PATCH 0335/1397] btrfs: don't arbitrarily slow down delalloc if we're committing commit 11aeb97b45ad2e0040cbb2a589bc403152526345 upstream. We have a random schedule_timeout() if the current transaction is committing, which seems to be a holdover from the original delalloc reservation code. Remove this, we have the proper flushing stuff, we shouldn't be hoping for random timing things to make everything work. This just induces latency for no reason. CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delalloc-space.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index 427abaf608b8..0d105ed1b8de 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -322,9 +322,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, } else { if (current->journal_info) flush = BTRFS_RESERVE_FLUSH_LIMIT; - - if (btrfs_transaction_in_commit(fs_info)) - schedule_timeout(1); } num_bytes = ALIGN(num_bytes, fs_info->sectorsize); From 0a8585281b11e3a0723bba8d8085d61f0b55f37c Mon Sep 17 00:00:00 2001 From: David Arcari Date: Thu, 5 Oct 2023 07:17:57 -0400 Subject: [PATCH 0336/1397] thermal: intel: powerclamp: fix mismatch in get function for max_idle commit fae633cfb729da2771b5433f6b84ae7e8b4aa5f7 upstream. KASAN reported this [ 444.853098] BUG: KASAN: global-out-of-bounds in param_get_int+0x77/0x90 [ 444.853111] Read of size 4 at addr ffffffffc16c9220 by task cat/2105 ... [ 444.853442] The buggy address belongs to the variable: [ 444.853443] max_idle+0x0/0xffffffffffffcde0 [intel_powerclamp] There is a mismatch between the param_get_int and the definition of max_idle. Replacing param_get_int with param_get_byte resolves this issue. Fixes: ebf519710218 ("thermal: intel: powerclamp: Add two module parameters") Cc: 6.3+ # 6.3+ Signed-off-by: David Arcari Reviewed-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/intel/intel_powerclamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 36243a3972fd..5ac5cb60bae6 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -256,7 +256,7 @@ static int max_idle_set(const char *arg, const struct kernel_param *kp) static const struct kernel_param_ops max_idle_ops = { .set = max_idle_set, - .get = param_get_int, + .get = param_get_byte, }; module_param_cb(max_idle, &max_idle_ops, &max_idle, 0644); From b9e69ada222016e3c52f679a68448cb15fb3e833 Mon Sep 17 00:00:00 2001 From: Vignesh Viswanathan Date: Mon, 4 Sep 2023 22:55:12 +0530 Subject: [PATCH 0337/1397] arm64: dts: qcom: ipq5332: Fix hwlock index for SMEM commit d08afd80158399a081b478a19902364e3dd0f84c upstream. SMEM uses lock index 3 of the TCSR Mutex hwlock for allocations in SMEM region shared by the Host and FW. Fix the SMEM hwlock index to 3 for IPQ5332. Cc: stable@vger.kernel.org Fixes: d56dd7f935e1 ("arm64: dts: qcom: ipq5332: add SMEM support") Signed-off-by: Vignesh Viswanathan Acked-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230904172516.479866-2-quic_viswanat@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/ipq5332.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/ipq5332.dtsi b/arch/arm64/boot/dts/qcom/ipq5332.dtsi index 8bfc2db44624..e40c55adff23 100644 --- a/arch/arm64/boot/dts/qcom/ipq5332.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq5332.dtsi @@ -135,7 +135,7 @@ reg = <0x0 0x4a800000 0x0 0x100000>; no-map; - hwlocks = <&tcsr_mutex 0>; + hwlocks = <&tcsr_mutex 3>; }; }; From 209013e103631c752a2728e5431f3c523e46eee9 Mon Sep 17 00:00:00 2001 From: Vignesh Viswanathan Date: Mon, 4 Sep 2023 22:55:14 +0530 Subject: [PATCH 0338/1397] arm64: dts: qcom: ipq8074: Fix hwlock index for SMEM commit 8a781d04e580705d36f7db07f5c80e748100b69d upstream. SMEM uses lock index 3 of the TCSR Mutex hwlock for allocations in SMEM region shared by the Host and FW. Fix the SMEM hwlock index to 3 for IPQ8074. Cc: stable@vger.kernel.org Fixes: 42124b947e8e ("arm64: dts: qcom: ipq8074: add SMEM support") Signed-off-by: Vignesh Viswanathan Acked-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230904172516.479866-4-quic_viswanat@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/ipq8074.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 00ed71936b47..92fd924bbdbe 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -101,7 +101,7 @@ reg = <0x0 0x4ab00000 0x0 0x100000>; no-map; - hwlocks = <&tcsr_mutex 0>; + hwlocks = <&tcsr_mutex 3>; }; memory@4ac00000 { From df56691a37dbebdc31bf048890bb192381ecdd58 Mon Sep 17 00:00:00 2001 From: Kathiravan Thirumoorthy Date: Mon, 25 Sep 2023 13:59:22 +0530 Subject: [PATCH 0339/1397] firmware: qcom_scm: use 64-bit calling convention only when client is 64-bit commit 3337a6fea25370d3d244ec6bb38c71ee86fcf837 upstream. Per the "SMC calling convention specification", the 64-bit calling convention can only be used when the client is 64-bit. Whereas the 32-bit calling convention can be used by either a 32-bit or a 64-bit client. Currently during SCM probe, irrespective of the client, 64-bit calling convention is made, which is incorrect and may lead to the undefined behaviour when the client is 32-bit. Let's fix it. Cc: stable@vger.kernel.org Fixes: 9a434cee773a ("firmware: qcom_scm: Dynamically support SMCCC and legacy conventions") Reviewed-By: Elliot Berman Signed-off-by: Kathiravan Thirumoorthy Link: https://lore.kernel.org/r/20230925-scm-v3-1-8790dff6a749@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/qcom_scm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index 06fe8aca870d..69831f1d91e3 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -167,6 +167,12 @@ static enum qcom_scm_convention __get_convention(void) if (likely(qcom_scm_convention != SMC_CONVENTION_UNKNOWN)) return qcom_scm_convention; + /* + * Per the "SMC calling convention specification", the 64-bit calling + * convention can only be used when the client is 64-bit, otherwise + * system will encounter the undefined behaviour. + */ +#if IS_ENABLED(CONFIG_ARM64) /* * Device isn't required as there is only one argument - no device * needed to dma_map_single to secure world @@ -187,6 +193,7 @@ static enum qcom_scm_convention __get_convention(void) forced = true; goto found; } +#endif probed_convention = SMC_CONVENTION_ARM_32; ret = __scm_smc_call(NULL, &desc, probed_convention, &res, true); From e5f516b24c9ecd4c616b7867195a6c0adac57b03 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Wed, 27 Sep 2023 12:50:02 -0700 Subject: [PATCH 0340/1397] ACPI: FPDT: properly handle invalid FPDT subtables commit a83c68a3bf7c418c9a46693c63c638852b0c1f4e upstream. Buggy BIOSes may have invalid FPDT subtables, e.g. on my hardware: S3PT subtable: 7F20FE30: 53 33 50 54 24 00 00 00-00 00 00 00 00 00 18 01 *S3PT$...........* 7F20FE40: 00 00 00 00 00 00 00 00-00 00 00 00 00 00 00 00 *................* 7F20FE50: 00 00 00 00 Here the first record has zero length. FBPT subtable: 7F20FE50: 46 42 50 54-3C 00 00 00 46 42 50 54 *....FBPT<...FBPT* 7F20FE60: 02 00 30 02 00 00 00 00-00 00 00 00 00 00 00 00 *..0.............* 7F20FE70: 2A A6 BC 6E 0B 00 00 00-1A 44 41 70 0B 00 00 00 **..n.....DAp....* 7F20FE80: 00 00 00 00 00 00 00 00-00 00 00 00 00 00 00 00 *................* And here FBPT table has FBPT signature repeated instead of the first record. Current code will be looping indefinitely due to zero length records, so break out of the loop if record length is zero. While we are here, add proper handling for fpdt_process_subtable() failures. Fixes: d1eb86e59be0 ("ACPI: tables: introduce support for FPDT table") Cc: All applicable Signed-off-by: Vasily Khoruzhick [ rjw: Comment edit, added empty code lines ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_fpdt.c | 45 +++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/acpi_fpdt.c b/drivers/acpi/acpi_fpdt.c index a2056c4c8cb7..271092f2700a 100644 --- a/drivers/acpi/acpi_fpdt.c +++ b/drivers/acpi/acpi_fpdt.c @@ -194,12 +194,19 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type) record_header = (void *)subtable_header + offset; offset += record_header->length; + if (!record_header->length) { + pr_err(FW_BUG "Zero-length record found in FPTD.\n"); + result = -EINVAL; + goto err; + } + switch (record_header->type) { case RECORD_S3_RESUME: if (subtable_type != SUBTABLE_S3PT) { pr_err(FW_BUG "Invalid record %d for subtable %s\n", record_header->type, signature); - return -EINVAL; + result = -EINVAL; + goto err; } if (record_resume) { pr_err("Duplicate resume performance record found.\n"); @@ -208,7 +215,7 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type) record_resume = (struct resume_performance_record *)record_header; result = sysfs_create_group(fpdt_kobj, &resume_attr_group); if (result) - return result; + goto err; break; case RECORD_S3_SUSPEND: if (subtable_type != SUBTABLE_S3PT) { @@ -223,13 +230,14 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type) record_suspend = (struct suspend_performance_record *)record_header; result = sysfs_create_group(fpdt_kobj, &suspend_attr_group); if (result) - return result; + goto err; break; case RECORD_BOOT: if (subtable_type != SUBTABLE_FBPT) { pr_err(FW_BUG "Invalid %d for subtable %s\n", record_header->type, signature); - return -EINVAL; + result = -EINVAL; + goto err; } if (record_boot) { pr_err("Duplicate boot performance record found.\n"); @@ -238,7 +246,7 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type) record_boot = (struct boot_performance_record *)record_header; result = sysfs_create_group(fpdt_kobj, &boot_attr_group); if (result) - return result; + goto err; break; default: @@ -247,6 +255,18 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type) } } return 0; + +err: + if (record_boot) + sysfs_remove_group(fpdt_kobj, &boot_attr_group); + + if (record_suspend) + sysfs_remove_group(fpdt_kobj, &suspend_attr_group); + + if (record_resume) + sysfs_remove_group(fpdt_kobj, &resume_attr_group); + + return result; } static int __init acpi_init_fpdt(void) @@ -255,6 +275,7 @@ static int __init acpi_init_fpdt(void) struct acpi_table_header *header; struct fpdt_subtable_entry *subtable; u32 offset = sizeof(*header); + int result; status = acpi_get_table(ACPI_SIG_FPDT, 0, &header); @@ -263,8 +284,8 @@ static int __init acpi_init_fpdt(void) fpdt_kobj = kobject_create_and_add("fpdt", acpi_kobj); if (!fpdt_kobj) { - acpi_put_table(header); - return -ENOMEM; + result = -ENOMEM; + goto err_nomem; } while (offset < header->length) { @@ -272,8 +293,10 @@ static int __init acpi_init_fpdt(void) switch (subtable->type) { case SUBTABLE_FBPT: case SUBTABLE_S3PT: - fpdt_process_subtable(subtable->address, + result = fpdt_process_subtable(subtable->address, subtable->type); + if (result) + goto err_subtable; break; default: /* Other types are reserved in ACPI 6.4 spec. */ @@ -282,6 +305,12 @@ static int __init acpi_init_fpdt(void) offset += sizeof(*subtable); } return 0; +err_subtable: + kobject_put(fpdt_kobj); + +err_nomem: + acpi_put_table(header); + return result; } fs_initcall(acpi_init_fpdt); From abedf0d415d11588741d5d834e6ea69656604c23 Mon Sep 17 00:00:00 2001 From: Vignesh Viswanathan Date: Mon, 4 Sep 2023 22:55:15 +0530 Subject: [PATCH 0341/1397] arm64: dts: qcom: ipq9574: Fix hwlock index for SMEM commit 5fe8508e2bc8eb4208b0434b6c1ca306c1519ade upstream. SMEM uses lock index 3 of the TCSR Mutex hwlock for allocations in SMEM region shared by the Host and FW. Fix the SMEM hwlock index to 3 for IPQ9574. Cc: stable@vger.kernel.org Fixes: 46384ac7a618 ("arm64: dts: qcom: ipq9574: Add SMEM support") Signed-off-by: Vignesh Viswanathan Acked-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230904172516.479866-5-quic_viswanat@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/ipq9574.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/ipq9574.dtsi b/arch/arm64/boot/dts/qcom/ipq9574.dtsi index 51aba071c1eb..8a72ad4afd03 100644 --- a/arch/arm64/boot/dts/qcom/ipq9574.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq9574.dtsi @@ -195,7 +195,7 @@ smem@4aa00000 { compatible = "qcom,smem"; reg = <0x0 0x4aa00000 0x0 0x100000>; - hwlocks = <&tcsr_mutex 0>; + hwlocks = <&tcsr_mutex 3>; no-map; }; }; From afa233688defa241d2c1b4bad88807ee3215d469 Mon Sep 17 00:00:00 2001 From: Vignesh Viswanathan Date: Tue, 5 Sep 2023 15:25:34 +0530 Subject: [PATCH 0342/1397] arm64: dts: qcom: ipq6018: Fix tcsr_mutex register size commit 72fc3d58b87b0d622039c6299b89024fbb7b420f upstream. IPQ6018's TCSR Mutex HW lock register has 32 locks of size 4KB each. Total size of the TCSR Mutex registers is 128KB. Fix size of the tcsr_mutex hwlock register to 0x20000. Changes in v2: - Drop change to remove qcom,ipq6018-tcsr-mutex compatible string - Added Fixes and stable tags Cc: stable@vger.kernel.org Fixes: 5bf635621245 ("arm64: dts: ipq6018: Add a few device nodes") Signed-off-by: Vignesh Viswanathan Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230905095535.1263113-2-quic_viswanat@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/ipq6018.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index 8c285f987a9c..264845cecf92 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -393,7 +393,7 @@ tcsr_mutex: hwlock@1905000 { compatible = "qcom,ipq6018-tcsr-mutex", "qcom,tcsr-mutex"; - reg = <0x0 0x01905000 0x0 0x1000>; + reg = <0x0 0x01905000 0x0 0x20000>; #hwlock-cells = <1>; }; From 0eb10025063362850ba0ce6b437406bc21427f8c Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Sat, 7 Oct 2023 15:10:42 +0200 Subject: [PATCH 0343/1397] leds: trigger: netdev: Move size check in set_device_name commit 259e33cbb1712a7dd844fc9757661cc47cb0e39b upstream. GCC 13.2 complains about array subscript 17 is above array bounds of 'char[16]' with IFNAMSIZ set to 16. The warning is correct but this scenario is impossible. set_device_name is called by device_name_store (store sysfs entry) and netdev_trig_activate. device_name_store already check if size is >= of IFNAMSIZ and return -EINVAL. (making the warning scenario impossible) netdev_trig_activate works on already defined interface, where the name has already been checked and should already follow the condition of strlen() < IFNAMSIZ. Aside from the scenario being impossible, set_device_name can be improved to both mute the warning and make the function safer. To make it safer, move size check from device_name_store directly to set_device_name and prevent any out of bounds scenario. Cc: stable@vger.kernel.org Fixes: 28a6a2ef18ad ("leds: trigger: netdev: refactor code setting device name") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309192035.GTJEEbem-lkp@intel.com/ Signed-off-by: Christian Marangi Link: https://lore.kernel.org/r/20231007131042.15032-1-ansuelsmth@gmail.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/leds/trigger/ledtrig-netdev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c index 58f3352539e8..e358e77e4b38 100644 --- a/drivers/leds/trigger/ledtrig-netdev.c +++ b/drivers/leds/trigger/ledtrig-netdev.c @@ -221,6 +221,9 @@ static ssize_t device_name_show(struct device *dev, static int set_device_name(struct led_netdev_data *trigger_data, const char *name, size_t size) { + if (size >= IFNAMSIZ) + return -EINVAL; + cancel_delayed_work_sync(&trigger_data->work); mutex_lock(&trigger_data->lock); @@ -263,9 +266,6 @@ static ssize_t device_name_store(struct device *dev, struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev); int ret; - if (size >= IFNAMSIZ) - return -EINVAL; - ret = set_device_name(trigger_data, buf, size); if (ret < 0) From df148c18b51c55c7fe8d2937de36627cc8a0a861 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:29:23 +0200 Subject: [PATCH 0344/1397] mfd: qcom-spmi-pmic: Fix reference leaks in revid helper commit a0fa44c261e448c531f9adb3a5189a3520f3e316 upstream. The Qualcomm SPMI PMIC revid implementation is broken in multiple ways. First, it totally ignores struct device_node reference counting and leaks references to the parent bus node as well as each child it iterates over using an open-coded for_each_child_of_node(). Second, it leaks references to each spmi device on the bus that it iterates over by failing to drop the reference taken by the spmi_device_from_of() helper. Fix the struct device_node leaks by reimplementing the lookup using for_each_child_of_node() and adding the missing reference count decrements. Fix the sibling struct device leaks by dropping the unnecessary lookups of devices with the wrong USID. Note that this still leaves one struct device reference leak in case a base device is found but it is not the parent of the device used for the lookup. This will be addressed in a follow-on patch. Fixes: e9c11c6e3a0e ("mfd: qcom-spmi-pmic: expose the PMIC revid information to clients") Cc: stable@vger.kernel.org # 6.0 Signed-off-by: Johan Hovold Acked-by: Caleb Connolly Link: https://lore.kernel.org/r/20231003152927.15000-2-johan+linaro@kernel.org Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/qcom-spmi-pmic.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/mfd/qcom-spmi-pmic.c b/drivers/mfd/qcom-spmi-pmic.c index 7e2cd79d17eb..47738f7e492c 100644 --- a/drivers/mfd/qcom-spmi-pmic.c +++ b/drivers/mfd/qcom-spmi-pmic.c @@ -81,7 +81,7 @@ static struct spmi_device *qcom_pmic_get_base_usid(struct device *dev) struct spmi_device *sdev; struct qcom_spmi_dev *ctx; struct device_node *spmi_bus; - struct device_node *other_usid = NULL; + struct device_node *child; int function_parent_usid, ret; u32 pmic_addr; @@ -105,28 +105,34 @@ static struct spmi_device *qcom_pmic_get_base_usid(struct device *dev) * device for USID 2. */ spmi_bus = of_get_parent(sdev->dev.of_node); - do { - other_usid = of_get_next_child(spmi_bus, other_usid); - - ret = of_property_read_u32_index(other_usid, "reg", 0, &pmic_addr); - if (ret) - return ERR_PTR(ret); + sdev = ERR_PTR(-ENODATA); + for_each_child_of_node(spmi_bus, child) { + ret = of_property_read_u32_index(child, "reg", 0, &pmic_addr); + if (ret) { + of_node_put(child); + sdev = ERR_PTR(ret); + break; + } - sdev = spmi_device_from_of(other_usid); if (pmic_addr == function_parent_usid - (ctx->num_usids - 1)) { - if (!sdev) + sdev = spmi_device_from_of(child); + if (!sdev) { /* * If the base USID for this PMIC hasn't probed yet * but the secondary USID has, then we need to defer * the function driver so that it will attempt to * probe again when the base USID is ready. */ - return ERR_PTR(-EPROBE_DEFER); - return sdev; + sdev = ERR_PTR(-EPROBE_DEFER); + } + of_node_put(child); + break; } - } while (other_usid->sibling); + } + + of_node_put(spmi_bus); - return ERR_PTR(-ENODATA); + return sdev; } static int pmic_spmi_load_revid(struct regmap *map, struct device *dev, From affae18838db5e6b463ee30c821385695af56dc2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:29:24 +0200 Subject: [PATCH 0345/1397] mfd: qcom-spmi-pmic: Fix revid implementation commit 7b439aaa62fee474a0d84d67a25f4984467e7b95 upstream. The Qualcomm SPMI PMIC revid implementation is broken in multiple ways. First, it assumes that just because the sibling base device has been registered that means that it is also bound to a driver, which may not be the case (e.g. due to probe deferral or asynchronous probe). This could trigger a NULL-pointer dereference when attempting to access the driver data of the unbound device. Second, it accesses driver data of a sibling device directly and without any locking, which means that the driver data may be freed while it is being accessed (e.g. on driver unbind). Third, it leaks a struct device reference to the sibling device which is looked up using the spmi_device_from_of() every time a function (child) device is calling the revid function (e.g. on probe). Fix this mess by reimplementing the revid lookup so that it is done only at probe of the PMIC device; the base device fetches the revid info from the hardware, while any secondary SPMI device fetches the information from the base device and caches it so that it can be accessed safely from its children. If the base device has not been probed yet then probe of a secondary device is deferred. Fixes: e9c11c6e3a0e ("mfd: qcom-spmi-pmic: expose the PMIC revid information to clients") Cc: stable@vger.kernel.org # 6.0 Signed-off-by: Johan Hovold Acked-by: Caleb Connolly Link: https://lore.kernel.org/r/20231003152927.15000-3-johan+linaro@kernel.org Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/qcom-spmi-pmic.c | 69 +++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/drivers/mfd/qcom-spmi-pmic.c b/drivers/mfd/qcom-spmi-pmic.c index 47738f7e492c..8e449cff5cec 100644 --- a/drivers/mfd/qcom-spmi-pmic.c +++ b/drivers/mfd/qcom-spmi-pmic.c @@ -30,6 +30,8 @@ struct qcom_spmi_dev { struct qcom_spmi_pmic pmic; }; +static DEFINE_MUTEX(pmic_spmi_revid_lock); + #define N_USIDS(n) ((void *)n) static const struct of_device_id pmic_spmi_id_table[] = { @@ -76,24 +78,21 @@ static const struct of_device_id pmic_spmi_id_table[] = { * * This only supports PMICs with 1 or 2 USIDs. */ -static struct spmi_device *qcom_pmic_get_base_usid(struct device *dev) +static struct spmi_device *qcom_pmic_get_base_usid(struct spmi_device *sdev, struct qcom_spmi_dev *ctx) { - struct spmi_device *sdev; - struct qcom_spmi_dev *ctx; struct device_node *spmi_bus; struct device_node *child; int function_parent_usid, ret; u32 pmic_addr; - sdev = to_spmi_device(dev); - ctx = dev_get_drvdata(&sdev->dev); - /* * Quick return if the function device is already in the base * USID. This will always be hit for PMICs with only 1 USID. */ - if (sdev->usid % ctx->num_usids == 0) + if (sdev->usid % ctx->num_usids == 0) { + get_device(&sdev->dev); return sdev; + } function_parent_usid = sdev->usid; @@ -118,10 +117,8 @@ static struct spmi_device *qcom_pmic_get_base_usid(struct device *dev) sdev = spmi_device_from_of(child); if (!sdev) { /* - * If the base USID for this PMIC hasn't probed yet - * but the secondary USID has, then we need to defer - * the function driver so that it will attempt to - * probe again when the base USID is ready. + * If the base USID for this PMIC hasn't been + * registered yet then we need to defer. */ sdev = ERR_PTR(-EPROBE_DEFER); } @@ -135,6 +132,35 @@ static struct spmi_device *qcom_pmic_get_base_usid(struct device *dev) return sdev; } +static int pmic_spmi_get_base_revid(struct spmi_device *sdev, struct qcom_spmi_dev *ctx) +{ + struct qcom_spmi_dev *base_ctx; + struct spmi_device *base; + int ret = 0; + + base = qcom_pmic_get_base_usid(sdev, ctx); + if (IS_ERR(base)) + return PTR_ERR(base); + + /* + * Copy revid info from base device if it has probed and is still + * bound to its driver. + */ + mutex_lock(&pmic_spmi_revid_lock); + base_ctx = spmi_device_get_drvdata(base); + if (!base_ctx) { + ret = -EPROBE_DEFER; + goto out_unlock; + } + memcpy(&ctx->pmic, &base_ctx->pmic, sizeof(ctx->pmic)); +out_unlock: + mutex_unlock(&pmic_spmi_revid_lock); + + put_device(&base->dev); + + return ret; +} + static int pmic_spmi_load_revid(struct regmap *map, struct device *dev, struct qcom_spmi_pmic *pmic) { @@ -210,11 +236,7 @@ const struct qcom_spmi_pmic *qcom_pmic_get(struct device *dev) if (!of_match_device(pmic_spmi_id_table, dev->parent)) return ERR_PTR(-EINVAL); - sdev = qcom_pmic_get_base_usid(dev->parent); - - if (IS_ERR(sdev)) - return ERR_CAST(sdev); - + sdev = to_spmi_device(dev->parent); spmi = dev_get_drvdata(&sdev->dev); return &spmi->pmic; @@ -249,16 +271,31 @@ static int pmic_spmi_probe(struct spmi_device *sdev) ret = pmic_spmi_load_revid(regmap, &sdev->dev, &ctx->pmic); if (ret < 0) return ret; + } else { + ret = pmic_spmi_get_base_revid(sdev, ctx); + if (ret) + return ret; } + + mutex_lock(&pmic_spmi_revid_lock); spmi_device_set_drvdata(sdev, ctx); + mutex_unlock(&pmic_spmi_revid_lock); return devm_of_platform_populate(&sdev->dev); } +static void pmic_spmi_remove(struct spmi_device *sdev) +{ + mutex_lock(&pmic_spmi_revid_lock); + spmi_device_set_drvdata(sdev, NULL); + mutex_unlock(&pmic_spmi_revid_lock); +} + MODULE_DEVICE_TABLE(of, pmic_spmi_id_table); static struct spmi_driver pmic_spmi_driver = { .probe = pmic_spmi_probe, + .remove = pmic_spmi_remove, .driver = { .name = "pmic-spmi", .of_match_table = pmic_spmi_id_table, From 1eb840f104c0c1fda8c5f7e9485b87882b88b952 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 5 Oct 2023 14:15:58 +0300 Subject: [PATCH 0346/1397] ima: annotate iint mutex to avoid lockdep false positive warnings commit e044374a8a0a99e46f4e6d6751d3042b6d9cc12e upstream. It is not clear that IMA should be nested at all, but as long is it measures files both on overlayfs and on underlying fs, we need to annotate the iint mutex to avoid lockdep false positives related to IMA + overlayfs, same as overlayfs annotates the inode mutex. Reported-and-tested-by: syzbot+b42fe626038981fb7bfa@syzkaller.appspotmail.com Signed-off-by: Amir Goldstein Cc: stable@vger.kernel.org Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/iint.c | 48 ++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/security/integrity/iint.c b/security/integrity/iint.c index a462df827de2..27ea19fb1f54 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -66,9 +66,32 @@ struct integrity_iint_cache *integrity_iint_find(struct inode *inode) return iint; } -static void iint_free(struct integrity_iint_cache *iint) +#define IMA_MAX_NESTING (FILESYSTEM_MAX_STACK_DEPTH+1) + +/* + * It is not clear that IMA should be nested at all, but as long is it measures + * files both on overlayfs and on underlying fs, we need to annotate the iint + * mutex to avoid lockdep false positives related to IMA + overlayfs. + * See ovl_lockdep_annotate_inode_mutex_key() for more details. + */ +static inline void iint_lockdep_annotate(struct integrity_iint_cache *iint, + struct inode *inode) +{ +#ifdef CONFIG_LOCKDEP + static struct lock_class_key iint_mutex_key[IMA_MAX_NESTING]; + + int depth = inode->i_sb->s_stack_depth; + + if (WARN_ON_ONCE(depth < 0 || depth >= IMA_MAX_NESTING)) + depth = 0; + + lockdep_set_class(&iint->mutex, &iint_mutex_key[depth]); +#endif +} + +static void iint_init_always(struct integrity_iint_cache *iint, + struct inode *inode) { - kfree(iint->ima_hash); iint->ima_hash = NULL; iint->version = 0; iint->flags = 0UL; @@ -80,6 +103,14 @@ static void iint_free(struct integrity_iint_cache *iint) iint->ima_creds_status = INTEGRITY_UNKNOWN; iint->evm_status = INTEGRITY_UNKNOWN; iint->measured_pcrs = 0; + mutex_init(&iint->mutex); + iint_lockdep_annotate(iint, inode); +} + +static void iint_free(struct integrity_iint_cache *iint) +{ + kfree(iint->ima_hash); + mutex_destroy(&iint->mutex); kmem_cache_free(iint_cache, iint); } @@ -104,6 +135,8 @@ struct integrity_iint_cache *integrity_inode_get(struct inode *inode) if (!iint) return NULL; + iint_init_always(iint, inode); + write_lock(&integrity_iint_lock); p = &integrity_iint_tree.rb_node; @@ -153,25 +186,18 @@ void integrity_inode_free(struct inode *inode) iint_free(iint); } -static void init_once(void *foo) +static void iint_init_once(void *foo) { struct integrity_iint_cache *iint = (struct integrity_iint_cache *) foo; memset(iint, 0, sizeof(*iint)); - iint->ima_file_status = INTEGRITY_UNKNOWN; - iint->ima_mmap_status = INTEGRITY_UNKNOWN; - iint->ima_bprm_status = INTEGRITY_UNKNOWN; - iint->ima_read_status = INTEGRITY_UNKNOWN; - iint->ima_creds_status = INTEGRITY_UNKNOWN; - iint->evm_status = INTEGRITY_UNKNOWN; - mutex_init(&iint->mutex); } static int __init integrity_iintcache_init(void) { iint_cache = kmem_cache_create("iint_cache", sizeof(struct integrity_iint_cache), - 0, SLAB_PANIC, init_once); + 0, SLAB_PANIC, iint_init_once); return 0; } DEFINE_LSM(integrity) = { From a311638793fa0b07ecbca079273288f7b143ab2a Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Wed, 18 Oct 2023 14:47:02 -0400 Subject: [PATCH 0347/1397] ima: detect changes to the backing overlay file commit b836c4d29f2744200b2af41e14bf50758dddc818 upstream. Commit 18b44bc5a672 ("ovl: Always reevaluate the file signature for IMA") forced signature re-evaulation on every file access. Instead of always re-evaluating the file's integrity, detect a change to the backing file, by comparing the cached file metadata with the backing file's metadata. Verifying just the i_version has not changed is insufficient. In addition save and compare the i_ino and s_dev as well. Reviewed-by: Amir Goldstein Tested-by: Eric Snowberg Tested-by: Raul E Rangel Cc: stable@vger.kernel.org Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/super.c | 2 +- security/integrity/ima/ima_api.c | 5 +++++ security/integrity/ima/ima_main.c | 16 +++++++++++++++- security/integrity/integrity.h | 2 ++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 3fa2416264a4..c71d185980c0 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1489,7 +1489,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) ovl_trusted_xattr_handlers; sb->s_fs_info = ofs; sb->s_flags |= SB_POSIXACL; - sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE; + sb->s_iflags |= SB_I_SKIP_SYNC; err = -ENOMEM; root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe); diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 452e80b541e5..597ea0c4d72f 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -243,6 +243,7 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, { const char *audit_cause = "failed"; struct inode *inode = file_inode(file); + struct inode *real_inode = d_real_inode(file_dentry(file)); const char *filename = file->f_path.dentry->d_name.name; struct ima_max_digest_data hash; struct kstat stat; @@ -302,6 +303,10 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, iint->ima_hash = tmpbuf; memcpy(iint->ima_hash, &hash, length); iint->version = i_version; + if (real_inode != inode) { + iint->real_ino = real_inode->i_ino; + iint->real_dev = real_inode->i_sb->s_dev; + } /* Possibly temporary failure due to type of read (eg. O_DIRECT) */ if (!result) diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 365db0e43d7c..cc1217ac2c6f 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "ima.h" @@ -207,7 +208,7 @@ static int process_measurement(struct file *file, const struct cred *cred, u32 secid, char *buf, loff_t size, int mask, enum ima_hooks func) { - struct inode *inode = file_inode(file); + struct inode *backing_inode, *inode = file_inode(file); struct integrity_iint_cache *iint = NULL; struct ima_template_desc *template_desc = NULL; char *pathbuf = NULL; @@ -284,6 +285,19 @@ static int process_measurement(struct file *file, const struct cred *cred, iint->measured_pcrs = 0; } + /* Detect and re-evaluate changes made to the backing file. */ + backing_inode = d_real_inode(file_dentry(file)); + if (backing_inode != inode && + (action & IMA_DO_MASK) && (iint->flags & IMA_DONE_MASK)) { + if (!IS_I_VERSION(backing_inode) || + backing_inode->i_sb->s_dev != iint->real_dev || + backing_inode->i_ino != iint->real_ino || + !inode_eq_iversion(backing_inode, iint->version)) { + iint->flags &= ~IMA_DONE_MASK; + iint->measured_pcrs = 0; + } + } + /* Determine if already appraised/measured based on bitmask * (IMA_MEASURE, IMA_MEASURED, IMA_XXXX_APPRAISE, IMA_XXXX_APPRAISED, * IMA_AUDIT, IMA_AUDITED) diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index d7553c93f5c0..9561db7cf6b4 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -164,6 +164,8 @@ struct integrity_iint_cache { unsigned long flags; unsigned long measured_pcrs; unsigned long atomic_flags; + unsigned long real_ino; + dev_t real_dev; enum integrity_status ima_file_status:4; enum integrity_status ima_mmap_status:4; enum integrity_status ima_bprm_status:4; From 80d6a9236ab6d2c0fd241514d1af2e325d16a210 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Nov 2023 13:14:21 +0100 Subject: [PATCH 0348/1397] netfilter: nf_tables: remove catchall element in GC sync path [ Upstream commit 93995bf4af2c5a99e2a87f0cd5ce547d31eb7630 ] The expired catchall element is not deactivated and removed from GC sync path. This path holds mutex so just call nft_setelem_data_deactivate() and nft_setelem_catchall_remove() before queueing the GC work. Fixes: 4a9e12ea7e70 ("netfilter: nft_set_pipapo: call nft_trans_gc_queue_sync() in catchall GC") Reported-by: lonial con Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3807c6c1181f..e6c52d417b6d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6464,6 +6464,12 @@ static int nft_setelem_deactivate(const struct net *net, return ret; } +static void nft_setelem_catchall_destroy(struct nft_set_elem_catchall *catchall) +{ + list_del_rcu(&catchall->list); + kfree_rcu(catchall, rcu); +} + static void nft_setelem_catchall_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) @@ -6472,8 +6478,7 @@ static void nft_setelem_catchall_remove(const struct net *net, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { if (catchall->elem == elem->priv) { - list_del_rcu(&catchall->list); - kfree_rcu(catchall, rcu); + nft_setelem_catchall_destroy(catchall); break; } } @@ -9639,11 +9644,12 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, unsigned int gc_seq, bool sync) { - struct nft_set_elem_catchall *catchall; + struct nft_set_elem_catchall *catchall, *next; const struct nft_set *set = gc->set; + struct nft_elem_priv *elem_priv; struct nft_set_ext *ext; - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_expired(ext)) @@ -9661,7 +9667,17 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, if (!gc) return NULL; - nft_trans_gc_elem_add(gc, catchall->elem); + elem_priv = catchall->elem; + if (sync) { + struct nft_set_elem elem = { + .priv = elem_priv, + }; + + nft_setelem_data_deactivate(gc->net, gc->set, &elem); + nft_setelem_catchall_destroy(catchall); + } + + nft_trans_gc_elem_add(gc, elem_priv); } return gc; From 8c9a954c60a7b3f9e46bdda5d1ab5dabe59539d9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Nov 2023 13:14:22 +0100 Subject: [PATCH 0349/1397] netfilter: nf_tables: split async and sync catchall in two functions [ Upstream commit 8837ba3e58ea1e3d09ae36db80b1e80853aada95 ] list_for_each_entry_safe() does not work for the async case which runs under RCU, therefore, split GC logic for catchall in two functions instead, one for each of the sync and async GC variants. The catchall sync GC variant never sees a _DEAD bit set on ever, thus, this handling is removed in such case, moreover, allocate GC sync batch via GFP_KERNEL. Fixes: 93995bf4af2c ("netfilter: nf_tables: remove catchall element in GC sync path") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 61 ++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e6c52d417b6d..4a450f6d12a5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9640,16 +9640,14 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans) call_rcu(&trans->rcu, nft_trans_gc_trans_free); } -static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, - unsigned int gc_seq, - bool sync) +struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, + unsigned int gc_seq) { - struct nft_set_elem_catchall *catchall, *next; + struct nft_set_elem_catchall *catchall; const struct nft_set *set = gc->set; - struct nft_elem_priv *elem_priv; struct nft_set_ext *ext; - list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { + list_for_each_entry_rcu(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (!nft_set_elem_expired(ext)) @@ -9659,39 +9657,44 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc, nft_set_elem_dead(ext); dead_elem: - if (sync) - gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); - else - gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); - + gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); if (!gc) return NULL; - elem_priv = catchall->elem; - if (sync) { - struct nft_set_elem elem = { - .priv = elem_priv, - }; - - nft_setelem_data_deactivate(gc->net, gc->set, &elem); - nft_setelem_catchall_destroy(catchall); - } - - nft_trans_gc_elem_add(gc, elem_priv); + nft_trans_gc_elem_add(gc, catchall->elem); } return gc; } -struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, - unsigned int gc_seq) -{ - return nft_trans_gc_catchall(gc, gc_seq, false); -} - struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc) { - return nft_trans_gc_catchall(gc, 0, true); + struct nft_set_elem_catchall *catchall, *next; + const struct nft_set *set = gc->set; + struct nft_set_elem elem; + struct nft_set_ext *ext; + + WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)); + + list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + + if (!nft_set_elem_expired(ext)) + continue; + + gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); + if (!gc) + return NULL; + + memset(&elem, 0, sizeof(elem)); + elem.priv = catchall->elem; + + nft_setelem_data_deactivate(gc->net, gc->set, &elem); + nft_setelem_catchall_destroy(catchall); + nft_trans_gc_elem_add(gc, elem.priv); + } + + return gc; } static void nf_tables_module_autoload_cleanup(struct net *net) From 868eb92b6866aa859c37bfc837b85b3e67cc5494 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 23 Nov 2023 10:47:48 +0100 Subject: [PATCH 0350/1397] ASoC: soc-dai: add flag to mute and unmute stream during trigger commit f0220575e65abe09c09cd17826a3cdea76e8d58f upstream. In some setups like Speaker amps which are very sensitive, ex: keeping them unmute without actual data stream for very short duration results in a static charge and results in pop and clicks. To minimize this, provide a way to mute and unmute such codecs during trigger callbacks. Signed-off-by: Srinivas Kandagatla Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20231027105747.32450-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown [ johan: backport to v6.6.2 ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/sound/soc-dai.h | 1 + sound/soc/soc-dai.c | 7 +++++++ sound/soc/soc-pcm.c | 12 ++++++++---- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 5fcfba47d98c..adcd8719d343 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -370,6 +370,7 @@ struct snd_soc_dai_ops { /* bit field */ unsigned int no_capture_mute:1; + unsigned int mute_unmute_on_trigger:1; }; struct snd_soc_cdai_ops { diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c index 3f33f0630ad8..9a828e55c4f9 100644 --- a/sound/soc/soc-dai.c +++ b/sound/soc/soc-dai.c @@ -658,6 +658,10 @@ int snd_soc_pcm_dai_trigger(struct snd_pcm_substream *substream, ret = soc_dai_trigger(dai, substream, cmd); if (ret < 0) break; + + if (dai->driver->ops && dai->driver->ops->mute_unmute_on_trigger) + snd_soc_dai_digital_mute(dai, 0, substream->stream); + soc_dai_mark_push(dai, substream, trigger); } break; @@ -668,6 +672,9 @@ int snd_soc_pcm_dai_trigger(struct snd_pcm_substream *substream, if (rollback && !soc_dai_mark_match(dai, substream, trigger)) continue; + if (dai->driver->ops && dai->driver->ops->mute_unmute_on_trigger) + snd_soc_dai_digital_mute(dai, 1, substream->stream); + r = soc_dai_trigger(dai, substream, cmd); if (r < 0) ret = r; /* use last ret */ diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 0a20122b3e55..511446a30c05 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -896,8 +896,10 @@ static int __soc_pcm_prepare(struct snd_soc_pcm_runtime *rtd, snd_soc_dapm_stream_event(rtd, substream->stream, SND_SOC_DAPM_STREAM_START); - for_each_rtd_dais(rtd, i, dai) - snd_soc_dai_digital_mute(dai, 0, substream->stream); + for_each_rtd_dais(rtd, i, dai) { + if (dai->driver->ops && !dai->driver->ops->mute_unmute_on_trigger) + snd_soc_dai_digital_mute(dai, 0, substream->stream); + } out: return soc_pcm_ret(rtd, ret); @@ -939,8 +941,10 @@ static int soc_pcm_hw_clean(struct snd_soc_pcm_runtime *rtd, if (snd_soc_dai_active(dai) == 1) soc_pcm_set_dai_params(dai, NULL); - if (snd_soc_dai_stream_active(dai, substream->stream) == 1) - snd_soc_dai_digital_mute(dai, 1, substream->stream); + if (snd_soc_dai_stream_active(dai, substream->stream) == 1) { + if (dai->driver->ops && !dai->driver->ops->mute_unmute_on_trigger) + snd_soc_dai_digital_mute(dai, 1, substream->stream); + } } /* run the stream event */ From 0e0aa7108a147633146285f8956451bd8e86e6a7 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 23 Nov 2023 10:47:49 +0100 Subject: [PATCH 0351/1397] ASoC: codecs: wsa883x: make use of new mute_unmute_on_trigger flag commit 805ce81826c896dd3c351a32814b28557f9edf54 upstream. In the current setup the PA is left unmuted even when the Soundwire ports are not started streaming. This can lead to click and pop sounds during start. There is a same issue in the reverse order where in the PA is left unmute even after the data stream is stopped, the time between data stream stopping and port closing is long enough to accumulate DC on the line resulting in Click/Pop noise during end of stream. making use of new mute_unmute_on_trigger flag is helping a lot with this Click/Pop issues reported on this Codec Signed-off-by: Srinivas Kandagatla Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20231027105747.32450-3-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wsa883x.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c index 197fae23762f..cb83c569e18d 100644 --- a/sound/soc/codecs/wsa883x.c +++ b/sound/soc/codecs/wsa883x.c @@ -1203,9 +1203,6 @@ static int wsa883x_spkr_event(struct snd_soc_dapm_widget *w, break; } - snd_soc_component_write_field(component, WSA883X_DRE_CTL_1, - WSA883X_DRE_GAIN_EN_MASK, - WSA883X_DRE_GAIN_FROM_CSR); if (wsa883x->port_enable[WSA883X_PORT_COMP]) snd_soc_component_write_field(component, WSA883X_DRE_CTL_0, WSA883X_DRE_OFFSET_MASK, @@ -1218,9 +1215,6 @@ static int wsa883x_spkr_event(struct snd_soc_dapm_widget *w, snd_soc_component_write_field(component, WSA883X_PDM_WD_CTL, WSA883X_PDM_EN_MASK, WSA883X_PDM_ENABLE); - snd_soc_component_write_field(component, WSA883X_PA_FSM_CTL, - WSA883X_GLOBAL_PA_EN_MASK, - WSA883X_GLOBAL_PA_ENABLE); break; case SND_SOC_DAPM_PRE_PMD: @@ -1346,6 +1340,7 @@ static const struct snd_soc_dai_ops wsa883x_dai_ops = { .hw_free = wsa883x_hw_free, .mute_stream = wsa883x_digital_mute, .set_stream = wsa883x_set_sdw_stream, + .mute_unmute_on_trigger = true, }; static struct snd_soc_dai_driver wsa883x_dais[] = { From 38052bd5c1649f07d8b342cbec70df75429d83b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 2 Oct 2023 12:48:07 +0300 Subject: [PATCH 0352/1397] selftests/resctrl: Fix uninitialized .sa_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit beb7f471847663559bd0fe60af1d70e05a1d7c6c upstream. signal_handler_unregister() calls sigaction() with uninitializing sa_flags in the struct sigaction. Make sure sa_flags is always initialized in signal_handler_unregister() by initializing the struct sigaction when declaring it. Also add the initialization to signal_handler_register() even if there are no know bugs in there because correctness is then obvious from the code itself. Fixes: 73c55fa5ab55 ("selftests/resctrl: Commonize the signal handler register/unregister for all tests") Suggested-by: Reinette Chatre Signed-off-by: Ilpo Järvinen Cc: Reviewed-by: Reinette Chatre Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/resctrl/resctrl_val.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index f0f6c5f6e98b..ee07e0943f58 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -482,7 +482,7 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr) */ int signal_handler_register(void) { - struct sigaction sigact; + struct sigaction sigact = {}; int ret = 0; sigact.sa_sigaction = ctrlc_handler; @@ -504,7 +504,7 @@ int signal_handler_register(void) */ void signal_handler_unregister(void) { - struct sigaction sigact; + struct sigaction sigact = {}; sigact.sa_handler = SIG_DFL; sigemptyset(&sigact.sa_mask); From f30943452d8ed208f0a30eba98dfb441862f525e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 2 Oct 2023 12:48:09 +0300 Subject: [PATCH 0353/1397] selftests/resctrl: Remove duplicate feature check from CMT test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 030b48fb2cf045dead8ee2c5ead560930044c029 upstream. The test runner run_cmt_test() in resctrl_tests.c checks for CMT feature and does not run cmt_resctrl_val() if CMT is not supported. Then cmt_resctrl_val() also check is CMT is supported. Remove the duplicated feature check for CMT from cmt_resctrl_val(). Signed-off-by: Ilpo Järvinen Tested-by: Shaopeng Tan Reviewed-by: Reinette Chatre Reviewed-by: Shaopeng Tan Cc: Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/resctrl/cmt_test.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index cb2197647c6c..cc93cb5ae7ee 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -76,9 +76,6 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) int count_of_bits; int ret; - if (!validate_resctrl_feature_request(CMT_STR)) - return -1; - ret = get_cbm_mask("L3", cbm_mask); if (ret) return ret; From e775994f12f39da7f381da96efc2cd77f62ebd65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 2 Oct 2023 12:48:10 +0300 Subject: [PATCH 0354/1397] selftests/resctrl: Move _GNU_SOURCE define into Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3a1e4a91aa454a1c589a9824d54179fdbfccde45 upstream. _GNU_SOURCE is defined in resctrl.h. Defining _GNU_SOURCE has a large impact on what gets defined when including headers either before or after it. This can result in compile failures if .c file decides to include a standard header file before resctrl.h. It is safer to define _GNU_SOURCE in Makefile so it is always defined regardless of in which order includes are done. Signed-off-by: Ilpo Järvinen Tested-by: Shaopeng Tan Reviewed-by: Reinette Chatre Reviewed-by: Shaopeng Tan Cc: Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/resctrl/Makefile | 2 +- tools/testing/selftests/resctrl/resctrl.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index 5073dbc96125..2deac2031de9 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 +CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := resctrl_tests diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 838d1a438f33..eff178befe4a 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -1,5 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#define _GNU_SOURCE #ifndef RESCTRL_H #define RESCTRL_H #include From 077171788ac8a1e2611b450e6bf90092627d2bb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 2 Oct 2023 12:48:11 +0300 Subject: [PATCH 0355/1397] selftests/resctrl: Refactor feature check to use resource and feature name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d56e5da0e0f557a206bace16bbbdad00a5800e34 upstream. Feature check in validate_resctrl_feature_request() takes in the test name string and maps that to what to check per test. Pass resource and feature names to validate_resctrl_feature_request() directly rather than deriving them from the test name inside the function which makes the feature check easier to extend for new test cases. Use !! in the return statement to make the boolean conversion more obvious even if it is not strictly necessary from correctness point of view (to avoid it looking like the function is returning a freed pointer). Signed-off-by: Ilpo Järvinen Tested-by: Shaopeng Tan Reviewed-by: Reinette Chatre Reviewed-by: Shaopeng Tan Cc: # selftests/resctrl: Remove duplicate feature check from CMT test Cc: # selftests/resctrl: Move _GNU_SOURCE define into Makefile Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/resctrl/resctrl.h | 6 +- .../testing/selftests/resctrl/resctrl_tests.c | 10 +-- tools/testing/selftests/resctrl/resctrlfs.c | 69 ++++++++----------- 3 files changed, 34 insertions(+), 51 deletions(-) diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index eff178befe4a..9e46fa90e7be 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -27,10 +27,6 @@ #define RESCTRL_PATH "/sys/fs/resctrl" #define PHYS_ID_PATH "/sys/devices/system/cpu/cpu" #define INFO_PATH "/sys/fs/resctrl/info" -#define L3_PATH "/sys/fs/resctrl/info/L3" -#define MB_PATH "/sys/fs/resctrl/info/MB" -#define L3_MON_PATH "/sys/fs/resctrl/info/L3_MON" -#define L3_MON_FEATURES_PATH "/sys/fs/resctrl/info/L3_MON/mon_features" #define ARCH_INTEL 1 #define ARCH_AMD 2 @@ -85,7 +81,7 @@ int get_resource_id(int cpu_no, int *resource_id); int mount_resctrlfs(void); int umount_resctrlfs(void); int validate_bw_report_request(char *bw_report); -bool validate_resctrl_feature_request(const char *resctrl_val); +bool validate_resctrl_feature_request(const char *resource, const char *feature); char *fgrep(FILE *inf, const char *str); int taskset_benchmark(pid_t bm_pid, int cpu_no); void run_benchmark(int signum, siginfo_t *info, void *ucontext); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 9e2bc8ba95f1..65b27aa315b1 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -83,7 +83,9 @@ static void run_mbm_test(char **benchmark_cmd, size_t span, return; } - if (!validate_resctrl_feature_request(MBM_STR) || (get_vendor() != ARCH_INTEL)) { + if (!validate_resctrl_feature_request("L3_MON", "mbm_total_bytes") || + !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") || + (get_vendor() != ARCH_INTEL)) { ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n"); goto umount; } @@ -109,7 +111,7 @@ static void run_mba_test(char **benchmark_cmd, int cpu_no, char *bw_report) return; } - if (!validate_resctrl_feature_request(MBA_STR) || (get_vendor() != ARCH_INTEL)) { + if (!validate_resctrl_feature_request("MB", NULL) || (get_vendor() != ARCH_INTEL)) { ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n"); goto umount; } @@ -133,7 +135,7 @@ static void run_cmt_test(char **benchmark_cmd, int cpu_no) return; } - if (!validate_resctrl_feature_request(CMT_STR)) { + if (!validate_resctrl_feature_request("L3_MON", "llc_occupancy")) { ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n"); goto umount; } @@ -159,7 +161,7 @@ static void run_cat_test(int cpu_no, int no_of_bits) return; } - if (!validate_resctrl_feature_request(CAT_STR)) { + if (!validate_resctrl_feature_request("L3", NULL)) { ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n"); goto umount; } diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index bd36ee206602..3a8111362d26 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -8,6 +8,8 @@ * Sai Praneeth Prakhya , * Fenghua Yu */ +#include + #include "resctrl.h" static int find_resctrl_mount(char *buffer) @@ -604,63 +606,46 @@ char *fgrep(FILE *inf, const char *str) /* * validate_resctrl_feature_request - Check if requested feature is valid. - * @resctrl_val: Requested feature + * @resource: Required resource (e.g., MB, L3, L2, L3_MON, etc.) + * @feature: Required monitor feature (in mon_features file). Can only be + * set for L3_MON. Must be NULL for all other resources. * - * Return: True if the feature is supported, else false. False is also - * returned if resctrl FS is not mounted. + * Return: True if the resource/feature is supported, else false. False is + * also returned if resctrl FS is not mounted. */ -bool validate_resctrl_feature_request(const char *resctrl_val) +bool validate_resctrl_feature_request(const char *resource, const char *feature) { + char res_path[PATH_MAX]; struct stat statbuf; - bool found = false; char *res; FILE *inf; int ret; - if (!resctrl_val) + if (!resource) return false; ret = find_resctrl_mount(NULL); if (ret) return false; - if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) { - if (!stat(L3_PATH, &statbuf)) - return true; - } else if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { - if (!stat(MB_PATH, &statbuf)) - return true; - } else if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || - !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { - if (!stat(L3_MON_PATH, &statbuf)) { - inf = fopen(L3_MON_FEATURES_PATH, "r"); - if (!inf) - return false; - - if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { - res = fgrep(inf, "llc_occupancy"); - if (res) { - found = true; - free(res); - } - } - - if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { - res = fgrep(inf, "mbm_total_bytes"); - if (res) { - free(res); - res = fgrep(inf, "mbm_local_bytes"); - if (res) { - found = true; - free(res); - } - } - } - fclose(inf); - } - } + snprintf(res_path, sizeof(res_path), "%s/%s", INFO_PATH, resource); + + if (stat(res_path, &statbuf)) + return false; + + if (!feature) + return true; + + snprintf(res_path, sizeof(res_path), "%s/%s/mon_features", INFO_PATH, resource); + inf = fopen(res_path, "r"); + if (!inf) + return false; + + res = fgrep(inf, feature); + free(res); + fclose(inf); - return found; + return !!res; } int filter_dmesg(void) From a8eba04777849eb59bf3c2d59a0ad4ada9046dbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 2 Oct 2023 12:48:12 +0300 Subject: [PATCH 0356/1397] selftests/resctrl: Fix feature checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 06035f019422ba17e85c11e70d6d8bdbe9fa1afd upstream. The MBA and CMT tests expect support of other features to be able to run. When platform only supports MBA but not MBM, MBA test will fail with: Failed to open total bw file: No such file or directory When platform only supports CMT but not CAT, CMT test will fail with: Failed to open bit mask file '/sys/fs/resctrl/info/L3/cbm_mask': No such file or directory It leads to the test reporting test fail (even if no test was run at all). Extend feature checks to cover these two conditions to show these tests were skipped rather than failed. Fixes: ee0415681eb6 ("selftests/resctrl: Use resctrl/info for feature detection") Signed-off-by: Ilpo Järvinen Tested-by: Shaopeng Tan Reviewed-by: Reinette Chatre Reviewed-by: Shaopeng Tan Cc: # selftests/resctrl: Refactor feature check to use resource and feature name Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/resctrl/resctrl_tests.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 65b27aa315b1..59a361660d8c 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -111,7 +111,9 @@ static void run_mba_test(char **benchmark_cmd, int cpu_no, char *bw_report) return; } - if (!validate_resctrl_feature_request("MB", NULL) || (get_vendor() != ARCH_INTEL)) { + if (!validate_resctrl_feature_request("MB", NULL) || + !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") || + (get_vendor() != ARCH_INTEL)) { ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n"); goto umount; } @@ -135,7 +137,8 @@ static void run_cmt_test(char **benchmark_cmd, int cpu_no) return; } - if (!validate_resctrl_feature_request("L3_MON", "llc_occupancy")) { + if (!validate_resctrl_feature_request("L3_MON", "llc_occupancy") || + !validate_resctrl_feature_request("L3", NULL)) { ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n"); goto umount; } From 7fb9d792c06d664587b7aebb5b5c0ec8ec2bdc26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 2 Oct 2023 12:48:13 +0300 Subject: [PATCH 0357/1397] selftests/resctrl: Reduce failures due to outliers in MBA/MBM tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ef43c30858754d99373a63dff33280a9969b49bc upstream. The initial value of 5% chosen for the maximum allowed percentage difference between resctrl mbm value and IMC mbm value in commit 06bd03a57f8c ("selftests/resctrl: Fix MBA/MBM results reporting format") was "randomly chosen value" (as admitted by the changelog). When running tests in our lab across a large number platforms, 5% difference upper bound for success seems a bit on the low side for the MBA and MBM tests. Some platforms produce outliers that are slightly above that, typically 6-7%, which leads MBA/MBM test frequently failing. Replace the "randomly chosen value" with a success bound that is based on those measurements across large number of platforms by relaxing the MBA/MBM success bound to 8%. The relaxed bound removes the failures due the frequent outliers. Fixed commit description style error during merge: Shuah Khan Fixes: 06bd03a57f8c ("selftests/resctrl: Fix MBA/MBM results reporting format") Signed-off-by: Ilpo Järvinen Tested-by: Shaopeng Tan Reviewed-by: Reinette Chatre Reviewed-by: Shaopeng Tan Cc: Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/resctrl/mba_test.c | 2 +- tools/testing/selftests/resctrl/mbm_test.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index 4d2f145804b8..c7d1ec6d81ea 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -12,7 +12,7 @@ #define RESULT_FILE_NAME "result_mba" #define NUM_OF_RUNS 5 -#define MAX_DIFF_PERCENT 5 +#define MAX_DIFF_PERCENT 8 #define ALLOCATION_MAX 100 #define ALLOCATION_MIN 10 #define ALLOCATION_STEP 10 diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index c7de6f5977f6..d0c26a5e89fb 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -11,7 +11,7 @@ #include "resctrl.h" #define RESULT_FILE_NAME "result_mbm" -#define MAX_DIFF_PERCENT 5 +#define MAX_DIFF_PERCENT 8 #define NUM_OF_RUNS 5 static int From 465b557215664536cd20001872dade2cc4ae2d6a Mon Sep 17 00:00:00 2001 From: Jamie Lentin Date: Mon, 2 Oct 2023 15:09:14 +0000 Subject: [PATCH 0358/1397] hid: lenovo: Resend all settings on reset_resume for compact keyboards commit 2f2bd7cbd1d1548137b351040dc4e037d18cdfdc upstream. The USB Compact Keyboard variant requires a reset_resume function to restore keyboard configuration after a suspend in some situations. Move configuration normally done on probe to lenovo_features_set_cptkbd(), then recycle this for use on reset_resume. Without, the keyboard and driver would end up in an inconsistent state, breaking middle-button scrolling amongst other problems, and twiddling sysfs values wouldn't help as the middle-button mode won't be set until the driver is reloaded. Tested on a USB and Bluetooth Thinkpad Compact Keyboard. CC: stable@vger.kernel.org Fixes: 94eefa271323 ("HID: lenovo: Use native middle-button mode for compact keyboards") Signed-off-by: Jamie Lentin Signed-off-by: Martin Kepplinger Link: https://lore.kernel.org/r/20231002150914.22101-1-martink@posteo.de Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-lenovo.c | 50 +++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 9c1181313e44..7c1b33be9d13 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -526,6 +526,19 @@ static void lenovo_features_set_cptkbd(struct hid_device *hdev) int ret; struct lenovo_drvdata *cptkbd_data = hid_get_drvdata(hdev); + /* + * Tell the keyboard a driver understands it, and turn F7, F9, F11 into + * regular keys + */ + ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); + if (ret) + hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); + + /* Switch middle button to native mode */ + ret = lenovo_send_cmd_cptkbd(hdev, 0x09, 0x01); + if (ret) + hid_warn(hdev, "Failed to switch middle button: %d\n", ret); + ret = lenovo_send_cmd_cptkbd(hdev, 0x05, cptkbd_data->fn_lock); if (ret) hid_err(hdev, "Fn-lock setting failed: %d\n", ret); @@ -1148,22 +1161,6 @@ static int lenovo_probe_cptkbd(struct hid_device *hdev) } hid_set_drvdata(hdev, cptkbd_data); - /* - * Tell the keyboard a driver understands it, and turn F7, F9, F11 into - * regular keys (Compact only) - */ - if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD || - hdev->product == USB_DEVICE_ID_LENOVO_CBTKBD) { - ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); - if (ret) - hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); - } - - /* Switch middle button to native mode */ - ret = lenovo_send_cmd_cptkbd(hdev, 0x09, 0x01); - if (ret) - hid_warn(hdev, "Failed to switch middle button: %d\n", ret); - /* Set keyboard settings to known state */ cptkbd_data->middlebutton_state = 0; cptkbd_data->fn_lock = true; @@ -1286,6 +1283,24 @@ static int lenovo_probe(struct hid_device *hdev, return ret; } +#ifdef CONFIG_PM +static int lenovo_reset_resume(struct hid_device *hdev) +{ + switch (hdev->product) { + case USB_DEVICE_ID_LENOVO_CUSBKBD: + case USB_DEVICE_ID_LENOVO_TPIIUSBKBD: + if (hdev->type == HID_TYPE_USBMOUSE) + lenovo_features_set_cptkbd(hdev); + + break; + default: + break; + } + + return 0; +} +#endif + static void lenovo_remove_tpkbd(struct hid_device *hdev) { struct lenovo_drvdata *data_pointer = hid_get_drvdata(hdev); @@ -1402,6 +1417,9 @@ static struct hid_driver lenovo_driver = { .raw_event = lenovo_raw_event, .event = lenovo_event, .report_fixup = lenovo_report_fixup, +#ifdef CONFIG_PM + .reset_resume = lenovo_reset_resume, +#endif }; module_hid_driver(lenovo_driver); From 6d1adbec337228b31c6a76d908e36b8754872723 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Oct 2023 17:54:22 +0200 Subject: [PATCH 0359/1397] ASoC: codecs: wsa-macro: fix uninitialized stack variables with name prefix commit 72151ad0cba8a07df90130ff62c979520d71f23b upstream. Driver compares widget name in wsa_macro_spk_boost_event() widget event callback, however it does not handle component's name prefix. This leads to using uninitialized stack variables as registers and register values. Handle gracefully such case. Fixes: 2c4066e5d428 ("ASoC: codecs: lpass-wsa-macro: add dapm widgets and route") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231003155422.801160-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/lpass-wsa-macro.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index fff4a8b862a7..7e21cec3c2fb 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -1685,6 +1685,9 @@ static int wsa_macro_spk_boost_event(struct snd_soc_dapm_widget *w, boost_path_cfg1 = CDC_WSA_RX1_RX_PATH_CFG1; reg = CDC_WSA_RX1_RX_PATH_CTL; reg_mix = CDC_WSA_RX1_RX_PATH_MIX_CTL; + } else { + dev_warn(component->dev, "Incorrect widget name in the driver\n"); + return -EINVAL; } switch (event) { From 5706a65c3bd110e01dd695039c218d4ea2f67339 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Tue, 19 Sep 2023 09:25:25 +0800 Subject: [PATCH 0360/1397] jbd2: fix potential data lost in recovering journal raced with synchronizing fs bdev commit 61187fce8600e8ef90e601be84f9d0f3222c1206 upstream. JBD2 makes sure journal data is fallen on fs device by sync_blockdev(), however, other process could intercept the EIO information from bdev's mapping, which leads journal recovering successful even EIO occurs during data written back to fs device. We found this problem in our product, iscsi + multipath is chosen for block device of ext4. Unstable network may trigger kpartx to rescan partitions in device mapper layer. Detailed process is shown as following: mount kpartx irq jbd2_journal_recover do_one_pass memcpy(nbh->b_data, obh->b_data) // copy data to fs dev from journal mark_buffer_dirty // mark bh dirty vfs_read generic_file_read_iter // dio filemap_write_and_wait_range __filemap_fdatawrite_range do_writepages block_write_full_folio submit_bh_wbc >> EIO occurs in disk << end_buffer_async_write mark_buffer_write_io_error mapping_set_error set_bit(AS_EIO, &mapping->flags) // set! filemap_check_errors test_and_clear_bit(AS_EIO, &mapping->flags) // clear! err2 = sync_blockdev filemap_write_and_wait filemap_check_errors test_and_clear_bit(AS_EIO, &mapping->flags) // false err2 = 0 Filesystem is mounted successfully even data from journal is failed written into disk, and ext4/ocfs2 could become corrupted. Fix it by comparing the wb_err state in fs block device before recovering and after recovering. A reproducer can be found in the kernel bugzilla referenced below. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217888 Cc: stable@vger.kernel.org Signed-off-by: Zhihao Cheng Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230919012525.1783108-1-chengzhihao1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/jbd2/recovery.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index c269a7d29a46..5b771a3d8d9a 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -289,6 +289,8 @@ int jbd2_journal_recover(journal_t *journal) journal_superblock_t * sb; struct recovery_info info; + errseq_t wb_err; + struct address_space *mapping; memset(&info, 0, sizeof(info)); sb = journal->j_superblock; @@ -306,6 +308,9 @@ int jbd2_journal_recover(journal_t *journal) return 0; } + wb_err = 0; + mapping = journal->j_fs_dev->bd_inode->i_mapping; + errseq_check_and_advance(&mapping->wb_err, &wb_err); err = do_one_pass(journal, &info, PASS_SCAN); if (!err) err = do_one_pass(journal, &info, PASS_REVOKE); @@ -327,6 +332,9 @@ int jbd2_journal_recover(journal_t *journal) jbd2_journal_clear_revoke(journal); err2 = sync_blockdev(journal->j_fs_dev); + if (!err) + err = err2; + err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err); if (!err) err = err2; /* Make sure all replayed data is on permanent storage */ From 0b98335aded47ebf3d218f69fc6ab0ceb7e423c6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 4 Sep 2023 17:32:27 -0700 Subject: [PATCH 0361/1397] quota: explicitly forbid quota files from being encrypted commit d3cc1b0be258191d6360c82ea158c2972f8d3991 upstream. Since commit d7e7b9af104c ("fscrypt: stop using keyrings subsystem for fscrypt_master_key"), xfstest generic/270 causes a WARNING when run on f2fs with test_dummy_encryption in the mount options: $ kvm-xfstests -c f2fs/encrypt generic/270 [...] WARNING: CPU: 1 PID: 2453 at fs/crypto/keyring.c:240 fscrypt_destroy_keyring+0x1f5/0x260 The cause of the WARNING is that not all encrypted inodes have been evicted before fscrypt_destroy_keyring() is called, which violates an assumption. This happens because the test uses an external quota file, which gets automatically encrypted due to test_dummy_encryption. Encryption of quota files has never really been supported. On ext4, ext4_quota_read() does not decrypt the data, so encrypted quota files are always considered invalid on ext4. On f2fs, f2fs_quota_read() uses the pagecache, so trying to use an encrypted quota file gets farther, resulting in the issue described above being possible. But this was never intended to be possible, and there is no use case for it. Therefore, make the quota support layer explicitly reject using IS_ENCRYPTED inodes when quotaon is attempted. Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Jan Kara Message-Id: <20230905003227.326998-1-ebiggers@kernel.org> Signed-off-by: Greg Kroah-Hartman --- fs/quota/dquot.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 31e897ad5e6a..023b91b4e1f0 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2351,6 +2351,20 @@ static int vfs_setup_quota_inode(struct inode *inode, int type) if (sb_has_quota_loaded(sb, type)) return -EBUSY; + /* + * Quota files should never be encrypted. They should be thought of as + * filesystem metadata, not user data. New-style internal quota files + * cannot be encrypted by users anyway, but old-style external quota + * files could potentially be incorrectly created in an encrypted + * directory, hence this explicit check. Some reasons why encrypted + * quota files don't work include: (1) some filesystems that support + * encryption don't handle it in their quota_read and quota_write, and + * (2) cleaning up encrypted quota files at unmount would need special + * consideration, as quota files are cleaned up later than user files. + */ + if (IS_ENCRYPTED(inode)) + return -EINVAL; + dqopt->files[type] = igrab(inode); if (!dqopt->files[type]) return -EIO; From b3d81d3e8b607116af4323840bb5a904918acea2 Mon Sep 17 00:00:00 2001 From: Benjamin Bara Date: Sat, 15 Jul 2023 09:53:23 +0200 Subject: [PATCH 0362/1397] kernel/reboot: emergency_restart: Set correct system_state commit 60466c067927abbcaff299845abd4b7069963139 upstream. As the emergency restart does not call kernel_restart_prepare(), the system_state stays in SYSTEM_RUNNING. Since bae1d3a05a8b, this hinders i2c_in_atomic_xfer_mode() from becoming active, and therefore might lead to avoidable warnings in the restart handlers, e.g.: [ 12.667612] WARNING: CPU: 1 PID: 1 at kernel/rcu/tree_plugin.h:318 rcu_note_context_switch+0x33c/0x6b0 [ 12.676926] Voluntary context switch within RCU read-side critical section! ... [ 12.742376] schedule_timeout from wait_for_completion_timeout+0x90/0x114 [ 12.749179] wait_for_completion_timeout from tegra_i2c_wait_completion+0x40/0x70 ... [ 12.994527] atomic_notifier_call_chain from machine_restart+0x34/0x58 [ 13.001050] machine_restart from panic+0x2a8/0x32c Avoid these by setting the correct system_state. Fixes: bae1d3a05a8b ("i2c: core: remove use of in_atomic()") Cc: stable@vger.kernel.org # v5.2+ Reviewed-by: Dmitry Osipenko Tested-by: Nishanth Menon Signed-off-by: Benjamin Bara Link: https://lore.kernel.org/r/20230327-tegra-pmic-reboot-v7-1-18699d5dcd76@skidata.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- kernel/reboot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/reboot.c b/kernel/reboot.c index 3bba88c7ffc6..6ebef11c8876 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -74,6 +74,7 @@ void __weak (*pm_power_off)(void); void emergency_restart(void) { kmsg_dump(KMSG_DUMP_EMERG); + system_state = SYSTEM_RESTART; machine_emergency_restart(); } EXPORT_SYMBOL_GPL(emergency_restart); From 08c52a25fa57eb48f53647217036357ecda42e62 Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer Date: Tue, 31 Oct 2023 16:22:36 -0400 Subject: [PATCH 0363/1397] scripts/gdb/vmalloc: disable on no-MMU commit 6620999f0d41e4fd6f047727936a964c3399d249 upstream. vmap_area does not exist on no-MMU, therefore the GDB scripts fail to load: Traceback (most recent call last): File "<...>/vmlinux-gdb.py", line 51, in import linux.vmalloc File "<...>/scripts/gdb/linux/vmalloc.py", line 14, in vmap_area_ptr_type = vmap_area_type.get_type().pointer() ^^^^^^^^^^^^^^^^^^^^^^^^^ File "<...>/scripts/gdb/linux/utils.py", line 28, in get_type self._type = gdb.lookup_type(self._name) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ gdb.error: No struct type named vmap_area. To fix this, disable the command and add an informative error message if CONFIG_MMU is not defined, following the example of lx-slabinfo. Link: https://lkml.kernel.org/r/20231031202235.2655333-2-ben.wolsieffer@hefring.com Fixes: 852622bf3616 ("scripts/gdb/vmalloc: add vmallocinfo support") Signed-off-by: Ben Wolsieffer Cc: Jan Kiszka Cc: Kieran Bingham Cc: Kuan-Ying Lee Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- scripts/gdb/linux/constants.py.in | 1 + scripts/gdb/linux/vmalloc.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index 04c87b570aab..e810e0c27ff1 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -158,3 +158,4 @@ LX_CONFIG(CONFIG_STACKDEPOT) LX_CONFIG(CONFIG_PAGE_OWNER) LX_CONFIG(CONFIG_SLUB_DEBUG) LX_CONFIG(CONFIG_SLAB_FREELIST_HARDENED) +LX_CONFIG(CONFIG_MMU) diff --git a/scripts/gdb/linux/vmalloc.py b/scripts/gdb/linux/vmalloc.py index 48e4a4fae7bb..d3c8a0274d1e 100644 --- a/scripts/gdb/linux/vmalloc.py +++ b/scripts/gdb/linux/vmalloc.py @@ -10,8 +10,9 @@ import re from linux import lists, utils, stackdepot, constants, mm -vmap_area_type = utils.CachedType('struct vmap_area') -vmap_area_ptr_type = vmap_area_type.get_type().pointer() +if constants.LX_CONFIG_MMU: + vmap_area_type = utils.CachedType('struct vmap_area') + vmap_area_ptr_type = vmap_area_type.get_type().pointer() def is_vmalloc_addr(x): pg_ops = mm.page_ops().ops @@ -25,6 +26,9 @@ def __init__(self): super(LxVmallocInfo, self).__init__("lx-vmallocinfo", gdb.COMMAND_DATA) def invoke(self, arg, from_tty): + if not constants.LX_CONFIG_MMU: + raise gdb.GdbError("Requires MMU support") + vmap_area_list = gdb.parse_and_eval('vmap_area_list') for vmap_area in lists.list_for_each_entry(vmap_area_list, vmap_area_ptr_type, "list"): if not vmap_area['vm']: From cc64f5ea94442d8fb663ead545ccfb6976cc2a11 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 13 Sep 2023 16:12:47 -0400 Subject: [PATCH 0364/1397] fs: use nth_page() in place of direct struct page manipulation commit 8db0ec791f7788cd21e7f91ee5ff42c1c458d0e7 upstream. When dealing with hugetlb pages, struct page is not guaranteed to be contiguous on SPARSEMEM without VMEMMAP. Use nth_page() to handle it properly. Without the fix, a wrong subpage might be checked for HWPoison, causing wrong number of bytes of a page copied to user space. No bug is reported. The fix comes from code inspection. Link: https://lkml.kernel.org/r/20230913201248.452081-5-zi.yan@sent.com Fixes: 38c1ddbde6c6 ("hugetlbfs: improve read HWPOISON hugepage") Signed-off-by: Zi Yan Reviewed-by: Muchun Song Cc: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Mike Rapoport (IBM) Cc: Thomas Bogendoerfer Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/hugetlbfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 316c4cebd3f3..60fce26ff937 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -295,7 +295,7 @@ static size_t adjust_range_hwpoison(struct page *page, size_t offset, size_t byt size_t res = 0; /* First subpage to start the loop. */ - page += offset / PAGE_SIZE; + page = nth_page(page, offset / PAGE_SIZE); offset %= PAGE_SIZE; while (1) { if (is_raw_hwpoison_page_in_hugepage(page)) @@ -309,7 +309,7 @@ static size_t adjust_range_hwpoison(struct page *page, size_t offset, size_t byt break; offset += n; if (offset == PAGE_SIZE) { - page++; + page = nth_page(page, 1); offset = 0; } } From df5062f0815ab3b2477cc146ec9a71f4686340e8 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 13 Sep 2023 16:12:48 -0400 Subject: [PATCH 0365/1397] mips: use nth_page() in place of direct struct page manipulation commit aa5fe31b6b59210cb4ea28a59e68781f48eeca74 upstream. __flush_dcache_pages() is called during hugetlb migration via migrate_pages() -> migrate_hugetlbs() -> unmap_and_move_huge_page() -> move_to_new_folio() -> flush_dcache_folio(). And with hugetlb and without sparsemem vmemmap, struct page is not guaranteed to be contiguous beyond a section. Use nth_page() instead. Without the fix, a wrong address might be used for data cache page flush. No bug is reported. The fix comes from code inspection. Link: https://lkml.kernel.org/r/20230913201248.452081-6-zi.yan@sent.com Fixes: 15fa3e8e3269 ("mips: implement the new page table range API") Signed-off-by: Zi Yan Cc: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Mike Rapoport (IBM) Cc: Muchun Song Cc: Thomas Bogendoerfer Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 02042100e267..7f830634dbe7 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -117,7 +117,7 @@ void __flush_dcache_pages(struct page *page, unsigned int nr) * get faulted into the tlb (and thus flushed) anyways. */ for (i = 0; i < nr; i++) { - addr = (unsigned long)kmap_local_page(page + i); + addr = (unsigned long)kmap_local_page(nth_page(page, i)); flush_data_cache_page(addr); kunmap_local((void *)addr); } From 3473cf43b9068b9dfef2f545f833f33c6a544b91 Mon Sep 17 00:00:00 2001 From: Benjamin Bara Date: Sat, 15 Jul 2023 09:53:24 +0200 Subject: [PATCH 0366/1397] i2c: core: Run atomic i2c xfer when !preemptible commit aa49c90894d06e18a1ee7c095edbd2f37c232d02 upstream. Since bae1d3a05a8b, i2c transfers are non-atomic if preemption is disabled. However, non-atomic i2c transfers require preemption (e.g. in wait_for_completion() while waiting for the DMA). panic() calls preempt_disable_notrace() before calling emergency_restart(). Therefore, if an i2c device is used for the restart, the xfer should be atomic. This avoids warnings like: [ 12.667612] WARNING: CPU: 1 PID: 1 at kernel/rcu/tree_plugin.h:318 rcu_note_context_switch+0x33c/0x6b0 [ 12.676926] Voluntary context switch within RCU read-side critical section! ... [ 12.742376] schedule_timeout from wait_for_completion_timeout+0x90/0x114 [ 12.749179] wait_for_completion_timeout from tegra_i2c_wait_completion+0x40/0x70 ... [ 12.994527] atomic_notifier_call_chain from machine_restart+0x34/0x58 [ 13.001050] machine_restart from panic+0x2a8/0x32c Use !preemptible() instead, which is basically the same check as pre-v5.2. Fixes: bae1d3a05a8b ("i2c: core: remove use of in_atomic()") Cc: stable@vger.kernel.org # v5.2+ Suggested-by: Dmitry Osipenko Acked-by: Wolfram Sang Reviewed-by: Dmitry Osipenko Tested-by: Nishanth Menon Signed-off-by: Benjamin Bara Link: https://lore.kernel.org/r/20230327-tegra-pmic-reboot-v7-2-18699d5dcd76@skidata.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core.h b/drivers/i2c/i2c-core.h index 1247e6e6e975..05b8b8dfa9bd 100644 --- a/drivers/i2c/i2c-core.h +++ b/drivers/i2c/i2c-core.h @@ -29,7 +29,7 @@ int i2c_dev_irq_from_resources(const struct resource *resources, */ static inline bool i2c_in_atomic_xfer_mode(void) { - return system_state > SYSTEM_RUNNING && irqs_disabled(); + return system_state > SYSTEM_RUNNING && !preemptible(); } static inline int __i2c_lock_bus_helper(struct i2c_adapter *adap) From 4c004ed95273f22228ce67147f4a5615e7a62845 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 11 Jul 2023 17:13:34 +0800 Subject: [PATCH 0367/1397] selftests/clone3: Fix broken test under !CONFIG_TIME_NS commit fc7f04dc23db50206bee7891516ed4726c3f64cf upstream. When execute the following command to test clone3 under !CONFIG_TIME_NS: # make headers && cd tools/testing/selftests/clone3 && make && ./clone3 we can see the following error info: # [7538] Trying clone3() with flags 0x80 (size 0) # Invalid argument - Failed to create new process # [7538] clone3() with flags says: -22 expected 0 not ok 18 [7538] Result (-22) is different than expected (0) ... # Totals: pass:18 fail:1 xfail:0 xpass:0 skip:0 error:0 This is because if CONFIG_TIME_NS is not set, but the flag CLONE_NEWTIME (0x80) is used to clone a time namespace, it will return -EINVAL in copy_time_ns(). If kernel does not support CONFIG_TIME_NS, /proc/self/ns/time will be not exist, and then we should skip clone3() test with CLONE_NEWTIME. With this patch under !CONFIG_TIME_NS: # make headers && cd tools/testing/selftests/clone3 && make && ./clone3 ... # Time namespaces are not supported ok 18 # SKIP Skipping clone3() with CLONE_NEWTIME ... # Totals: pass:18 fail:0 xfail:0 xpass:0 skip:1 error:0 Link: https://lkml.kernel.org/r/1689066814-13295-1-git-send-email-yangtiezhu@loongson.cn Fixes: 515bddf0ec41 ("selftests/clone3: test clone3 with CLONE_NEWTIME") Signed-off-by: Tiezhu Yang Suggested-by: Thomas Gleixner Cc: Christian Brauner Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/clone3/clone3.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index e60cf4da8fb0..1c61e3c022cb 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -196,7 +196,12 @@ int main(int argc, char *argv[]) CLONE3_ARGS_NO_TEST); /* Do a clone3() in a new time namespace */ - test_clone3(CLONE_NEWTIME, 0, 0, CLONE3_ARGS_NO_TEST); + if (access("/proc/self/ns/time", F_OK) == 0) { + test_clone3(CLONE_NEWTIME, 0, 0, CLONE3_ARGS_NO_TEST); + } else { + ksft_print_msg("Time namespaces are not supported\n"); + ksft_test_result_skip("Skipping clone3() with CLONE_NEWTIME\n"); + } /* Do a clone3() with exit signal (SIGCHLD) in flags */ test_clone3(SIGCHLD, 0, -EINVAL, CLONE3_ARGS_NO_TEST); From 707e483e711614e2ca4cf7bd73d983564cae2ccd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 31 Oct 2023 15:10:33 -0400 Subject: [PATCH 0368/1397] tracing: Have the user copy of synthetic event address use correct context commit 4f7969bcd6d33042d62e249b41b5578161e4c868 upstream. A synthetic event is created by the synthetic event interface that can read both user or kernel address memory. In reality, it reads any arbitrary memory location from within the kernel. If the address space is in USER (where CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE is set) then it uses strncpy_from_user_nofault() to copy strings otherwise it uses strncpy_from_kernel_nofault(). But since both functions use the same variable there's no annotation to what that variable is (ie. __user). This makes sparse complain. Quiet sparse by typecasting the strncpy_from_user_nofault() variable to a __user pointer. Link: https://lore.kernel.org/linux-trace-kernel/20231031151033.73c42e23@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Fixes: 0934ae9977c2 ("tracing: Fix reading strings from synthetic events"); Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311010013.fm8WTxa5-lkp@intel.com/ Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_synth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 14cb275a0bab..846e02c0fb59 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -452,7 +452,7 @@ static unsigned int trace_string(struct synth_trace_event *entry, #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE if ((unsigned long)str_val < TASK_SIZE) - ret = strncpy_from_user_nofault(str_field, str_val, STR_VAR_LEN_MAX); + ret = strncpy_from_user_nofault(str_field, (const void __user *)str_val, STR_VAR_LEN_MAX); else #endif ret = strncpy_from_kernel_nofault(str_field, str_val, STR_VAR_LEN_MAX); From e4eccf228ac2c050d17af9a7bc6457a4d4e9116b Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 17 Oct 2023 18:38:50 -0700 Subject: [PATCH 0369/1397] driver core: Release all resources during unbind before updating device links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2e84dc37920012b458e9458b19fc4ed33f81bc74 upstream. This commit fixes a bug in commit 9ed9895370ae ("driver core: Functional dependencies tracking support") where the device link status was incorrectly updated in the driver unbind path before all the device's resources were released. Fixes: 9ed9895370ae ("driver core: Functional dependencies tracking support") Cc: stable Reported-by: Uwe Kleine-König Closes: https://lore.kernel.org/all/20231014161721.f4iqyroddkcyoefo@pengutronix.de/ Signed-off-by: Saravana Kannan Cc: Thierry Reding Cc: Yang Yingliang Cc: Andy Shevchenko Cc: Mark Brown Cc: Matti Vaittinen Cc: James Clark Acked-by: "Rafael J. Wysocki" Tested-by: Uwe Kleine-König Acked-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20231018013851.3303928-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a528cec24264..0c3725c3eefa 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -1274,8 +1274,8 @@ static void __device_release_driver(struct device *dev, struct device *parent) if (dev->bus && dev->bus->dma_cleanup) dev->bus->dma_cleanup(dev); - device_links_driver_cleanup(dev); device_unbind_cleanup(dev); + device_links_driver_cleanup(dev); klist_remove(&dev->p->knode_driver); device_pm_check_callbacks(dev); From 07f2f69f0b82578da6bda5f10320ddfe0be43d96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sanju=C3=A1n=20Garc=C3=ADa=2C=20Jorge?= Date: Thu, 19 Oct 2023 14:15:34 +0000 Subject: [PATCH 0370/1397] mcb: fix error handling for different scenarios when parsing commit 63ba2d07b4be72b94216d20561f43e1150b25d98 upstream. chameleon_parse_gdd() may fail for different reasons and end up in the err tag. Make sure we at least always free the mcb_device allocated with mcb_alloc_dev(). If mcb_device_register() fails, make sure to give up the reference in the same place the device was added. Fixes: 728ac3389296 ("mcb: mcb-parse: fix error handing in chameleon_parse_gdd()") Cc: stable Reviewed-by: Jose Javier Rodriguez Barbarin Signed-off-by: Jorge Sanjuan Garcia Link: https://lore.kernel.org/r/20231019141434.57971-2-jorge.sanjuangarcia@duagon.com Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-core.c | 1 + drivers/mcb/mcb-parse.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c index 0cac5bead84f..d4eec0900980 100644 --- a/drivers/mcb/mcb-core.c +++ b/drivers/mcb/mcb-core.c @@ -246,6 +246,7 @@ int mcb_device_register(struct mcb_bus *bus, struct mcb_device *dev) return 0; out: + put_device(&dev->dev); return ret; } diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c index 656b6b71c768..1ae37e693de0 100644 --- a/drivers/mcb/mcb-parse.c +++ b/drivers/mcb/mcb-parse.c @@ -106,7 +106,7 @@ static int chameleon_parse_gdd(struct mcb_bus *bus, return 0; err: - put_device(&mdev->dev); + mcb_free_dev(mdev); return ret; } From d356d3379d8782f0b6db324cd6e01e718ea8fe2e Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 9 Oct 2023 10:24:50 +0200 Subject: [PATCH 0371/1397] dmaengine: stm32-mdma: correct desc prep when channel running commit 03f25d53b145bc2f7ccc82fc04e4482ed734f524 upstream. In case of the prep descriptor while the channel is already running, the CCR register value stored into the channel could already have its EN bit set. This would lead to a bad transfer since, at start transfer time, enabling the channel while other registers aren't yet properly set. To avoid this, ensure to mask the CCR_EN bit when storing the ccr value into the mdma channel structure. Fixes: a4ffb13c8946 ("dmaengine: Add STM32 MDMA driver") Signed-off-by: Alain Volmat Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Tested-by: Alain Volmat Link: https://lore.kernel.org/r/20231009082450.452877-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/stm32-mdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index bae08b3f55c7..f414efdbd809 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -489,7 +489,7 @@ static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan, src_maxburst = chan->dma_config.src_maxburst; dst_maxburst = chan->dma_config.dst_maxburst; - ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)); + ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)) & ~STM32_MDMA_CCR_EN; ctcr = stm32_mdma_read(dmadev, STM32_MDMA_CTCR(chan->id)); ctbr = stm32_mdma_read(dmadev, STM32_MDMA_CTBR(chan->id)); @@ -965,7 +965,7 @@ stm32_mdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest, dma_addr_t src, if (!desc) return NULL; - ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)); + ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)) & ~STM32_MDMA_CCR_EN; ctcr = stm32_mdma_read(dmadev, STM32_MDMA_CTCR(chan->id)); ctbr = stm32_mdma_read(dmadev, STM32_MDMA_CTBR(chan->id)); cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id)); From 8847617e2f0a0344ff88147a2cd89a42bd309ed8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 17 Oct 2023 21:07:04 +0200 Subject: [PATCH 0372/1397] s390/mm: add missing arch_set_page_dat() call to vmem_crst_alloc() commit 09cda0a400519b1541591c506e54c9c48e3101bf upstream. If the cmma no-dat feature is available all pages that are not used for dynamic address translation are marked as "no-dat" with the ESSA instruction. This information is visible to the hypervisor, so that the hypervisor can optimize purging of guest TLB entries. This also means that pages which are used for dynamic address translation must not be marked as "no-dat", since the hypervisor may then incorrectly not purge guest TLB entries. Region and segment tables allocated via vmem_crst_alloc() are incorrectly marked as "no-dat", as soon as slab_is_available() returns true. Such tables are allocated e.g. when kernel page tables are split, memory is hotplugged, or a DCSS segment is loaded. Fix this by adding the missing arch_set_page_dat() call. Cc: Reviewed-by: Claudio Imbrenda Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/vmem.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 6957d2ed97bf..6d276103c6d5 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -45,8 +46,11 @@ void *vmem_crst_alloc(unsigned long val) unsigned long *table; table = vmem_alloc_pages(CRST_ALLOC_ORDER); - if (table) - crst_table_init(table, val); + if (!table) + return NULL; + crst_table_init(table, val); + if (slab_is_available()) + arch_set_page_dat(virt_to_page(table), CRST_ALLOC_ORDER); return table; } From bfabe8d0c1c106f6567f5688f29fd4cee1675d6c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 20 Oct 2023 17:26:50 +0200 Subject: [PATCH 0373/1397] s390/mm: add missing arch_set_page_dat() call to gmap allocations commit 1954da4a2b621a3328a63382cae7e5f5e2af502c upstream. If the cmma no-dat feature is available all pages that are not used for dynamic address translation are marked as "no-dat" with the ESSA instruction. This information is visible to the hypervisor, so that the hypervisor can optimize purging of guest TLB entries. This also means that pages which are used for dynamic address translation must not be marked as "no-dat", since the hypervisor may then incorrectly not purge guest TLB entries. Region, segment, and page tables allocated within the gmap code are incorrectly marked as "no-dat", since an explicit call to arch_set_page_dat() is missing, which would remove the "no-dat" mark. In order to fix this add a new gmap_alloc_crst() function which should be used to allocate region and segment tables, and which also calls arch_set_page_dat(). Also add the arch_set_page_dat() call to page_table_alloc_pgste(). Cc: Reviewed-by: Claudio Imbrenda Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/gmap.c | 24 ++++++++++++++++++------ arch/s390/mm/pgalloc.c | 1 + 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 906a7bfc2a78..20786f6883b2 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -21,10 +21,22 @@ #include #include +#include #include #define GMAP_SHADOW_FAKE_TABLE 1ULL +static struct page *gmap_alloc_crst(void) +{ + struct page *page; + + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + if (!page) + return NULL; + arch_set_page_dat(page, CRST_ALLOC_ORDER); + return page; +} + /** * gmap_alloc - allocate and initialize a guest address space * @limit: maximum address of the gmap address space @@ -67,7 +79,7 @@ static struct gmap *gmap_alloc(unsigned long limit) spin_lock_init(&gmap->guest_table_lock); spin_lock_init(&gmap->shadow_lock); refcount_set(&gmap->ref_count, 1); - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) goto out_free; page->index = 0; @@ -308,7 +320,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long *new; /* since we dont free the gmap table until gmap_free we can unlock */ - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) return -ENOMEM; new = page_to_virt(page); @@ -1759,7 +1771,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) return -ENOMEM; page->index = r2t & _REGION_ENTRY_ORIGIN; @@ -1843,7 +1855,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, BUG_ON(!gmap_is_shadow(sg)); /* Allocate a shadow region second table */ - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) return -ENOMEM; page->index = r3t & _REGION_ENTRY_ORIGIN; @@ -1927,7 +1939,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); /* Allocate a shadow segment table */ - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) return -ENOMEM; page->index = sgt & _REGION_ENTRY_ORIGIN; @@ -2855,7 +2867,7 @@ int s390_replace_asce(struct gmap *gmap) if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT) return -EINVAL; - page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + page = gmap_alloc_crst(); if (!page) return -ENOMEM; page->index = 0; diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 07fc660a24aa..6396d6b06a3a 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -146,6 +146,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm) ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (ptdesc) { table = (u64 *)ptdesc_to_virt(ptdesc); + arch_set_page_dat(virt_to_page(table), 0); memset64(table, _PAGE_INVALID, PTRS_PER_PTE); memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } From 7453e81061a492085f00e9d3bcb26a95217577d9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 24 Oct 2023 10:15:19 +0200 Subject: [PATCH 0374/1397] s390/cmma: fix detection of DAT pages commit 44d93045247661acbd50b1629e62f415f2747577 upstream. If the cmma no-dat feature is available the kernel page tables are walked to identify and mark all pages which are used for address translation (all region, segment, and page tables). In a subsequent loop all other pages are marked as "no-dat" pages with the ESSA instruction. This information is visible to the hypervisor, so that the hypervisor can optimize purging of guest TLB entries. The initial loop however is incorrect: only the first three of the four pages which belong to segment and region tables will be marked as being used for DAT. The last page is incorrectly marked as no-dat. This can result in incorrect guest TLB flushes. Fix this by simply marking all four pages. Cc: Reviewed-by: Claudio Imbrenda Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/page-states.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 1e2ea706aa22..00e7b0876dc5 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -121,7 +121,7 @@ static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end) continue; if (!pud_folded(*pud)) { page = phys_to_page(pud_val(*pud)); - for (i = 0; i < 3; i++) + for (i = 0; i < 4; i++) set_bit(PG_arch_1, &page[i].flags); } mark_kernel_pmd(pud, addr, next); @@ -142,7 +142,7 @@ static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end) continue; if (!p4d_folded(*p4d)) { page = phys_to_page(p4d_val(*p4d)); - for (i = 0; i < 3; i++) + for (i = 0; i < 4; i++) set_bit(PG_arch_1, &page[i].flags); } mark_kernel_pud(p4d, addr, next); @@ -164,7 +164,7 @@ static void mark_kernel_pgd(void) continue; if (!pgd_folded(*pgd)) { page = phys_to_page(pgd_val(*pgd)); - for (i = 0; i < 3; i++) + for (i = 0; i < 4; i++) set_bit(PG_arch_1, &page[i].flags); } mark_kernel_p4d(pgd, addr, next); From 6b16a62e509313a45b10a46d6c9ce310bb77c5e5 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 13 Sep 2023 16:12:44 -0400 Subject: [PATCH 0375/1397] mm/cma: use nth_page() in place of direct struct page manipulation commit 2e7cfe5cd5b6b0b98abf57a3074885979e187c1c upstream. Patch series "Use nth_page() in place of direct struct page manipulation", v3. On SPARSEMEM without VMEMMAP, struct page is not guaranteed to be contiguous, since each memory section's memmap might be allocated independently. hugetlb pages can go beyond a memory section size, thus direct struct page manipulation on hugetlb pages/subpages might give wrong struct page. Kernel provides nth_page() to do the manipulation properly. Use that whenever code can see hugetlb pages. This patch (of 5): When dealing with hugetlb pages, manipulating struct page pointers directly can get to wrong struct page, since struct page is not guaranteed to be contiguous on SPARSEMEM without VMEMMAP. Use nth_page() to handle it properly. Without the fix, page_kasan_tag_reset() could reset wrong page tags, causing a wrong kasan result. No related bug is reported. The fix comes from code inspection. Link: https://lkml.kernel.org/r/20230913201248.452081-1-zi.yan@sent.com Link: https://lkml.kernel.org/r/20230913201248.452081-2-zi.yan@sent.com Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Signed-off-by: Zi Yan Reviewed-by: Muchun Song Cc: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Mike Rapoport (IBM) Cc: Thomas Bogendoerfer Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/cma.c b/mm/cma.c index da2967c6a223..2b2494fd6b59 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -505,7 +505,7 @@ struct page *cma_alloc(struct cma *cma, unsigned long count, */ if (page) { for (i = 0; i < count; i++) - page_kasan_tag_reset(page + i); + page_kasan_tag_reset(nth_page(page, i)); } if (ret && !no_warn) { From 18576d128e9bc91aa1eac85e2f93909ebc24ee2a Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 13 Sep 2023 16:12:45 -0400 Subject: [PATCH 0376/1397] mm/hugetlb: use nth_page() in place of direct struct page manipulation commit 426056efe835cf4864ccf4c328fe3af9146fc539 upstream. When dealing with hugetlb pages, manipulating struct page pointers directly can get to wrong struct page, since struct page is not guaranteed to be contiguous on SPARSEMEM without VMEMMAP. Use nth_page() to handle it properly. A wrong or non-existing page might be tried to be grabbed, either leading to a non freeable page or kernel memory access errors. No bug is reported. It comes from code inspection. Link: https://lkml.kernel.org/r/20230913201248.452081-3-zi.yan@sent.com Fixes: 57a196a58421 ("hugetlb: simplify hugetlb handling in follow_page_mask") Signed-off-by: Zi Yan Reviewed-by: Muchun Song Cc: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Mike Rapoport (IBM) Cc: Thomas Bogendoerfer Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1301ba7b2c9a..5f0adffeceb1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6520,7 +6520,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, } } - page += ((address & ~huge_page_mask(h)) >> PAGE_SHIFT); + page = nth_page(page, ((address & ~huge_page_mask(h)) >> PAGE_SHIFT)); /* * Note that page may be a sub-page, and with vmemmap From 2c1589c7f2ad2dc20f4015027b8e3b80e3dd3367 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 13 Sep 2023 16:12:46 -0400 Subject: [PATCH 0377/1397] mm/memory_hotplug: use pfn math in place of direct struct page manipulation commit 1640a0ef80f6d572725f5b0330038c18e98ea168 upstream. When dealing with hugetlb pages, manipulating struct page pointers directly can get to wrong struct page, since struct page is not guaranteed to be contiguous on SPARSEMEM without VMEMMAP. Use pfn calculation to handle it properly. Without the fix, a wrong number of page might be skipped. Since skip cannot be negative, scan_movable_page() will end early and might miss a movable page with -ENOENT. This might fail offline_pages(). No bug is reported. The fix comes from code inspection. Link: https://lkml.kernel.org/r/20230913201248.452081-4-zi.yan@sent.com Fixes: eeb0efd071d8 ("mm,memory_hotplug: fix scan_movable_pages() for gigantic hugepages") Signed-off-by: Zi Yan Reviewed-by: Muchun Song Acked-by: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Mike Rapoport (IBM) Cc: Thomas Bogendoerfer Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memory_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1b03f4ec6fd2..3b301c4023ff 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1689,7 +1689,7 @@ static int scan_movable_pages(unsigned long start, unsigned long end, */ if (HPageMigratable(head)) goto found; - skip = compound_nr(head) - (page - head); + skip = compound_nr(head) - (pfn - page_to_pfn(head)); pfn += skip - 1; } return -ENOENT; From 2500a33323b867ab6229ea2491d800b7b8ddfb64 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 28 Aug 2023 17:08:56 +0200 Subject: [PATCH 0378/1397] mm: make PR_MDWE_REFUSE_EXEC_GAIN an unsigned long commit 0da668333fb07805c2836d5d50e26eda915b24a1 upstream. Defining a prctl flag as an int is a footgun because on a 64 bit machine and with a variadic implementation of prctl (like in musl and glibc), when used directly as a prctl argument, it can get casted to long with garbage upper bits which would result in unexpected behaviors. This patch changes the constant to an unsigned long to eliminate that possibilities. This does not break UAPI. I think that a stable backport would be "nice to have": to reduce the chances that users build binaries that could end up with garbage bits in their MDWE prctl arguments. We are not aware of anyone having yet encountered this corner case with MDWE prctls but a backport would reduce the likelihood it happens, since this sort of issues has happened with other prctls. But If this is perceived as a backporting burden, I suppose we could also live without a stable backport. Link: https://lkml.kernel.org/r/20230828150858.393570-5-revest@chromium.org Fixes: b507808ebce2 ("mm: implement memory-deny-write-execute as a prctl") Signed-off-by: Florent Revest Suggested-by: Alexey Izbyshev Reviewed-by: David Hildenbrand Reviewed-by: Kees Cook Acked-by: Catalin Marinas Cc: Anshuman Khandual Cc: Ayush Jain Cc: Greg Thelen Cc: Joey Gouly Cc: KP Singh Cc: Mark Brown Cc: Michal Hocko Cc: Peter Xu Cc: Ryan Roberts Cc: Szabolcs Nagy Cc: Topi Miettinen Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/prctl.h | 2 +- tools/include/uapi/linux/prctl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 3c36aeade991..9a85c69782bd 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -283,7 +283,7 @@ struct prctl_mm_map { /* Memory deny write / execute */ #define PR_SET_MDWE 65 -# define PR_MDWE_REFUSE_EXEC_GAIN 1 +# define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0) #define PR_GET_MDWE 66 diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 3c36aeade991..9a85c69782bd 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -283,7 +283,7 @@ struct prctl_mm_map { /* Memory deny write / execute */ #define PR_SET_MDWE 65 -# define PR_MDWE_REFUSE_EXEC_GAIN 1 +# define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0) #define PR_GET_MDWE 66 From 63fbcea8d0d9f68583d4f4a0988b44f3171a6d0c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 20 Oct 2023 22:30:29 +0200 Subject: [PATCH 0379/1397] mtd: cfi_cmdset_0001: Byte swap OTP info commit 565fe150624ee77dc63a735cc1b3bff5101f38a3 upstream. Currently the offset into the device when looking for OTP bits can go outside of the address of the MTD NOR devices, and if that memory isn't readable, bad things happen on the IXP4xx (added prints that illustrate the problem before the crash): cfi_intelext_otp_walk walk OTP on chip 0 start at reg_prot_offset 0x00000100 ixp4xx_copy_from copy from 0x00000100 to 0xc880dd78 cfi_intelext_otp_walk walk OTP on chip 0 start at reg_prot_offset 0x12000000 ixp4xx_copy_from copy from 0x12000000 to 0xc880dd78 8<--- cut here --- Unable to handle kernel paging request at virtual address db000000 [db000000] *pgd=00000000 (...) This happens in this case because the IXP4xx is big endian and the 32- and 16-bit fields in the struct cfi_intelext_otpinfo are not properly byteswapped. Compare to how the code in read_pri_intelext() byteswaps the fields in struct cfi_pri_intelext. Adding a small byte swapping loop for the OTP in read_pri_intelext() and the crash goes away. The problem went unnoticed for many years until I enabled CONFIG_MTD_OTP on the IXP4xx as well, triggering the bug. Cc: stable@vger.kernel.org Reviewed-by: Nicolas Pitre Signed-off-by: Linus Walleij Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20231020-mtd-otp-byteswap-v4-1-0d132c06aa9d@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/chips/cfi_cmdset_0001.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index 11b06fefaa0e..c10693ba265b 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -422,9 +422,25 @@ read_pri_intelext(struct map_info *map, __u16 adr) extra_size = 0; /* Protection Register info */ - if (extp->NumProtectionFields) + if (extp->NumProtectionFields) { + struct cfi_intelext_otpinfo *otp = + (struct cfi_intelext_otpinfo *)&extp->extra[0]; + extra_size += (extp->NumProtectionFields - 1) * - sizeof(struct cfi_intelext_otpinfo); + sizeof(struct cfi_intelext_otpinfo); + + if (extp_size >= sizeof(*extp) + extra_size) { + int i; + + /* Do some byteswapping if necessary */ + for (i = 0; i < extp->NumProtectionFields - 1; i++) { + otp->ProtRegAddr = le32_to_cpu(otp->ProtRegAddr); + otp->FactGroups = le16_to_cpu(otp->FactGroups); + otp->UserGroups = le16_to_cpu(otp->UserGroups); + otp++; + } + } + } } if (extp->MinorVersion >= '1') { From 07ffcd8ec79cf7383e1e45815f4842fd357991c2 Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Wed, 11 Oct 2023 14:51:31 +0000 Subject: [PATCH 0380/1397] cxl/region: Do not try to cleanup after cxl_region_setup_targets() fails commit 0718588c7aaa7a1510b4de972370535b61dddd0d upstream. Commit 5e42bcbc3fef ("cxl/region: decrement ->nr_targets on error in cxl_region_attach()") tried to avoid 'eiw' initialization errors when ->nr_targets exceeded 16, by just decrementing ->nr_targets when cxl_region_setup_targets() failed. Commit 86987c766276 ("cxl/region: Cleanup target list on attach error") extended that cleanup to also clear cxled->pos and p->targets[pos]. The initialization error was incidentally fixed separately by: Commit 8d4285425714 ("cxl/region: Fix port setup uninitialized variable warnings") which was merged a few days after 5e42bcbc3fef. But now the original cleanup when cxl_region_setup_targets() fails prevents endpoint and switch decoder resources from being reused: 1) the cleanup does not set the decoder's region to NULL, which results in future dpa_size_store() calls returning -EBUSY 2) the decoder is not properly freed, which results in future commit errors associated with the upstream switch Now that the initialization errors were fixed separately, the proper cleanup for this case is to just return immediately. Then the resources associated with this target get cleanup up as normal when the failed region is deleted. The ->nr_targets decrement in the error case also helped prevent a p->targets[] array overflow, so add a new check to prevent against that overflow. Tested by trying to create an invalid region for a 2 switch * 2 endpoint topology, and then following up with creating a valid region. Fixes: 5e42bcbc3fef ("cxl/region: decrement ->nr_targets on error in cxl_region_attach()") Cc: Signed-off-by: Jim Harris Reviewed-by: Jonathan Cameron Acked-by: Dan Carpenter Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/169703589120.1202031.14696100866518083806.stgit@bgt-140510-bm03.eng.stellus.in Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/region.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 85c0881fba44..359af0623302 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1675,6 +1675,12 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -ENXIO; } + if (p->nr_targets >= p->interleave_ways) { + dev_dbg(&cxlr->dev, "region already has %d endpoints\n", + p->nr_targets); + return -EINVAL; + } + ep_port = cxled_to_port(cxled); root_port = cxlrd_to_port(cxlrd); dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge); @@ -1767,7 +1773,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, if (p->nr_targets == p->interleave_ways) { rc = cxl_region_setup_targets(cxlr); if (rc) - goto err_decrement; + return rc; p->state = CXL_CONFIG_ACTIVE; } @@ -1799,12 +1805,6 @@ static int cxl_region_attach(struct cxl_region *cxlr, } return 0; - -err_decrement: - p->nr_targets--; - cxled->pos = -1; - p->targets[pos] = NULL; - return rc; } static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) From 9f8f73391b3102522fe7c2eb301f4f3816f7c130 Mon Sep 17 00:00:00 2001 From: Joshua Yeong Date: Wed, 13 Sep 2023 11:17:45 +0800 Subject: [PATCH 0381/1397] i3c: master: cdns: Fix reading status register commit 4bd8405257da717cd556f99e5fb68693d12c9766 upstream. IBIR_DEPTH and CMDR_DEPTH should read from status0 instead of status1. Cc: stable@vger.kernel.org Fixes: 603f2bee2c54 ("i3c: master: Add driver for Cadence IP") Signed-off-by: Joshua Yeong Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20230913031743.11439-2-joshua.yeong@starfivetech.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/i3c/master/i3c-master-cdns.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index 49551db71bc9..8f1fda3c7ac5 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -191,7 +191,7 @@ #define SLV_STATUS1_HJ_DIS BIT(18) #define SLV_STATUS1_MR_DIS BIT(17) #define SLV_STATUS1_PROT_ERR BIT(16) -#define SLV_STATUS1_DA(x) (((s) & GENMASK(15, 9)) >> 9) +#define SLV_STATUS1_DA(s) (((s) & GENMASK(15, 9)) >> 9) #define SLV_STATUS1_HAS_DA BIT(8) #define SLV_STATUS1_DDR_RX_FULL BIT(7) #define SLV_STATUS1_DDR_TX_FULL BIT(6) @@ -1623,13 +1623,13 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) /* Device ID0 is reserved to describe this master. */ master->maxdevs = CONF_STATUS0_DEVS_NUM(val); master->free_rr_slots = GENMASK(master->maxdevs, 1); + master->caps.ibirfifodepth = CONF_STATUS0_IBIR_DEPTH(val); + master->caps.cmdrfifodepth = CONF_STATUS0_CMDR_DEPTH(val); val = readl(master->regs + CONF_STATUS1); master->caps.cmdfifodepth = CONF_STATUS1_CMD_DEPTH(val); master->caps.rxfifodepth = CONF_STATUS1_RX_DEPTH(val); master->caps.txfifodepth = CONF_STATUS1_TX_DEPTH(val); - master->caps.ibirfifodepth = CONF_STATUS0_IBIR_DEPTH(val); - master->caps.cmdrfifodepth = CONF_STATUS0_CMDR_DEPTH(val); spin_lock_init(&master->ibi.lock); master->ibi.num_slots = CONF_STATUS1_IBI_HW_RES(val); From 3188677d9aa0e9ae58c4a2cc84dc27b275cf4499 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 23 Oct 2023 12:16:53 -0400 Subject: [PATCH 0382/1397] i3c: master: svc: fix race condition in ibi work thread commit 6bf3fc268183816856c96b8794cd66146bc27b35 upstream. The ibi work thread operates asynchronously with other transfers, such as svc_i3c_master_priv_xfers(). Introduce mutex protection to ensure the completion of the entire i3c/i2c transaction. Fixes: dd3c52846d59 ("i3c: master: svc: Add Silvaco I3C master driver") Cc: Reviewed-by: Miquel Raynal Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20231023161658.3890811-2-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/i3c/master/svc-i3c-master.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 8f8295acdadb..87f819915c55 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -175,6 +175,7 @@ struct svc_i3c_regs_save { * @ibi.slots: Available IBI slots * @ibi.tbq_slot: To be queued IBI slot * @ibi.lock: IBI lock + * @lock: Transfer lock, protect between IBI work thread and callbacks from master */ struct svc_i3c_master { struct i3c_master_controller base; @@ -203,6 +204,7 @@ struct svc_i3c_master { /* Prevent races within IBI handlers */ spinlock_t lock; } ibi; + struct mutex lock; }; /** @@ -384,6 +386,7 @@ static void svc_i3c_master_ibi_work(struct work_struct *work) u32 status, val; int ret; + mutex_lock(&master->lock); /* Acknowledge the incoming interrupt with the AUTOIBI mechanism */ writel(SVC_I3C_MCTRL_REQUEST_AUTO_IBI | SVC_I3C_MCTRL_IBIRESP_AUTO, @@ -460,6 +463,7 @@ static void svc_i3c_master_ibi_work(struct work_struct *work) reenable_ibis: svc_i3c_master_enable_interrupts(master, SVC_I3C_MINT_SLVSTART); + mutex_unlock(&master->lock); } static irqreturn_t svc_i3c_master_irq_handler(int irq, void *dev_id) @@ -1204,9 +1208,11 @@ static int svc_i3c_master_send_bdcast_ccc_cmd(struct svc_i3c_master *master, cmd->read_len = 0; cmd->continued = false; + mutex_lock(&master->lock); svc_i3c_master_enqueue_xfer(master, xfer); if (!wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000))) svc_i3c_master_dequeue_xfer(master, xfer); + mutex_unlock(&master->lock); ret = xfer->ret; kfree(buf); @@ -1250,9 +1256,11 @@ static int svc_i3c_master_send_direct_ccc_cmd(struct svc_i3c_master *master, cmd->read_len = read_len; cmd->continued = false; + mutex_lock(&master->lock); svc_i3c_master_enqueue_xfer(master, xfer); if (!wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000))) svc_i3c_master_dequeue_xfer(master, xfer); + mutex_unlock(&master->lock); if (cmd->read_len != xfer_len) ccc->dests[0].payload.len = cmd->read_len; @@ -1309,9 +1317,11 @@ static int svc_i3c_master_priv_xfers(struct i3c_dev_desc *dev, cmd->continued = (i + 1) < nxfers; } + mutex_lock(&master->lock); svc_i3c_master_enqueue_xfer(master, xfer); if (!wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000))) svc_i3c_master_dequeue_xfer(master, xfer); + mutex_unlock(&master->lock); ret = xfer->ret; svc_i3c_master_free_xfer(xfer); @@ -1347,9 +1357,11 @@ static int svc_i3c_master_i2c_xfers(struct i2c_dev_desc *dev, cmd->continued = (i + 1 < nxfers); } + mutex_lock(&master->lock); svc_i3c_master_enqueue_xfer(master, xfer); if (!wait_for_completion_timeout(&xfer->comp, msecs_to_jiffies(1000))) svc_i3c_master_dequeue_xfer(master, xfer); + mutex_unlock(&master->lock); ret = xfer->ret; svc_i3c_master_free_xfer(xfer); @@ -1540,6 +1552,8 @@ static int svc_i3c_master_probe(struct platform_device *pdev) INIT_WORK(&master->hj_work, svc_i3c_master_hj_work); INIT_WORK(&master->ibi_work, svc_i3c_master_ibi_work); + mutex_init(&master->lock); + ret = devm_request_irq(dev, master->irq, svc_i3c_master_irq_handler, IRQF_NO_SUSPEND, "svc-i3c-irq", master); if (ret) From eaf992baaceacbc6e479e5b6085106d7bb3062e1 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 23 Oct 2023 12:16:54 -0400 Subject: [PATCH 0383/1397] i3c: master: svc: fix wrong data return when IBI happen during start frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5e5e3c92e748a6d859190e123b9193cf4911fcca upstream. ┌─────┐ ┏──┐ ┏──┐ ┏──┐ ┏──┐ ┏──┐ ┏──┐ ┏──┐ ┏──┐ ┌───── SCL: ┘ └─────┛ └──┛ └──┛ └──┛ └──┛ └──┛ └──┛ └──┛ └──┘ ───┐ ┌─────┐ ┌─────┐ ┌───────────┐ SDA: └───────────────────────┘ └─────┘ └─────┘ └───── xxx╱ ╲╱ ╲╱ ╲╱ ╲╱ ╲ : xxx╲IBI ╱╲ Addr(0x0a) ╱╲ RW ╱╲NACK╱╲ S ╱ If an In-Band Interrupt (IBI) occurs and IBI work thread is not immediately scheduled, when svc_i3c_master_priv_xfers() initiates the I3C transfer and attempts to send address 0x7e, the target interprets it as an IBI handler and returns the target address 0x0a. However, svc_i3c_master_priv_xfers() does not handle this case and proceeds with other transfers, resulting in incorrect data being returned. Add IBIWON check in svc_i3c_master_xfer(). In case this situation occurs, return a failure to the driver. Fixes: dd3c52846d59 ("i3c: master: svc: Add Silvaco I3C master driver") Cc: Reviewed-by: Miquel Raynal Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20231023161658.3890811-3-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/i3c/master/svc-i3c-master.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 87f819915c55..ab5f2d2d9fac 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -1011,6 +1011,9 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master, u32 reg; int ret; + /* clean SVC_I3C_MINT_IBIWON w1c bits */ + writel(SVC_I3C_MINT_IBIWON, master->regs + SVC_I3C_MSTATUS); + writel(SVC_I3C_MCTRL_REQUEST_START_ADDR | xfer_type | SVC_I3C_MCTRL_IBIRESP_NACK | @@ -1029,6 +1032,23 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master, goto emit_stop; } + /* + * According to I3C spec ver 1.1.1, 5.1.2.2.3 Consequence of Controller Starting a Frame + * with I3C Target Address. + * + * The I3C Controller normally should start a Frame, the Address may be arbitrated, and so + * the Controller shall monitor to see whether an In-Band Interrupt request, a Controller + * Role Request (i.e., Secondary Controller requests to become the Active Controller), or + * a Hot-Join Request has been made. + * + * If missed IBIWON check, the wrong data will be return. When IBIWON happen, return failure + * and yield the above events handler. + */ + if (SVC_I3C_MSTATUS_IBIWON(reg)) { + ret = -ENXIO; + goto emit_stop; + } + if (rnw) ret = svc_i3c_master_read(master, in, xfer_len); else From 93ba03b2a90e694de6076cc53aa9f7c8dd71ea5c Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 23 Oct 2023 12:16:55 -0400 Subject: [PATCH 0384/1397] i3c: master: svc: fix ibi may not return mandatory data byte commit c85e209b799f12d18a90ae6353b997b1bb1274a5 upstream. MSTATUS[RXPEND] is only updated after the data transfer cycle started. This creates an issue when the I3C clock is slow, and the CPU is running fast enough that MSTATUS[RXPEND] may not be updated when the code reaches checking point. As a result, mandatory data can be missed. Add a wait for MSTATUS[COMPLETE] to ensure that all mandatory data is already in FIFO. It also works without mandatory data. Fixes: dd3c52846d59 ("i3c: master: svc: Add Silvaco I3C master driver") Cc: Reviewed-by: Miquel Raynal Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20231023161658.3890811-4-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/i3c/master/svc-i3c-master.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index ab5f2d2d9fac..1926ba04920c 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -333,6 +333,7 @@ static int svc_i3c_master_handle_ibi(struct svc_i3c_master *master, struct i3c_ibi_slot *slot; unsigned int count; u32 mdatactrl; + int ret, val; u8 *buf; slot = i3c_generic_ibi_get_free_slot(data->ibi_pool); @@ -342,6 +343,13 @@ static int svc_i3c_master_handle_ibi(struct svc_i3c_master *master, slot->len = 0; buf = slot->data; + ret = readl_relaxed_poll_timeout(master->regs + SVC_I3C_MSTATUS, val, + SVC_I3C_MSTATUS_COMPLETE(val), 0, 1000); + if (ret) { + dev_err(master->dev, "Timeout when polling for COMPLETE\n"); + return ret; + } + while (SVC_I3C_MSTATUS_RXPEND(readl(master->regs + SVC_I3C_MSTATUS)) && slot->len < SVC_I3C_FIFO_SIZE) { mdatactrl = readl(master->regs + SVC_I3C_MDATACTRL); From ac33f259748461ea8c48c5f32b4ea626021eccc0 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 23 Oct 2023 12:16:56 -0400 Subject: [PATCH 0385/1397] i3c: master: svc: fix check wrong status register in irq handler commit 225d5ef048c4ed01a475c95d94833bd7dd61072d upstream. svc_i3c_master_irq_handler() wrongly checks register SVC_I3C_MINTMASKED. It should be SVC_I3C_MSTATUS. Fixes: dd3c52846d59 ("i3c: master: svc: Add Silvaco I3C master driver") Cc: Reviewed-by: Miquel Raynal Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20231023161658.3890811-5-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/i3c/master/svc-i3c-master.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 1926ba04920c..23805db842f7 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -477,7 +477,7 @@ static void svc_i3c_master_ibi_work(struct work_struct *work) static irqreturn_t svc_i3c_master_irq_handler(int irq, void *dev_id) { struct svc_i3c_master *master = (struct svc_i3c_master *)dev_id; - u32 active = readl(master->regs + SVC_I3C_MINTMASKED); + u32 active = readl(master->regs + SVC_I3C_MSTATUS); if (!SVC_I3C_MSTATUS_SLVSTART(active)) return IRQ_NONE; From 98e366afabf14e1ee1bd9e7f2bb2deabbce38059 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 23 Oct 2023 12:16:57 -0400 Subject: [PATCH 0386/1397] i3c: master: svc: fix SDA keep low when polling IBIWON timeout happen commit dfd7cd6aafdb1f5ba93828e97e56b38304b23a05 upstream. Upon IBIWON timeout, the SDA line will always be kept low if we don't emit a stop. Calling svc_i3c_master_emit_stop() there will let the bus return to idle state. Fixes: dd3c52846d59 ("i3c: master: svc: Add Silvaco I3C master driver") Cc: Reviewed-by: Miquel Raynal Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20231023161658.3890811-6-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/i3c/master/svc-i3c-master.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 23805db842f7..26871128ae63 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -405,6 +405,7 @@ static void svc_i3c_master_ibi_work(struct work_struct *work) SVC_I3C_MSTATUS_IBIWON(val), 0, 1000); if (ret) { dev_err(master->dev, "Timeout when polling for IBIWON\n"); + svc_i3c_master_emit_stop(master); goto reenable_ibis; } From e726cb3d353dbd503756de7a4ca9d114d1748f3f Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 23 Oct 2023 12:16:58 -0400 Subject: [PATCH 0387/1397] i3c: master: svc: fix random hot join failure since timeout error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9aaeef113c55248ecf3ab941c2e4460aaa8b8b9a upstream. master side report: silvaco-i3c-master 44330000.i3c-master: Error condition: MSTATUS 0x020090c7, MERRWARN 0x00100000 BIT 20: TIMEOUT error The module has stalled too long in a frame. This happens when: - The TX FIFO or RX FIFO is not handled and the bus is stuck in the middle of a message, - No STOP was issued and between messages, - IBI manual is used and no decision was made. The maximum stall period is 100 μs. This can be considered as being just a warning as the system IRQ latency can easily be greater than 100us. Fixes: dd3c52846d59 ("i3c: master: svc: Add Silvaco I3C master driver") Cc: Signed-off-by: Frank Li Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20231023161658.3890811-7-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/i3c/master/svc-i3c-master.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index 26871128ae63..c395e5229414 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -93,6 +93,7 @@ #define SVC_I3C_MINTMASKED 0x098 #define SVC_I3C_MERRWARN 0x09C #define SVC_I3C_MERRWARN_NACK BIT(2) +#define SVC_I3C_MERRWARN_TIMEOUT BIT(20) #define SVC_I3C_MDMACTRL 0x0A0 #define SVC_I3C_MDATACTRL 0x0AC #define SVC_I3C_MDATACTRL_FLUSHTB BIT(0) @@ -227,6 +228,14 @@ static bool svc_i3c_master_error(struct svc_i3c_master *master) if (SVC_I3C_MSTATUS_ERRWARN(mstatus)) { merrwarn = readl(master->regs + SVC_I3C_MERRWARN); writel(merrwarn, master->regs + SVC_I3C_MERRWARN); + + /* Ignore timeout error */ + if (merrwarn & SVC_I3C_MERRWARN_TIMEOUT) { + dev_dbg(master->dev, "Warning condition: MSTATUS 0x%08x, MERRWARN 0x%08x\n", + mstatus, merrwarn); + return false; + } + dev_err(master->dev, "Error condition: MSTATUS 0x%08x, MERRWARN 0x%08x\n", mstatus, merrwarn); From 8a9ab9037f7530ce5b42695fd36e4e91fab956fd Mon Sep 17 00:00:00 2001 From: Jim Harris Date: Thu, 26 Oct 2023 10:09:06 -0700 Subject: [PATCH 0388/1397] cxl/region: Fix x1 root-decoder granularity calculations commit 98a04c7aced2b43b3ac4befe216c4eecc7257d4b upstream. Root decoder granularity must match value from CFWMS, which may not be the region's granularity for non-interleaved root decoders. So when calculating granularities for host bridge decoders, use the region's granularity instead of the root decoder's granularity to ensure the correct granularities are set for the host bridge decoders and any downstream switch decoders. Test configuration is 1 host bridge * 2 switches * 2 endpoints per switch. Region created with 2048 granularity using following command line: cxl create-region -m -d decoder0.0 -w 4 mem0 mem2 mem1 mem3 \ -g 2048 -s 2048M Use "cxl list -PDE | grep granularity" to get a view of the granularity set at each level of the topology. Before this patch: "interleave_granularity":2048, "interleave_granularity":2048, "interleave_granularity":512, "interleave_granularity":2048, "interleave_granularity":2048, "interleave_granularity":512, "interleave_granularity":256, After: "interleave_granularity":2048, "interleave_granularity":2048, "interleave_granularity":4096, "interleave_granularity":2048, "interleave_granularity":2048, "interleave_granularity":4096, "interleave_granularity":2048, Fixes: 27b3f8d13830 ("cxl/region: Program target lists") Cc: Signed-off-by: Jim Harris Link: https://lore.kernel.org/r/169824893473.1403938.16110924262989774582.stgit@bgt-140510-bm03.eng.stellus.in [djbw: fixup the prebuilt cxl_test region] Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/region.c | 9 ++++++++- tools/testing/cxl/test/cxl.c | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 359af0623302..9d60020c5cb3 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1127,7 +1127,14 @@ static int cxl_port_setup_targets(struct cxl_port *port, } if (is_cxl_root(parent_port)) { - parent_ig = cxlrd->cxlsd.cxld.interleave_granularity; + /* + * Root decoder IG is always set to value in CFMWS which + * may be different than this region's IG. We can use the + * region's IG here since interleave_granularity_store() + * does not allow interleaved host-bridges with + * root IG != region IG. + */ + parent_ig = p->interleave_granularity; parent_iw = cxlrd->cxlsd.cxld.interleave_ways; /* * For purposes of address bit routing, use power-of-2 math for diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index fb6ab9cef84f..b88546299902 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -831,7 +831,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) cxld->interleave_ways = 2; else cxld->interleave_ways = 1; - cxld->interleave_granularity = 256; + cxld->interleave_granularity = 4096; cxld->hpa_range = (struct range) { .start = base, .end = base + size - 1, From 6b2e428e673b3f55965674a426c40922e91388aa Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 27 Oct 2023 20:13:23 -0700 Subject: [PATCH 0389/1397] cxl/port: Fix delete_endpoint() vs parent unregistration race commit 8d2ad999ca3c64cb08cf6a58d227b9d9e746d708 upstream. The CXL subsystem, at cxl_mem ->probe() time, establishes a lineage of ports (struct cxl_port objects) between an endpoint and the root of a CXL topology. Each port including the endpoint port is attached to the cxl_port driver. Given that setup, it follows that when either any port in that lineage goes through a cxl_port ->remove() event, or the memdev goes through a cxl_mem ->remove() event. The hierarchy below the removed port, or the entire hierarchy if the memdev is removed needs to come down. The delete_endpoint() callback is careful to check whether it is being called to tear down the hierarchy, or if it is only being called to teardown the memdev because an ancestor port is going through ->remove(). That care needs to take the device_lock() of the endpoint's parent. Which requires 2 bugs to be fixed: 1/ A reference on the parent is needed to prevent use-after-free scenarios like this signature: BUG: spinlock bad magic on CPU#0, kworker/u56:0/11 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc38 05/24/2023 Workqueue: cxl_port detach_memdev [cxl_core] RIP: 0010:spin_bug+0x65/0xa0 Call Trace: do_raw_spin_lock+0x69/0xa0 __mutex_lock+0x695/0xb80 delete_endpoint+0xad/0x150 [cxl_core] devres_release_all+0xb8/0x110 device_unbind_cleanup+0xe/0x70 device_release_driver_internal+0x1d2/0x210 detach_memdev+0x15/0x20 [cxl_core] process_one_work+0x1e3/0x4c0 worker_thread+0x1dd/0x3d0 2/ In the case of RCH topologies, the parent device that needs to be locked is not always @port->dev as returned by cxl_mem_find_port(), use endpoint->dev.parent instead. Fixes: 8dd2bc0f8e02 ("cxl/mem: Add the cxl_mem driver") Cc: Reported-by: Robert Richter Closes: http://lore.kernel.org/r/20231018171713.1883517-2-rrichter@amd.com Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/port.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 2c6001592fe2..6a75a3cb601e 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1242,35 +1242,39 @@ static struct device *grandparent(struct device *dev) return NULL; } +static struct device *endpoint_host(struct cxl_port *endpoint) +{ + struct cxl_port *port = to_cxl_port(endpoint->dev.parent); + + if (is_cxl_root(port)) + return port->uport_dev; + return &port->dev; +} + static void delete_endpoint(void *data) { struct cxl_memdev *cxlmd = data; struct cxl_port *endpoint = cxlmd->endpoint; - struct cxl_port *parent_port; - struct device *parent; - - parent_port = cxl_mem_find_port(cxlmd, NULL); - if (!parent_port) - goto out; - parent = &parent_port->dev; + struct device *host = endpoint_host(endpoint); - device_lock(parent); - if (parent->driver && !endpoint->dead) { - devm_release_action(parent, cxl_unlink_parent_dport, endpoint); - devm_release_action(parent, cxl_unlink_uport, endpoint); - devm_release_action(parent, unregister_port, endpoint); + device_lock(host); + if (host->driver && !endpoint->dead) { + devm_release_action(host, cxl_unlink_parent_dport, endpoint); + devm_release_action(host, cxl_unlink_uport, endpoint); + devm_release_action(host, unregister_port, endpoint); } cxlmd->endpoint = NULL; - device_unlock(parent); - put_device(parent); -out: + device_unlock(host); put_device(&endpoint->dev); + put_device(host); } int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) { + struct device *host = endpoint_host(endpoint); struct device *dev = &cxlmd->dev; + get_device(host); get_device(&endpoint->dev); cxlmd->endpoint = endpoint; cxlmd->depth = endpoint->depth; From 63c7297c62b97aad24b89c7fed8173e8d543db1f Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sun, 25 Jun 2023 09:13:39 +0800 Subject: [PATCH 0390/1397] apparmor: Fix kernel-doc warnings in apparmor/audit.c [ Upstream commit 26c9ecb34f5f5fa43c041a220de01d7cbea97dd0 ] Fix kernel-doc warnings: security/apparmor/audit.c:150: warning: Function parameter or member 'type' not described in 'aa_audit_msg' Signed-off-by: Gaosheng Cui Signed-off-by: John Johansen Stable-dep-of: 157a3537d6bc ("apparmor: Fix regression in mount mediation") Signed-off-by: Sasha Levin --- security/apparmor/audit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 5a7978aa4b19..a3db0f8bd4f8 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -142,6 +142,7 @@ static void audit_pre(struct audit_buffer *ab, void *ca) /** * aa_audit_msg - Log a message to the audit subsystem + * @type: audit type for the message * @sa: audit event structure (NOT NULL) * @cb: optional callback fn for type specific fields (MAYBE NULL) */ From 73221ebe13d90d1d5ad96c1ef1825f84e961a02e Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sun, 25 Jun 2023 09:13:44 +0800 Subject: [PATCH 0391/1397] apparmor: Fix kernel-doc warnings in apparmor/lib.c [ Upstream commit 8921482286116af193980f04f2f2755775a410a5 ] Fix kernel-doc warnings: security/apparmor/lib.c:33: warning: Excess function parameter 'str' description in 'aa_free_str_table' security/apparmor/lib.c:33: warning: Function parameter or member 't' not described in 'aa_free_str_table' security/apparmor/lib.c:94: warning: Function parameter or member 'n' not described in 'skipn_spaces' security/apparmor/lib.c:390: warning: Excess function parameter 'deny' description in 'aa_check_perms' Signed-off-by: Gaosheng Cui Signed-off-by: John Johansen Stable-dep-of: 157a3537d6bc ("apparmor: Fix regression in mount mediation") Signed-off-by: Sasha Levin --- security/apparmor/lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index a630c951bb3b..8e1073477c09 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -27,7 +27,7 @@ struct aa_perms allperms = { .allow = ALL_PERMS_MASK, /** * aa_free_str_table - free entries str table - * @str: the string table to free (MAYBE NULL) + * @t: the string table to free (MAYBE NULL) */ void aa_free_str_table(struct aa_str_table *t) { @@ -85,6 +85,7 @@ char *aa_split_fqname(char *fqname, char **ns_name) /** * skipn_spaces - Removes leading whitespace from @str. * @str: The string to be stripped. + * @n: length of str to parse, will stop at \0 if encountered before n * * Returns a pointer to the first non-whitespace character in @str. * if all whitespace will return NULL @@ -371,7 +372,6 @@ int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target, * @profile: profile being checked * @perms: perms computed for the request * @request: requested perms - * @deny: Returns: explicit deny set * @sa: initialized audit structure (MAY BE NULL if not auditing) * @cb: callback fn for type specific fields (MAY BE NULL) * From 0e4721f553e287184634225c400d9433a9374c3d Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sun, 25 Jun 2023 09:13:46 +0800 Subject: [PATCH 0392/1397] apparmor: Fix kernel-doc warnings in apparmor/resource.c [ Upstream commit 13c1748e217078d437727eef333cb0387d13bc0e ] Fix kernel-doc warnings: security/apparmor/resource.c:111: warning: Function parameter or member 'label' not described in 'aa_task_setrlimit' security/apparmor/resource.c:111: warning: Function parameter or member 'new_rlim' not described in 'aa_task_setrlimit' security/apparmor/resource.c:111: warning: Function parameter or member 'resource' not described in 'aa_task_setrlimit' security/apparmor/resource.c:111: warning: Function parameter or member 'task' not described in 'aa_task_setrlimit' Signed-off-by: Gaosheng Cui Signed-off-by: John Johansen Stable-dep-of: 157a3537d6bc ("apparmor: Fix regression in mount mediation") Signed-off-by: Sasha Levin --- security/apparmor/resource.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index e85948164896..2bebc5d9e741 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -97,10 +97,10 @@ static int profile_setrlimit(struct aa_profile *profile, unsigned int resource, /** * aa_task_setrlimit - test permission to set an rlimit - * @label - label confining the task (NOT NULL) - * @task - task the resource is being set on - * @resource - the resource being set - * @new_rlim - the new resource limit (NOT NULL) + * @label: label confining the task (NOT NULL) + * @task: task the resource is being set on + * @resource: the resource being set + * @new_rlim: the new resource limit (NOT NULL) * * Control raising the processes hard limit. * From 75ae5a7883761087bdcc8d6a456fb1f174d34143 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sun, 25 Jun 2023 09:13:49 +0800 Subject: [PATCH 0393/1397] apparmor: Fix kernel-doc warnings in apparmor/policy.c [ Upstream commit 25ff0ff2d6286928dc516c74b879809c691c2dd8 ] Fix kernel-doc warnings: security/apparmor/policy.c:294: warning: Function parameter or member 'proxy' not described in 'aa_alloc_profile' security/apparmor/policy.c:785: warning: Function parameter or member 'label' not described in 'aa_policy_view_capable' security/apparmor/policy.c:785: warning: Function parameter or member 'ns' not described in 'aa_policy_view_capable' security/apparmor/policy.c:847: warning: Function parameter or member 'ns' not described in 'aa_may_manage_policy' security/apparmor/policy.c:964: warning: Function parameter or member 'hname' not described in '__lookup_replace' security/apparmor/policy.c:964: warning: Function parameter or member 'info' not described in '__lookup_replace' security/apparmor/policy.c:964: warning: Function parameter or member 'noreplace' not described in '__lookup_replace' security/apparmor/policy.c:964: warning: Function parameter or member 'ns' not described in '__lookup_replace' security/apparmor/policy.c:964: warning: Function parameter or member 'p' not described in '__lookup_replace' Signed-off-by: Gaosheng Cui Signed-off-by: John Johansen Stable-dep-of: 157a3537d6bc ("apparmor: Fix regression in mount mediation") Signed-off-by: Sasha Levin --- security/apparmor/policy.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index ec695a6caac7..b9aaaac84d8a 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -286,6 +286,7 @@ void aa_free_profile(struct aa_profile *profile) /** * aa_alloc_profile - allocate, initialize and return a new profile * @hname: name of the profile (NOT NULL) + * @proxy: proxy to use OR null if to allocate a new one * @gfp: allocation type * * Returns: refcount profile or NULL on failure @@ -775,8 +776,9 @@ static int policy_ns_capable(struct aa_label *label, /** * aa_policy_view_capable - check if viewing policy in at @ns is allowed - * label: label that is trying to view policy in ns - * ns: namespace being viewed by @label (may be NULL if @label's ns) + * @label: label that is trying to view policy in ns + * @ns: namespace being viewed by @label (may be NULL if @label's ns) + * * Returns: true if viewing policy is allowed * * If @ns is NULL then the namespace being viewed is assumed to be the @@ -840,6 +842,7 @@ bool aa_current_policy_admin_capable(struct aa_ns *ns) /** * aa_may_manage_policy - can the current task manage policy * @label: label to check if it can manage policy + * @ns: namespace being managed by @label (may be NULL if @label's ns) * @mask: contains the policy manipulation operation being done * * Returns: 0 if the task is allowed to manipulate policy else error @@ -951,11 +954,11 @@ static void __replace_profile(struct aa_profile *old, struct aa_profile *new) /** * __lookup_replace - lookup replacement information for a profile - * @ns - namespace the lookup occurs in - * @hname - name of profile to lookup - * @noreplace - true if not replacing an existing profile - * @p - Returns: profile to be replaced - * @info - Returns: info string on why lookup failed + * @ns: namespace the lookup occurs in + * @hname: name of profile to lookup + * @noreplace: true if not replacing an existing profile + * @p: Returns - profile to be replaced + * @info: Returns - info string on why lookup failed * * Returns: profile to replace (no ref) on success else ptr error */ From c57bc80f4508acd8c52bd89b01d324889065320d Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 14 Sep 2022 00:20:12 -0700 Subject: [PATCH 0394/1397] apparmor: combine common_audit_data and apparmor_audit_data [ Upstream commit bd7bd201ca46c211c3ab251ca9854787d1331a2f ] Everywhere where common_audit_data is used apparmor audit_data is also used. We can simplify the code and drop the use of the aad macro everywhere by combining the two structures. Reviewed-by: Georgia Garcia Signed-off-by: John Johansen Stable-dep-of: 157a3537d6bc ("apparmor: Fix regression in mount mediation") Signed-off-by: Sasha Levin --- security/apparmor/audit.c | 70 +++++++++++++++---------------- security/apparmor/capability.c | 24 +++++------ security/apparmor/file.c | 68 +++++++++++++++--------------- security/apparmor/include/audit.h | 34 ++++++++------- security/apparmor/include/net.h | 13 +++--- security/apparmor/include/perms.h | 4 +- security/apparmor/ipc.c | 39 ++++++++--------- security/apparmor/lib.c | 47 +++++++++++---------- security/apparmor/lsm.c | 12 +++--- security/apparmor/mount.c | 41 +++++++++--------- security/apparmor/net.c | 44 +++++++++---------- security/apparmor/policy.c | 19 +++++---- security/apparmor/policy_unpack.c | 29 ++++++------- security/apparmor/resource.c | 23 +++++----- security/apparmor/task.c | 35 ++++++++-------- 15 files changed, 257 insertions(+), 245 deletions(-) diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index a3db0f8bd4f8..06ad6a8fcce1 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -85,37 +85,36 @@ static const char *const aa_class_names[] = { /** * audit_pre() - core AppArmor function. * @ab: audit buffer to fill (NOT NULL) - * @ca: audit structure containing data to audit (NOT NULL) + * @va: audit structure containing data to audit (NOT NULL) * - * Record common AppArmor audit data from @sa + * Record common AppArmor audit data from @va */ -static void audit_pre(struct audit_buffer *ab, void *ca) +static void audit_pre(struct audit_buffer *ab, void *va) { - struct common_audit_data *sa = ca; + struct apparmor_audit_data *ad = aad_of_va(va); if (aa_g_audit_header) { audit_log_format(ab, "apparmor=\"%s\"", - aa_audit_type[aad(sa)->type]); + aa_audit_type[ad->type]); } - if (aad(sa)->op) { - audit_log_format(ab, " operation=\"%s\"", aad(sa)->op); - } + if (ad->op) + audit_log_format(ab, " operation=\"%s\"", ad->op); - if (aad(sa)->class) + if (ad->class) audit_log_format(ab, " class=\"%s\"", - aad(sa)->class <= AA_CLASS_LAST ? - aa_class_names[aad(sa)->class] : + ad->class <= AA_CLASS_LAST ? + aa_class_names[ad->class] : "unknown"); - if (aad(sa)->info) { - audit_log_format(ab, " info=\"%s\"", aad(sa)->info); - if (aad(sa)->error) - audit_log_format(ab, " error=%d", aad(sa)->error); + if (ad->info) { + audit_log_format(ab, " info=\"%s\"", ad->info); + if (ad->error) + audit_log_format(ab, " error=%d", ad->error); } - if (aad(sa)->label) { - struct aa_label *label = aad(sa)->label; + if (ad->label) { + struct aa_label *label = ad->label; if (label_isprofile(label)) { struct aa_profile *profile = labels_profile(label); @@ -134,43 +133,44 @@ static void audit_pre(struct audit_buffer *ab, void *ca) } } - if (aad(sa)->name) { + if (ad->name) { audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, aad(sa)->name); + audit_log_untrustedstring(ab, ad->name); } } /** * aa_audit_msg - Log a message to the audit subsystem * @type: audit type for the message - * @sa: audit event structure (NOT NULL) + * @ad: audit event structure (NOT NULL) * @cb: optional callback fn for type specific fields (MAYBE NULL) */ -void aa_audit_msg(int type, struct common_audit_data *sa, +void aa_audit_msg(int type, struct apparmor_audit_data *ad, void (*cb) (struct audit_buffer *, void *)) { - aad(sa)->type = type; - common_lsm_audit(sa, audit_pre, cb); + ad->type = type; + common_lsm_audit(&ad->common, audit_pre, cb); } /** * aa_audit - Log a profile based audit event to the audit subsystem * @type: audit type for the message * @profile: profile to check against (NOT NULL) - * @sa: audit event (NOT NULL) + * @ad: audit event (NOT NULL) * @cb: optional callback fn for type specific fields (MAYBE NULL) * * Handle default message switching based off of audit mode flags * * Returns: error on failure */ -int aa_audit(int type, struct aa_profile *profile, struct common_audit_data *sa, +int aa_audit(int type, struct aa_profile *profile, + struct apparmor_audit_data *ad, void (*cb) (struct audit_buffer *, void *)) { AA_BUG(!profile); if (type == AUDIT_APPARMOR_AUTO) { - if (likely(!aad(sa)->error)) { + if (likely(!ad->error)) { if (AUDIT_MODE(profile) != AUDIT_ALL) return 0; type = AUDIT_APPARMOR_AUDIT; @@ -182,24 +182,24 @@ int aa_audit(int type, struct aa_profile *profile, struct common_audit_data *sa, if (AUDIT_MODE(profile) == AUDIT_QUIET || (type == AUDIT_APPARMOR_DENIED && AUDIT_MODE(profile) == AUDIT_QUIET_DENIED)) - return aad(sa)->error; + return ad->error; if (KILL_MODE(profile) && type == AUDIT_APPARMOR_DENIED) type = AUDIT_APPARMOR_KILL; - aad(sa)->label = &profile->label; + ad->label = &profile->label; - aa_audit_msg(type, sa, cb); + aa_audit_msg(type, ad, cb); - if (aad(sa)->type == AUDIT_APPARMOR_KILL) + if (ad->type == AUDIT_APPARMOR_KILL) (void)send_sig_info(SIGKILL, NULL, - sa->type == LSM_AUDIT_DATA_TASK && sa->u.tsk ? - sa->u.tsk : current); + ad->common.type == LSM_AUDIT_DATA_TASK && + ad->common.u.tsk ? ad->common.u.tsk : current); - if (aad(sa)->type == AUDIT_APPARMOR_ALLOWED) - return complain_error(aad(sa)->error); + if (ad->type == AUDIT_APPARMOR_ALLOWED) + return complain_error(ad->error); - return aad(sa)->error; + return ad->error; } struct aa_audit_rule { diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index 326a51838ef2..58490cca035d 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -51,7 +51,7 @@ static void audit_cb(struct audit_buffer *ab, void *va) /** * audit_caps - audit a capability - * @sa: audit data + * @as: audit data * @profile: profile being tested for confinement (NOT NULL) * @cap: capability tested * @error: error code returned by test @@ -59,9 +59,9 @@ static void audit_cb(struct audit_buffer *ab, void *va) * Do auditing of capability and handle, audit/complain/kill modes switching * and duplicate message elimination. * - * Returns: 0 or sa->error on success, error code on failure + * Returns: 0 or ad->error on success, error code on failure */ -static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile, +static int audit_caps(struct apparmor_audit_data *ad, struct aa_profile *profile, int cap, int error) { struct aa_ruleset *rules = list_first_entry(&profile->rules, @@ -69,7 +69,7 @@ static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile, struct audit_cache *ent; int type = AUDIT_APPARMOR_AUTO; - aad(sa)->error = error; + ad->error = error; if (likely(!error)) { /* test if auditing is being forced */ @@ -101,7 +101,7 @@ static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile, } put_cpu_var(audit_cache); - return aa_audit(type, profile, sa, audit_cb); + return aa_audit(type, profile, ad, audit_cb); } /** @@ -109,12 +109,12 @@ static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile, * @profile: profile being enforced (NOT NULL, NOT unconfined) * @cap: capability to test if allowed * @opts: CAP_OPT_NOAUDIT bit determines whether audit record is generated - * @sa: audit data (MAY BE NULL indicating no auditing) + * @ad: audit data (MAY BE NULL indicating no auditing) * * Returns: 0 if allowed else -EPERM */ static int profile_capable(struct aa_profile *profile, int cap, - unsigned int opts, struct common_audit_data *sa) + unsigned int opts, struct apparmor_audit_data *ad) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); @@ -132,10 +132,10 @@ static int profile_capable(struct aa_profile *profile, int cap, /* audit the cap request in complain mode but note that it * should be optional. */ - aad(sa)->info = "optional: no audit"; + ad->info = "optional: no audit"; } - return audit_caps(sa, profile, cap, error); + return audit_caps(ad, profile, cap, error); } /** @@ -152,11 +152,11 @@ int aa_capable(struct aa_label *label, int cap, unsigned int opts) { struct aa_profile *profile; int error = 0; - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_CAP, AA_CLASS_CAP, OP_CAPABLE); + DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_CAP, AA_CLASS_CAP, OP_CAPABLE); - sa.u.cap = cap; + ad.common.u.cap = cap; error = fn_for_each_confined(label, profile, - profile_capable(profile, cap, opts, &sa)); + profile_capable(profile, cap, opts, &ad)); return error; } diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 698b124e649f..9ea95fa18e7d 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -44,33 +44,34 @@ static u32 map_mask_to_chr_mask(u32 mask) static void file_audit_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; + struct apparmor_audit_data *ad = aad(sa); kuid_t fsuid = current_fsuid(); char str[10]; - if (aad(sa)->request & AA_AUDIT_FILE_MASK) { + if (ad->request & AA_AUDIT_FILE_MASK) { aa_perm_mask_to_str(str, sizeof(str), aa_file_perm_chrs, - map_mask_to_chr_mask(aad(sa)->request)); + map_mask_to_chr_mask(ad->request)); audit_log_format(ab, " requested_mask=\"%s\"", str); } - if (aad(sa)->denied & AA_AUDIT_FILE_MASK) { + if (ad->denied & AA_AUDIT_FILE_MASK) { aa_perm_mask_to_str(str, sizeof(str), aa_file_perm_chrs, - map_mask_to_chr_mask(aad(sa)->denied)); + map_mask_to_chr_mask(ad->denied)); audit_log_format(ab, " denied_mask=\"%s\"", str); } - if (aad(sa)->request & AA_AUDIT_FILE_MASK) { + if (ad->request & AA_AUDIT_FILE_MASK) { audit_log_format(ab, " fsuid=%d", from_kuid(&init_user_ns, fsuid)); audit_log_format(ab, " ouid=%d", - from_kuid(&init_user_ns, aad(sa)->fs.ouid)); + from_kuid(&init_user_ns, ad->fs.ouid)); } - if (aad(sa)->peer) { + if (ad->peer) { audit_log_format(ab, " target="); - aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer, + aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, FLAG_VIEW_SUBNS, GFP_KERNEL); - } else if (aad(sa)->fs.target) { + } else if (ad->fs.target) { audit_log_format(ab, " target="); - audit_log_untrustedstring(ab, aad(sa)->fs.target); + audit_log_untrustedstring(ab, ad->fs.target); } } @@ -95,50 +96,49 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, kuid_t ouid, const char *info, int error) { int type = AUDIT_APPARMOR_AUTO; - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_TASK, AA_CLASS_FILE, op); - - sa.u.tsk = NULL; - aad(&sa)->request = request; - aad(&sa)->name = name; - aad(&sa)->fs.target = target; - aad(&sa)->peer = tlabel; - aad(&sa)->fs.ouid = ouid; - aad(&sa)->info = info; - aad(&sa)->error = error; - sa.u.tsk = NULL; - - if (likely(!aad(&sa)->error)) { + DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_TASK, AA_CLASS_FILE, op); + + ad.request = request; + ad.name = name; + ad.fs.target = target; + ad.peer = tlabel; + ad.fs.ouid = ouid; + ad.info = info; + ad.error = error; + ad.common.u.tsk = NULL; + + if (likely(!ad.error)) { u32 mask = perms->audit; if (unlikely(AUDIT_MODE(profile) == AUDIT_ALL)) mask = 0xffff; /* mask off perms that are not being force audited */ - aad(&sa)->request &= mask; + ad.request &= mask; - if (likely(!aad(&sa)->request)) + if (likely(!ad.request)) return 0; type = AUDIT_APPARMOR_AUDIT; } else { /* only report permissions that were denied */ - aad(&sa)->request = aad(&sa)->request & ~perms->allow; - AA_BUG(!aad(&sa)->request); + ad.request = ad.request & ~perms->allow; + AA_BUG(!ad.request); - if (aad(&sa)->request & perms->kill) + if (ad.request & perms->kill) type = AUDIT_APPARMOR_KILL; /* quiet known rejects, assumes quiet and kill do not overlap */ - if ((aad(&sa)->request & perms->quiet) && + if ((ad.request & perms->quiet) && AUDIT_MODE(profile) != AUDIT_NOQUIET && AUDIT_MODE(profile) != AUDIT_ALL) - aad(&sa)->request &= ~perms->quiet; + ad.request &= ~perms->quiet; - if (!aad(&sa)->request) - return aad(&sa)->error; + if (!ad.request) + return ad.error; } - aad(&sa)->denied = aad(&sa)->request & ~perms->allow; - return aa_audit(type, profile, &sa, file_audit_cb); + ad.denied = ad.request & ~perms->allow; + return aa_audit(type, profile, &ad, file_audit_cb); } static int path_name(const char *op, struct aa_label *label, diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index c328f07f11cd..85931ec94e91 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -152,33 +152,35 @@ struct apparmor_audit_data { unsigned long flags; } mnt; }; + + struct common_audit_data common; }; /* macros for dealing with apparmor_audit_data structure */ -#define aad(SA) ((SA)->apparmor_audit_data) +#define aad(SA) (container_of(SA, struct apparmor_audit_data, common)) +#define aad_of_va(VA) aad((struct common_audit_data *)(VA)) + #define DEFINE_AUDIT_DATA(NAME, T, C, X) \ /* TODO: cleanup audit init so we don't need _aad = {0,} */ \ - struct apparmor_audit_data NAME ## _aad = { \ + struct apparmor_audit_data NAME = { \ .class = (C), \ .op = (X), \ - }; \ - struct common_audit_data NAME = \ - { \ - .type = (T), \ - .u.tsk = NULL, \ - }; \ - NAME.apparmor_audit_data = &(NAME ## _aad) - -void aa_audit_msg(int type, struct common_audit_data *sa, + .common.type = (T), \ + .common.u.tsk = NULL, \ + .common.apparmor_audit_data = &NAME, \ + }; + +void aa_audit_msg(int type, struct apparmor_audit_data *ad, void (*cb) (struct audit_buffer *, void *)); -int aa_audit(int type, struct aa_profile *profile, struct common_audit_data *sa, +int aa_audit(int type, struct aa_profile *profile, + struct apparmor_audit_data *ad, void (*cb) (struct audit_buffer *, void *)); -#define aa_audit_error(ERROR, SA, CB) \ +#define aa_audit_error(ERROR, AD, CB) \ ({ \ - aad((SA))->error = (ERROR); \ - aa_audit_msg(AUDIT_APPARMOR_ERROR, (SA), (CB)); \ - aad((SA))->error; \ + (AD)->error = (ERROR); \ + aa_audit_msg(AUDIT_APPARMOR_ERROR, (AD), (CB)); \ + (AD)->error; \ }) diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index 6fa440b5daed..a336e57864e8 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -61,9 +61,9 @@ struct aa_sk_ctx { LSM_AUDIT_DATA_NONE, \ AA_CLASS_NET, \ OP); \ - NAME.u.net = &(NAME ## _net); \ - aad(&NAME)->net.type = (T); \ - aad(&NAME)->net.protocol = (P) + NAME.common.u.net = &(NAME ## _net); \ + NAME.net.type = (T); \ + NAME.net.protocol = (P) #define DEFINE_AUDIT_SK(NAME, OP, SK) \ DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type, \ @@ -90,16 +90,17 @@ struct aa_secmark { extern struct aa_sfs_entry aa_sfs_entry_network[]; void audit_net_cb(struct audit_buffer *ab, void *va); -int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, +int aa_profile_af_perm(struct aa_profile *profile, + struct apparmor_audit_data *ad, u32 request, u16 family, int type); int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, int type, int protocol); static inline int aa_profile_af_sk_perm(struct aa_profile *profile, - struct common_audit_data *sa, + struct apparmor_audit_data *ad, u32 request, struct sock *sk) { - return aa_profile_af_perm(profile, sa, request, sk->sk_family, + return aa_profile_af_perm(profile, ad, request, sk->sk_family, sk->sk_type); } int aa_sk_perm(const char *op, u32 request, struct sock *sk); diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index 797a7a00644d..83534df8939f 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -212,8 +212,8 @@ void aa_profile_match_label(struct aa_profile *profile, int type, u32 request, struct aa_perms *perms); int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target, u32 request, int type, u32 *deny, - struct common_audit_data *sa); + struct apparmor_audit_data *ad); int aa_check_perms(struct aa_profile *profile, struct aa_perms *perms, - u32 request, struct common_audit_data *sa, + u32 request, struct apparmor_audit_data *ad, void (*cb)(struct audit_buffer *, void *)); #endif /* __AA_PERM_H */ diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 5acde746775f..f198b8d620a4 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -52,31 +52,32 @@ static const char *audit_signal_mask(u32 mask) static void audit_signal_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; + struct apparmor_audit_data *ad = aad(sa); - if (aad(sa)->request & AA_SIGNAL_PERM_MASK) { + if (ad->request & AA_SIGNAL_PERM_MASK) { audit_log_format(ab, " requested_mask=\"%s\"", - audit_signal_mask(aad(sa)->request)); - if (aad(sa)->denied & AA_SIGNAL_PERM_MASK) { + audit_signal_mask(ad->request)); + if (ad->denied & AA_SIGNAL_PERM_MASK) { audit_log_format(ab, " denied_mask=\"%s\"", - audit_signal_mask(aad(sa)->denied)); + audit_signal_mask(ad->denied)); } } - if (aad(sa)->signal == SIGUNKNOWN) + if (ad->signal == SIGUNKNOWN) audit_log_format(ab, "signal=unknown(%d)", - aad(sa)->unmappedsig); - else if (aad(sa)->signal < MAXMAPPED_SIGNAME) - audit_log_format(ab, " signal=%s", sig_names[aad(sa)->signal]); + ad->unmappedsig); + else if (ad->signal < MAXMAPPED_SIGNAME) + audit_log_format(ab, " signal=%s", sig_names[ad->signal]); else audit_log_format(ab, " signal=rtmin+%d", - aad(sa)->signal - SIGRT_BASE); + ad->signal - SIGRT_BASE); audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer, + aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } static int profile_signal_perm(struct aa_profile *profile, struct aa_label *peer, u32 request, - struct common_audit_data *sa) + struct apparmor_audit_data *ad) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); @@ -87,24 +88,24 @@ static int profile_signal_perm(struct aa_profile *profile, !ANY_RULE_MEDIATES(&profile->rules, AA_CLASS_SIGNAL)) return 0; - aad(sa)->peer = peer; + ad->peer = peer; /* TODO: secondary cache check */ state = aa_dfa_next(rules->policy.dfa, rules->policy.start[AA_CLASS_SIGNAL], - aad(sa)->signal); + ad->signal); aa_label_match(profile, rules, peer, state, false, request, &perms); aa_apply_modes_to_perms(profile, &perms); - return aa_check_perms(profile, &perms, request, sa, audit_signal_cb); + return aa_check_perms(profile, &perms, request, ad, audit_signal_cb); } int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig) { struct aa_profile *profile; - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_SIGNAL, OP_SIGNAL); + DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_SIGNAL, OP_SIGNAL); - aad(&sa)->signal = map_signal_num(sig); - aad(&sa)->unmappedsig = sig; + ad.signal = map_signal_num(sig); + ad.unmappedsig = sig; return xcheck_labels(sender, target, profile, - profile_signal_perm(profile, target, MAY_WRITE, &sa), - profile_signal_perm(profile, sender, MAY_READ, &sa)); + profile_signal_perm(profile, target, MAY_WRITE, &ad), + profile_signal_perm(profile, sender, MAY_READ, &ad)); } diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 8e1073477c09..d6b2750fd72e 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -144,10 +144,10 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, void aa_info_message(const char *str) { if (audit_enabled) { - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, NULL); + DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, NULL); - aad(&sa)->info = str; - aa_audit_msg(AUDIT_APPARMOR_STATUS, &sa, NULL); + ad.info = str; + aa_audit_msg(AUDIT_APPARMOR_STATUS, &ad, NULL); } printk(KERN_INFO "AppArmor: %s\n", str); } @@ -282,21 +282,22 @@ void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, static void aa_audit_perms_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; + struct apparmor_audit_data *ad = aad(sa); - if (aad(sa)->request) { + if (ad->request) { audit_log_format(ab, " requested_mask="); - aa_audit_perm_mask(ab, aad(sa)->request, aa_file_perm_chrs, + aa_audit_perm_mask(ab, ad->request, aa_file_perm_chrs, PERMS_CHRS_MASK, aa_file_perm_names, PERMS_NAMES_MASK); } - if (aad(sa)->denied) { + if (ad->denied) { audit_log_format(ab, "denied_mask="); - aa_audit_perm_mask(ab, aad(sa)->denied, aa_file_perm_chrs, + aa_audit_perm_mask(ab, ad->denied, aa_file_perm_chrs, PERMS_CHRS_MASK, aa_file_perm_names, PERMS_NAMES_MASK); } audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer, + aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } @@ -350,21 +351,21 @@ void aa_profile_match_label(struct aa_profile *profile, /* currently unused */ int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target, u32 request, int type, u32 *deny, - struct common_audit_data *sa) + struct apparmor_audit_data *ad) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); struct aa_perms perms; - aad(sa)->label = &profile->label; - aad(sa)->peer = &target->label; - aad(sa)->request = request; + ad->label = &profile->label; + ad->peer = &target->label; + ad->request = request; aa_profile_match_label(profile, rules, &target->label, type, request, &perms); aa_apply_modes_to_perms(profile, &perms); *deny |= request & perms.deny; - return aa_check_perms(profile, &perms, request, sa, aa_audit_perms_cb); + return aa_check_perms(profile, &perms, request, ad, aa_audit_perms_cb); } /** @@ -372,7 +373,7 @@ int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target, * @profile: profile being checked * @perms: perms computed for the request * @request: requested perms - * @sa: initialized audit structure (MAY BE NULL if not auditing) + * @ad: initialized audit structure (MAY BE NULL if not auditing) * @cb: callback fn for type specific fields (MAY BE NULL) * * Returns: 0 if permission else error code @@ -385,7 +386,7 @@ int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target, * with a positive value. */ int aa_check_perms(struct aa_profile *profile, struct aa_perms *perms, - u32 request, struct common_audit_data *sa, + u32 request, struct apparmor_audit_data *ad, void (*cb)(struct audit_buffer *, void *)) { int type, error; @@ -394,7 +395,7 @@ int aa_check_perms(struct aa_profile *profile, struct aa_perms *perms, if (likely(!denied)) { /* mask off perms that are not being force audited */ request &= perms->audit; - if (!request || !sa) + if (!request || !ad) return 0; type = AUDIT_APPARMOR_AUDIT; @@ -413,16 +414,16 @@ int aa_check_perms(struct aa_profile *profile, struct aa_perms *perms, error = -ENOENT; denied &= ~perms->quiet; - if (!sa || !denied) + if (!ad || !denied) return error; } - if (sa) { - aad(sa)->label = &profile->label; - aad(sa)->request = request; - aad(sa)->denied = denied; - aad(sa)->error = error; - aa_audit_msg(type, sa, cb); + if (ad) { + ad->label = &profile->label; + ad->request = request; + ad->denied = denied; + ad->error = error; + aa_audit_msg(type, ad, cb); } if (type == AUDIT_APPARMOR_ALLOWED) diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 108eccc5ada5..fd7852a4737c 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -662,7 +662,7 @@ static int apparmor_setprocattr(const char *name, void *value, char *command, *largs = NULL, *args = value; size_t arg_size; int error; - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, + DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, OP_SETPROCATTR); if (size == 0) @@ -722,11 +722,11 @@ static int apparmor_setprocattr(const char *name, void *value, return error; fail: - aad(&sa)->label = begin_current_label_crit_section(); - aad(&sa)->info = name; - aad(&sa)->error = error = -EINVAL; - aa_audit_msg(AUDIT_APPARMOR_DENIED, &sa, NULL); - end_current_label_crit_section(aad(&sa)->label); + ad.label = begin_current_label_crit_section(); + ad.info = name; + ad.error = error = -EINVAL; + aa_audit_msg(AUDIT_APPARMOR_DENIED, &ad, NULL); + end_current_label_crit_section(ad.label); goto out; } diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index cdfa430ae216..3830bceff9c8 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -86,27 +86,28 @@ static void audit_mnt_flags(struct audit_buffer *ab, unsigned long flags) static void audit_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; + struct apparmor_audit_data *ad = aad(sa); - if (aad(sa)->mnt.type) { + if (ad->mnt.type) { audit_log_format(ab, " fstype="); - audit_log_untrustedstring(ab, aad(sa)->mnt.type); + audit_log_untrustedstring(ab, ad->mnt.type); } - if (aad(sa)->mnt.src_name) { + if (ad->mnt.src_name) { audit_log_format(ab, " srcname="); - audit_log_untrustedstring(ab, aad(sa)->mnt.src_name); + audit_log_untrustedstring(ab, ad->mnt.src_name); } - if (aad(sa)->mnt.trans) { + if (ad->mnt.trans) { audit_log_format(ab, " trans="); - audit_log_untrustedstring(ab, aad(sa)->mnt.trans); + audit_log_untrustedstring(ab, ad->mnt.trans); } - if (aad(sa)->mnt.flags) { + if (ad->mnt.flags) { audit_log_format(ab, " flags=\""); - audit_mnt_flags(ab, aad(sa)->mnt.flags); + audit_mnt_flags(ab, ad->mnt.flags); audit_log_format(ab, "\""); } - if (aad(sa)->mnt.data) { + if (ad->mnt.data) { audit_log_format(ab, " options="); - audit_log_untrustedstring(ab, aad(sa)->mnt.data); + audit_log_untrustedstring(ab, ad->mnt.data); } } @@ -134,7 +135,7 @@ static int audit_mount(struct aa_profile *profile, const char *op, struct aa_perms *perms, const char *info, int error) { int audit_type = AUDIT_APPARMOR_AUTO; - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_MOUNT, op); + DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_MOUNT, op); if (likely(!error)) { u32 mask = perms->audit; @@ -165,17 +166,17 @@ static int audit_mount(struct aa_profile *profile, const char *op, return error; } - aad(&sa)->name = name; - aad(&sa)->mnt.src_name = src_name; - aad(&sa)->mnt.type = type; - aad(&sa)->mnt.trans = trans; - aad(&sa)->mnt.flags = flags; + ad.name = name; + ad.mnt.src_name = src_name; + ad.mnt.type = type; + ad.mnt.trans = trans; + ad.mnt.flags = flags; if (data && (perms->audit & AA_AUDIT_DATA)) - aad(&sa)->mnt.data = data; - aad(&sa)->info = info; - aad(&sa)->error = error; + ad.mnt.data = data; + ad.info = info; + ad.error = error; - return aa_audit(audit_type, profile, &sa, audit_cb); + return aa_audit(audit_type, profile, &ad, audit_cb); } /** diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 788be1609a86..0c7304cd479c 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -71,6 +71,7 @@ static const char * const net_mask_names[] = { void audit_net_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; + struct apparmor_audit_data *ad = aad(sa); if (address_family_names[sa->u.net->family]) audit_log_format(ab, " family=\"%s\"", @@ -78,35 +79,36 @@ void audit_net_cb(struct audit_buffer *ab, void *va) else audit_log_format(ab, " family=\"unknown(%d)\"", sa->u.net->family); - if (sock_type_names[aad(sa)->net.type]) + if (sock_type_names[ad->net.type]) audit_log_format(ab, " sock_type=\"%s\"", - sock_type_names[aad(sa)->net.type]); + sock_type_names[ad->net.type]); else audit_log_format(ab, " sock_type=\"unknown(%d)\"", - aad(sa)->net.type); - audit_log_format(ab, " protocol=%d", aad(sa)->net.protocol); + ad->net.type); + audit_log_format(ab, " protocol=%d", ad->net.protocol); - if (aad(sa)->request & NET_PERMS_MASK) { + if (ad->request & NET_PERMS_MASK) { audit_log_format(ab, " requested_mask="); - aa_audit_perm_mask(ab, aad(sa)->request, NULL, 0, + aa_audit_perm_mask(ab, ad->request, NULL, 0, net_mask_names, NET_PERMS_MASK); - if (aad(sa)->denied & NET_PERMS_MASK) { + if (ad->denied & NET_PERMS_MASK) { audit_log_format(ab, " denied_mask="); - aa_audit_perm_mask(ab, aad(sa)->denied, NULL, 0, + aa_audit_perm_mask(ab, ad->denied, NULL, 0, net_mask_names, NET_PERMS_MASK); } } - if (aad(sa)->peer) { + if (ad->peer) { audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer, + aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } } /* Generic af perm */ -int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, - u32 request, u16 family, int type) +int aa_profile_af_perm(struct aa_profile *profile, + struct apparmor_audit_data *ad, u32 request, u16 family, + int type) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); @@ -130,17 +132,17 @@ int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, perms = *aa_lookup_perms(&rules->policy, state); aa_apply_modes_to_perms(profile, &perms); - return aa_check_perms(profile, &perms, request, sa, audit_net_cb); + return aa_check_perms(profile, &perms, request, ad, audit_net_cb); } int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, int type, int protocol) { struct aa_profile *profile; - DEFINE_AUDIT_NET(sa, op, NULL, family, type, protocol); + DEFINE_AUDIT_NET(ad, op, NULL, family, type, protocol); return fn_for_each_confined(label, profile, - aa_profile_af_perm(profile, &sa, request, family, + aa_profile_af_perm(profile, &ad, request, family, type)); } @@ -155,10 +157,10 @@ static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request, if (ctx->label != kernel_t && !unconfined(label)) { struct aa_profile *profile; - DEFINE_AUDIT_SK(sa, op, sk); + DEFINE_AUDIT_SK(ad, op, sk); error = fn_for_each_confined(label, profile, - aa_profile_af_sk_perm(profile, &sa, request, sk)); + aa_profile_af_sk_perm(profile, &ad, request, sk)); } return error; @@ -214,7 +216,7 @@ static int apparmor_secmark_init(struct aa_secmark *secmark) } static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid, - struct common_audit_data *sa) + struct apparmor_audit_data *ad) { int i, ret; struct aa_perms perms = { }; @@ -245,17 +247,17 @@ static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid, aa_apply_modes_to_perms(profile, &perms); - return aa_check_perms(profile, &perms, request, sa, audit_net_cb); + return aa_check_perms(profile, &perms, request, ad, audit_net_cb); } int apparmor_secmark_check(struct aa_label *label, char *op, u32 request, u32 secid, const struct sock *sk) { struct aa_profile *profile; - DEFINE_AUDIT_SK(sa, op, sk); + DEFINE_AUDIT_SK(ad, op, sk); return fn_for_each_confined(label, profile, aa_secmark_perm(profile, request, secid, - &sa)); + &ad)); } #endif diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index b9aaaac84d8a..9a7dbe64f102 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -723,10 +723,11 @@ static int replacement_allowed(struct aa_profile *profile, int noreplace, static void audit_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; + struct apparmor_audit_data *ad = aad(sa); - if (aad(sa)->iface.ns) { + if (ad->iface.ns) { audit_log_format(ab, " ns="); - audit_log_untrustedstring(ab, aad(sa)->iface.ns); + audit_log_untrustedstring(ab, ad->iface.ns); } } @@ -745,15 +746,15 @@ static int audit_policy(struct aa_label *label, const char *op, const char *ns_name, const char *name, const char *info, int error) { - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, op); + DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, op); - aad(&sa)->iface.ns = ns_name; - aad(&sa)->name = name; - aad(&sa)->info = info; - aad(&sa)->error = error; - aad(&sa)->label = label; + ad.iface.ns = ns_name; + ad.name = name; + ad.info = info; + ad.error = error; + ad.label = label; - aa_audit_msg(AUDIT_APPARMOR_STATUS, &sa, audit_cb); + aa_audit_msg(AUDIT_APPARMOR_STATUS, &ad, audit_cb); return error; } diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index b49201306753..dbc83455d900 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -34,17 +34,18 @@ static void audit_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; + struct apparmor_audit_data *ad = aad(sa); - if (aad(sa)->iface.ns) { + if (ad->iface.ns) { audit_log_format(ab, " ns="); - audit_log_untrustedstring(ab, aad(sa)->iface.ns); + audit_log_untrustedstring(ab, ad->iface.ns); } - if (aad(sa)->name) { + if (ad->name) { audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, aad(sa)->name); + audit_log_untrustedstring(ab, ad->name); } - if (aad(sa)->iface.pos) - audit_log_format(ab, " offset=%ld", aad(sa)->iface.pos); + if (ad->iface.pos) + audit_log_format(ab, " offset=%ld", ad->iface.pos); } /** @@ -63,18 +64,18 @@ static int audit_iface(struct aa_profile *new, const char *ns_name, int error) { struct aa_profile *profile = labels_profile(aa_current_raw_label()); - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, NULL); + DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, NULL); if (e) - aad(&sa)->iface.pos = e->pos - e->start; - aad(&sa)->iface.ns = ns_name; + ad.iface.pos = e->pos - e->start; + ad.iface.ns = ns_name; if (new) - aad(&sa)->name = new->base.hname; + ad.name = new->base.hname; else - aad(&sa)->name = name; - aad(&sa)->info = info; - aad(&sa)->error = error; + ad.name = name; + ad.info = info; + ad.error = error; - return aa_audit(AUDIT_APPARMOR_STATUS, profile, &sa, audit_cb); + return aa_audit(AUDIT_APPARMOR_STATUS, profile, &ad, audit_cb); } void __aa_loaddata_update(struct aa_loaddata *data, long revision) diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index 2bebc5d9e741..b6b5e1bfe9a2 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -30,12 +30,13 @@ struct aa_sfs_entry aa_sfs_entry_rlimit[] = { static void audit_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; + struct apparmor_audit_data *ad = aad(sa); audit_log_format(ab, " rlimit=%s value=%lu", - rlim_names[aad(sa)->rlim.rlim], aad(sa)->rlim.max); - if (aad(sa)->peer) { + rlim_names[ad->rlim.rlim], ad->rlim.max); + if (ad->peer) { audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer, + aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } } @@ -49,22 +50,22 @@ static void audit_cb(struct audit_buffer *ab, void *va) * @info: info being auditing * @error: error value * - * Returns: 0 or sa->error else other error code on failure + * Returns: 0 or ad->error else other error code on failure */ static int audit_resource(struct aa_profile *profile, unsigned int resource, unsigned long value, struct aa_label *peer, const char *info, int error) { - DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_RLIMITS, + DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_RLIMITS, OP_SETRLIMIT); - aad(&sa)->rlim.rlim = resource; - aad(&sa)->rlim.max = value; - aad(&sa)->peer = peer; - aad(&sa)->info = info; - aad(&sa)->error = error; + ad.rlim.rlim = resource; + ad.rlim.max = value; + ad.peer = peer; + ad.info = info; + ad.error = error; - return aa_audit(AUDIT_APPARMOR_AUTO, profile, &sa, audit_cb); + return aa_audit(AUDIT_APPARMOR_AUTO, profile, &ad, audit_cb); } /** diff --git a/security/apparmor/task.c b/security/apparmor/task.c index 84d16a29bfcb..8bd1f212215c 100644 --- a/security/apparmor/task.c +++ b/security/apparmor/task.c @@ -208,18 +208,19 @@ static const char *audit_ptrace_mask(u32 mask) static void audit_ptrace_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; + struct apparmor_audit_data *ad = aad(sa); - if (aad(sa)->request & AA_PTRACE_PERM_MASK) { + if (ad->request & AA_PTRACE_PERM_MASK) { audit_log_format(ab, " requested_mask=\"%s\"", - audit_ptrace_mask(aad(sa)->request)); + audit_ptrace_mask(ad->request)); - if (aad(sa)->denied & AA_PTRACE_PERM_MASK) { + if (ad->denied & AA_PTRACE_PERM_MASK) { audit_log_format(ab, " denied_mask=\"%s\"", - audit_ptrace_mask(aad(sa)->denied)); + audit_ptrace_mask(ad->denied)); } } audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer, + aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } @@ -227,51 +228,51 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va) /* TODO: conditionals */ static int profile_ptrace_perm(struct aa_profile *profile, struct aa_label *peer, u32 request, - struct common_audit_data *sa) + struct apparmor_audit_data *ad) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); struct aa_perms perms = { }; - aad(sa)->peer = peer; + ad->peer = peer; aa_profile_match_label(profile, rules, peer, AA_CLASS_PTRACE, request, &perms); aa_apply_modes_to_perms(profile, &perms); - return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb); + return aa_check_perms(profile, &perms, request, ad, audit_ptrace_cb); } static int profile_tracee_perm(struct aa_profile *tracee, struct aa_label *tracer, u32 request, - struct common_audit_data *sa) + struct apparmor_audit_data *ad) { if (profile_unconfined(tracee) || unconfined(tracer) || !ANY_RULE_MEDIATES(&tracee->rules, AA_CLASS_PTRACE)) return 0; - return profile_ptrace_perm(tracee, tracer, request, sa); + return profile_ptrace_perm(tracee, tracer, request, ad); } static int profile_tracer_perm(struct aa_profile *tracer, struct aa_label *tracee, u32 request, - struct common_audit_data *sa) + struct apparmor_audit_data *ad) { if (profile_unconfined(tracer)) return 0; if (ANY_RULE_MEDIATES(&tracer->rules, AA_CLASS_PTRACE)) - return profile_ptrace_perm(tracer, tracee, request, sa); + return profile_ptrace_perm(tracer, tracee, request, ad); /* profile uses the old style capability check for ptrace */ if (&tracer->label == tracee) return 0; - aad(sa)->label = &tracer->label; - aad(sa)->peer = tracee; - aad(sa)->request = 0; - aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, + ad->label = &tracer->label; + ad->peer = tracee; + ad->request = 0; + ad->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, CAP_OPT_NONE); - return aa_audit(AUDIT_APPARMOR_AUTO, tracer, sa, audit_ptrace_cb); + return aa_audit(AUDIT_APPARMOR_AUTO, tracer, ad, audit_ptrace_cb); } /** From 30b3669d40ad2400dfac75d1250596b5b0cb241b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 19 Sep 2022 00:46:09 -0700 Subject: [PATCH 0395/1397] apparmor: rename audit_data->label to audit_data->subj_label [ Upstream commit d20f5a1a6e792d22199c9989ec7ab9e95c48d60c ] rename audit_data's label field to subj_label to better reflect its use. Also at the same time drop unneeded assignments to ->subj_label as the later call to aa_check_perms will do the assignment if needed. Reviewed-by: Georgia Garcia Signed-off-by: John Johansen Stable-dep-of: 157a3537d6bc ("apparmor: Fix regression in mount mediation") Signed-off-by: Sasha Levin --- security/apparmor/audit.c | 6 +++--- security/apparmor/file.c | 2 +- security/apparmor/include/audit.h | 2 +- security/apparmor/ipc.c | 2 +- security/apparmor/lib.c | 5 ++--- security/apparmor/lsm.c | 4 ++-- security/apparmor/net.c | 2 +- security/apparmor/policy.c | 6 +++--- security/apparmor/resource.c | 2 +- security/apparmor/task.c | 4 ++-- 10 files changed, 17 insertions(+), 18 deletions(-) diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 06ad6a8fcce1..6933cb2f679b 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -113,8 +113,8 @@ static void audit_pre(struct audit_buffer *ab, void *va) audit_log_format(ab, " error=%d", ad->error); } - if (ad->label) { - struct aa_label *label = ad->label; + if (ad->subj_label) { + struct aa_label *label = ad->subj_label; if (label_isprofile(label)) { struct aa_profile *profile = labels_profile(label); @@ -187,7 +187,7 @@ int aa_audit(int type, struct aa_profile *profile, if (KILL_MODE(profile) && type == AUDIT_APPARMOR_DENIED) type = AUDIT_APPARMOR_KILL; - ad->label = &profile->label; + ad->subj_label = &profile->label; aa_audit_msg(type, ad, cb); diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 9ea95fa18e7d..5bfa70a97207 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -67,7 +67,7 @@ static void file_audit_cb(struct audit_buffer *ab, void *va) if (ad->peer) { audit_log_format(ab, " target="); - aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, + aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, FLAG_VIEW_SUBNS, GFP_KERNEL); } else if (ad->fs.target) { audit_log_format(ab, " target="); diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 85931ec94e91..096f0a04af87 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -109,7 +109,7 @@ struct apparmor_audit_data { int type; u16 class; const char *op; - struct aa_label *label; + struct aa_label *subj_label; const char *name; const char *info; u32 request; diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index f198b8d620a4..fd8306399b82 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -71,7 +71,7 @@ static void audit_signal_cb(struct audit_buffer *ab, void *va) audit_log_format(ab, " signal=rtmin+%d", ad->signal - SIGRT_BASE); audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, + aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index d6b2750fd72e..c87bccafff44 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -297,7 +297,7 @@ static void aa_audit_perms_cb(struct audit_buffer *ab, void *va) PERMS_NAMES_MASK); } audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, + aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } @@ -357,7 +357,6 @@ int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target, typeof(*rules), list); struct aa_perms perms; - ad->label = &profile->label; ad->peer = &target->label; ad->request = request; @@ -419,7 +418,7 @@ int aa_check_perms(struct aa_profile *profile, struct aa_perms *perms, } if (ad) { - ad->label = &profile->label; + ad->subj_label = &profile->label; ad->request = request; ad->denied = denied; ad->error = error; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index fd7852a4737c..359fbfbb4a66 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -722,11 +722,11 @@ static int apparmor_setprocattr(const char *name, void *value, return error; fail: - ad.label = begin_current_label_crit_section(); + ad.subj_label = begin_current_label_crit_section(); ad.info = name; ad.error = error = -EINVAL; aa_audit_msg(AUDIT_APPARMOR_DENIED, &ad, NULL); - end_current_label_crit_section(ad.label); + end_current_label_crit_section(ad.subj_label); goto out; } diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 0c7304cd479c..5e50f80e35db 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -100,7 +100,7 @@ void audit_net_cb(struct audit_buffer *ab, void *va) } if (ad->peer) { audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, + aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } } diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 9a7dbe64f102..e5f1ef83b0fd 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -733,7 +733,7 @@ static void audit_cb(struct audit_buffer *ab, void *va) /** * audit_policy - Do auditing of policy changes - * @label: label to check if it can manage policy + * @subj_label: label to check if it can manage policy * @op: policy operation being performed * @ns_name: name of namespace being manipulated * @name: name of profile being manipulated (NOT NULL) @@ -742,7 +742,7 @@ static void audit_cb(struct audit_buffer *ab, void *va) * * Returns: the error to be returned after audit is done */ -static int audit_policy(struct aa_label *label, const char *op, +static int audit_policy(struct aa_label *subj_label, const char *op, const char *ns_name, const char *name, const char *info, int error) { @@ -752,7 +752,7 @@ static int audit_policy(struct aa_label *label, const char *op, ad.name = name; ad.info = info; ad.error = error; - ad.label = label; + ad.subj_label = subj_label; aa_audit_msg(AUDIT_APPARMOR_STATUS, &ad, audit_cb); diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index b6b5e1bfe9a2..73ba26c646a5 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -36,7 +36,7 @@ static void audit_cb(struct audit_buffer *ab, void *va) rlim_names[ad->rlim.rlim], ad->rlim.max); if (ad->peer) { audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, + aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } } diff --git a/security/apparmor/task.c b/security/apparmor/task.c index 8bd1f212215c..79850e832142 100644 --- a/security/apparmor/task.c +++ b/security/apparmor/task.c @@ -220,7 +220,7 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va) } } audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(ad->label), ad->peer, + aa_label_xaudit(ab, labels_ns(ad->subj_label), ad->peer, FLAGS_NONE, GFP_ATOMIC); } @@ -266,7 +266,7 @@ static int profile_tracer_perm(struct aa_profile *tracer, if (&tracer->label == tracee) return 0; - ad->label = &tracer->label; + ad->subj_label = &tracer->label; ad->peer = tracee; ad->request = 0; ad->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, From 690f33e1edf5cd996b54094409de0067ae3fa216 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 19 Sep 2022 20:48:48 -0700 Subject: [PATCH 0396/1397] apparmor: pass cred through to audit info. [ Upstream commit 90c436a64a6e20482a9a613c47eb4af2e8a5328e ] The cred is needed to properly audit some messages, and will be needed in the future for uid conditional mediation. So pass it through to where the apparmor_audit_data struct gets defined. Reviewed-by: Georgia Garcia Signed-off-by: John Johansen Stable-dep-of: 157a3537d6bc ("apparmor: Fix regression in mount mediation") Signed-off-by: Sasha Levin --- security/apparmor/apparmorfs.c | 11 ++- security/apparmor/capability.c | 5 +- security/apparmor/domain.c | 97 +++++++++++------- security/apparmor/file.c | 131 +++++++++++++++++-------- security/apparmor/include/audit.h | 1 + security/apparmor/include/capability.h | 3 +- security/apparmor/include/file.h | 17 ++-- security/apparmor/include/ipc.h | 4 +- security/apparmor/include/mount.h | 21 ++-- security/apparmor/include/net.h | 6 +- security/apparmor/include/policy.h | 9 +- security/apparmor/include/resource.h | 3 +- security/apparmor/include/task.h | 3 +- security/apparmor/ipc.c | 14 ++- security/apparmor/lsm.c | 85 +++++++++------- security/apparmor/mount.c | 85 ++++++++++------ security/apparmor/net.c | 17 ++-- security/apparmor/policy.c | 33 ++++--- security/apparmor/resource.c | 23 +++-- security/apparmor/task.c | 31 +++--- 20 files changed, 388 insertions(+), 211 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index bd6a910f6528..261cef4c622f 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -423,7 +423,7 @@ static ssize_t policy_update(u32 mask, const char __user *buf, size_t size, /* high level check about policy management - fine grained in * below after unpack */ - error = aa_may_manage_policy(label, ns, mask); + error = aa_may_manage_policy(current_cred(), label, ns, mask); if (error) goto end_section; @@ -486,7 +486,8 @@ static ssize_t profile_remove(struct file *f, const char __user *buf, /* high level check about policy management - fine grained in * below after unpack */ - error = aa_may_manage_policy(label, ns, AA_MAY_REMOVE_POLICY); + error = aa_may_manage_policy(current_cred(), label, ns, + AA_MAY_REMOVE_POLICY); if (error) goto out; @@ -1805,7 +1806,8 @@ static int ns_mkdir_op(struct mnt_idmap *idmap, struct inode *dir, int error; label = begin_current_label_crit_section(); - error = aa_may_manage_policy(label, NULL, AA_MAY_LOAD_POLICY); + error = aa_may_manage_policy(current_cred(), label, NULL, + AA_MAY_LOAD_POLICY); end_current_label_crit_section(label); if (error) return error; @@ -1854,7 +1856,8 @@ static int ns_rmdir_op(struct inode *dir, struct dentry *dentry) int error; label = begin_current_label_crit_section(); - error = aa_may_manage_policy(label, NULL, AA_MAY_LOAD_POLICY); + error = aa_may_manage_policy(current_cred(), label, NULL, + AA_MAY_LOAD_POLICY); end_current_label_crit_section(label); if (error) return error; diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c index 58490cca035d..2fb6a2ea0b99 100644 --- a/security/apparmor/capability.c +++ b/security/apparmor/capability.c @@ -140,6 +140,7 @@ static int profile_capable(struct aa_profile *profile, int cap, /** * aa_capable - test permission to use capability + * @subj_cread: cred we are testing capability against * @label: label being tested for capability (NOT NULL) * @cap: capability to be tested * @opts: CAP_OPT_NOAUDIT bit determines whether audit record is generated @@ -148,12 +149,14 @@ static int profile_capable(struct aa_profile *profile, int cap, * * Returns: 0 on success, or else an error code. */ -int aa_capable(struct aa_label *label, int cap, unsigned int opts) +int aa_capable(const struct cred *subj_cred, struct aa_label *label, + int cap, unsigned int opts) { struct aa_profile *profile; int error = 0; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_CAP, AA_CLASS_CAP, OP_CAPABLE); + ad.subj_cred = subj_cred; ad.common.u.cap = cap; error = fn_for_each_confined(label, profile, profile_capable(profile, cap, opts, &ad)); diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index f3715cda59c5..543105cf7e33 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -31,6 +31,7 @@ /** * may_change_ptraced_domain - check if can change profile on ptraced task + * @cred: cred of task changing domain * @to_label: profile to change to (NOT NULL) * @info: message if there is an error * @@ -39,28 +40,34 @@ * * Returns: %0 or error if change not allowed */ -static int may_change_ptraced_domain(struct aa_label *to_label, +static int may_change_ptraced_domain(const struct cred *to_cred, + struct aa_label *to_label, const char **info) { struct task_struct *tracer; struct aa_label *tracerl = NULL; + const struct cred *tracer_cred = NULL; + int error = 0; rcu_read_lock(); tracer = ptrace_parent(current); - if (tracer) + if (tracer) { /* released below */ tracerl = aa_get_task_label(tracer); - + tracer_cred = get_task_cred(tracer); + } /* not ptraced */ if (!tracer || unconfined(tracerl)) goto out; - error = aa_may_ptrace(tracerl, to_label, PTRACE_MODE_ATTACH); + error = aa_may_ptrace(tracer_cred, tracerl, to_cred, to_label, + PTRACE_MODE_ATTACH); out: rcu_read_unlock(); aa_put_label(tracerl); + put_cred(tracer_cred); if (error) *info = "ptrace prevents transition"; @@ -619,7 +626,8 @@ static struct aa_label *x_to_label(struct aa_profile *profile, return new; } -static struct aa_label *profile_transition(struct aa_profile *profile, +static struct aa_label *profile_transition(const struct cred *subj_cred, + struct aa_profile *profile, const struct linux_binprm *bprm, char *buffer, struct path_cond *cond, bool *secure_exec) @@ -709,7 +717,8 @@ static struct aa_label *profile_transition(struct aa_profile *profile, } audit: - aa_audit_file(profile, &perms, OP_EXEC, MAY_EXEC, name, target, new, + aa_audit_file(subj_cred, profile, &perms, OP_EXEC, MAY_EXEC, name, + target, new, cond->uid, info, error); if (!new || nonewprivs) { aa_put_label(new); @@ -719,7 +728,8 @@ static struct aa_label *profile_transition(struct aa_profile *profile, return new; } -static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec, +static int profile_onexec(const struct cred *subj_cred, + struct aa_profile *profile, struct aa_label *onexec, bool stack, const struct linux_binprm *bprm, char *buffer, struct path_cond *cond, bool *secure_exec) @@ -787,13 +797,15 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec, } audit: - return aa_audit_file(profile, &perms, OP_EXEC, AA_MAY_ONEXEC, xname, + return aa_audit_file(subj_cred, profile, &perms, OP_EXEC, + AA_MAY_ONEXEC, xname, NULL, onexec, cond->uid, info, error); } /* ensure none ns domain transitions are correctly applied with onexec */ -static struct aa_label *handle_onexec(struct aa_label *label, +static struct aa_label *handle_onexec(const struct cred *subj_cred, + struct aa_label *label, struct aa_label *onexec, bool stack, const struct linux_binprm *bprm, char *buffer, struct path_cond *cond, @@ -810,26 +822,28 @@ static struct aa_label *handle_onexec(struct aa_label *label, if (!stack) { error = fn_for_each_in_ns(label, profile, - profile_onexec(profile, onexec, stack, + profile_onexec(subj_cred, profile, onexec, stack, bprm, buffer, cond, unsafe)); if (error) return ERR_PTR(error); new = fn_label_build_in_ns(label, profile, GFP_KERNEL, aa_get_newest_label(onexec), - profile_transition(profile, bprm, buffer, + profile_transition(subj_cred, profile, bprm, + buffer, cond, unsafe)); } else { /* TODO: determine how much we want to loosen this */ error = fn_for_each_in_ns(label, profile, - profile_onexec(profile, onexec, stack, bprm, + profile_onexec(subj_cred, profile, onexec, stack, bprm, buffer, cond, unsafe)); if (error) return ERR_PTR(error); new = fn_label_build_in_ns(label, profile, GFP_KERNEL, aa_label_merge(&profile->label, onexec, GFP_KERNEL), - profile_transition(profile, bprm, buffer, + profile_transition(subj_cred, profile, bprm, + buffer, cond, unsafe)); } @@ -838,7 +852,8 @@ static struct aa_label *handle_onexec(struct aa_label *label, /* TODO: get rid of GLOBAL_ROOT_UID */ error = fn_for_each_in_ns(label, profile, - aa_audit_file(profile, &nullperms, OP_CHANGE_ONEXEC, + aa_audit_file(subj_cred, profile, &nullperms, + OP_CHANGE_ONEXEC, AA_MAY_ONEXEC, bprm->filename, NULL, onexec, GLOBAL_ROOT_UID, "failed to build target label", -ENOMEM)); @@ -857,6 +872,7 @@ int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm) { struct aa_task_ctx *ctx; struct aa_label *label, *new = NULL; + const struct cred *subj_cred; struct aa_profile *profile; char *buffer = NULL; const char *info = NULL; @@ -869,6 +885,7 @@ int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm) file_inode(bprm->file)->i_mode }; + subj_cred = current_cred(); ctx = task_ctx(current); AA_BUG(!cred_label(bprm->cred)); AA_BUG(!ctx); @@ -895,11 +912,12 @@ int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm) /* Test for onexec first as onexec override other x transitions. */ if (ctx->onexec) - new = handle_onexec(label, ctx->onexec, ctx->token, + new = handle_onexec(subj_cred, label, ctx->onexec, ctx->token, bprm, buffer, &cond, &unsafe); else new = fn_label_build(label, profile, GFP_KERNEL, - profile_transition(profile, bprm, buffer, + profile_transition(subj_cred, profile, bprm, + buffer, &cond, &unsafe)); AA_BUG(!new); @@ -934,7 +952,7 @@ int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm) if (bprm->unsafe & (LSM_UNSAFE_PTRACE)) { /* TODO: test needs to be profile of label to new */ - error = may_change_ptraced_domain(new, &info); + error = may_change_ptraced_domain(bprm->cred, new, &info); if (error) goto audit; } @@ -971,7 +989,8 @@ int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm) audit: error = fn_for_each(label, profile, - aa_audit_file(profile, &nullperms, OP_EXEC, MAY_EXEC, + aa_audit_file(current_cred(), profile, &nullperms, + OP_EXEC, MAY_EXEC, bprm->filename, NULL, new, vfsuid_into_kuid(vfsuid), info, error)); aa_put_label(new); @@ -987,7 +1006,8 @@ int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm) * * Returns: label for hat transition OR ERR_PTR. Does NOT return NULL */ -static struct aa_label *build_change_hat(struct aa_profile *profile, +static struct aa_label *build_change_hat(const struct cred *subj_cred, + struct aa_profile *profile, const char *name, bool sibling) { struct aa_profile *root, *hat = NULL; @@ -1019,7 +1039,8 @@ static struct aa_label *build_change_hat(struct aa_profile *profile, aa_put_profile(root); audit: - aa_audit_file(profile, &nullperms, OP_CHANGE_HAT, AA_MAY_CHANGEHAT, + aa_audit_file(subj_cred, profile, &nullperms, OP_CHANGE_HAT, + AA_MAY_CHANGEHAT, name, hat ? hat->base.hname : NULL, hat ? &hat->label : NULL, GLOBAL_ROOT_UID, info, error); @@ -1035,7 +1056,8 @@ static struct aa_label *build_change_hat(struct aa_profile *profile, * * Returns: label for hat transition or ERR_PTR. Does not return NULL */ -static struct aa_label *change_hat(struct aa_label *label, const char *hats[], +static struct aa_label *change_hat(const struct cred *subj_cred, + struct aa_label *label, const char *hats[], int count, int flags) { struct aa_profile *profile, *root, *hat = NULL; @@ -1111,7 +1133,8 @@ static struct aa_label *change_hat(struct aa_label *label, const char *hats[], */ /* TODO: get rid of GLOBAL_ROOT_UID */ if (count > 1 || COMPLAIN_MODE(profile)) { - aa_audit_file(profile, &nullperms, OP_CHANGE_HAT, + aa_audit_file(subj_cred, profile, &nullperms, + OP_CHANGE_HAT, AA_MAY_CHANGEHAT, name, NULL, NULL, GLOBAL_ROOT_UID, info, error); } @@ -1120,7 +1143,8 @@ static struct aa_label *change_hat(struct aa_label *label, const char *hats[], build: new = fn_label_build_in_ns(label, profile, GFP_KERNEL, - build_change_hat(profile, name, sibling), + build_change_hat(subj_cred, profile, name, + sibling), aa_get_label(&profile->label)); if (!new) { info = "label build failed"; @@ -1150,7 +1174,7 @@ static struct aa_label *change_hat(struct aa_label *label, const char *hats[], */ int aa_change_hat(const char *hats[], int count, u64 token, int flags) { - const struct cred *cred; + const struct cred *subj_cred; struct aa_task_ctx *ctx = task_ctx(current); struct aa_label *label, *previous, *new = NULL, *target = NULL; struct aa_profile *profile; @@ -1159,8 +1183,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) int error = 0; /* released below */ - cred = get_current_cred(); - label = aa_get_newest_cred_label(cred); + subj_cred = get_current_cred(); + label = aa_get_newest_cred_label(subj_cred); previous = aa_get_newest_label(ctx->previous); /* @@ -1180,7 +1204,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) } if (count) { - new = change_hat(label, hats, count, flags); + new = change_hat(subj_cred, label, hats, count, flags); AA_BUG(!new); if (IS_ERR(new)) { error = PTR_ERR(new); @@ -1189,7 +1213,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) goto out; } - error = may_change_ptraced_domain(new, &info); + /* target cred is the same as current except new label */ + error = may_change_ptraced_domain(subj_cred, new, &info); if (error) goto fail; @@ -1242,7 +1267,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) aa_put_label(new); aa_put_label(previous); aa_put_label(label); - put_cred(cred); + put_cred(subj_cred); return error; @@ -1252,7 +1277,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) fail: fn_for_each_in_ns(label, profile, - aa_audit_file(profile, &perms, OP_CHANGE_HAT, + aa_audit_file(subj_cred, profile, &perms, OP_CHANGE_HAT, AA_MAY_CHANGEHAT, NULL, NULL, target, GLOBAL_ROOT_UID, info, error)); @@ -1261,6 +1286,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags) static int change_profile_perms_wrapper(const char *op, const char *name, + const struct cred *subj_cred, struct aa_profile *profile, struct aa_label *target, bool stack, u32 request, struct aa_perms *perms) @@ -1275,7 +1301,8 @@ static int change_profile_perms_wrapper(const char *op, const char *name, rules->file.start[AA_CLASS_FILE], perms); if (error) - error = aa_audit_file(profile, perms, op, request, name, + error = aa_audit_file(subj_cred, profile, perms, op, request, + name, NULL, target, GLOBAL_ROOT_UID, info, error); @@ -1304,6 +1331,7 @@ int aa_change_profile(const char *fqname, int flags) const char *auditname = fqname; /* retain leading & if stack */ bool stack = flags & AA_CHANGE_STACK; struct aa_task_ctx *ctx = task_ctx(current); + const struct cred *subj_cred = get_current_cred(); int error = 0; char *op; u32 request; @@ -1381,6 +1409,7 @@ int aa_change_profile(const char *fqname, int flags) */ error = fn_for_each_in_ns(label, profile, change_profile_perms_wrapper(op, auditname, + subj_cred, profile, target, stack, request, &perms)); if (error) @@ -1391,7 +1420,7 @@ int aa_change_profile(const char *fqname, int flags) check: /* check if tracing task is allowed to trace target domain */ - error = may_change_ptraced_domain(target, &info); + error = may_change_ptraced_domain(subj_cred, target, &info); if (error && !fn_for_each_in_ns(label, profile, COMPLAIN_MODE(profile))) goto audit; @@ -1451,7 +1480,8 @@ int aa_change_profile(const char *fqname, int flags) audit: error = fn_for_each_in_ns(label, profile, - aa_audit_file(profile, &perms, op, request, auditname, + aa_audit_file(subj_cred, + profile, &perms, op, request, auditname, NULL, new ? new : target, GLOBAL_ROOT_UID, info, error)); @@ -1459,6 +1489,7 @@ int aa_change_profile(const char *fqname, int flags) aa_put_label(new); aa_put_label(target); aa_put_label(label); + put_cred(subj_cred); return error; } diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 5bfa70a97207..6fd21324a097 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -45,7 +45,7 @@ static void file_audit_cb(struct audit_buffer *ab, void *va) { struct common_audit_data *sa = va; struct apparmor_audit_data *ad = aad(sa); - kuid_t fsuid = current_fsuid(); + kuid_t fsuid = ad->subj_cred ? ad->subj_cred->fsuid : current_fsuid(); char str[10]; if (ad->request & AA_AUDIT_FILE_MASK) { @@ -77,6 +77,7 @@ static void file_audit_cb(struct audit_buffer *ab, void *va) /** * aa_audit_file - handle the auditing of file operations + * @subj_cred: cred of the subject * @profile: the profile being enforced (NOT NULL) * @perms: the permissions computed for the request (NOT NULL) * @op: operation being mediated @@ -90,7 +91,8 @@ static void file_audit_cb(struct audit_buffer *ab, void *va) * * Returns: %0 or error on failure */ -int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, +int aa_audit_file(const struct cred *subj_cred, + struct aa_profile *profile, struct aa_perms *perms, const char *op, u32 request, const char *name, const char *target, struct aa_label *tlabel, kuid_t ouid, const char *info, int error) @@ -98,6 +100,7 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, int type = AUDIT_APPARMOR_AUTO; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_TASK, AA_CLASS_FILE, op); + ad.subj_cred = subj_cred; ad.request = request; ad.name = name; ad.fs.target = target; @@ -141,7 +144,21 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, return aa_audit(type, profile, &ad, file_audit_cb); } -static int path_name(const char *op, struct aa_label *label, +/** + * is_deleted - test if a file has been completely unlinked + * @dentry: dentry of file to test for deletion (NOT NULL) + * + * Returns: true if deleted else false + */ +static inline bool is_deleted(struct dentry *dentry) +{ + if (d_unlinked(dentry) && d_backing_inode(dentry)->i_nlink == 0) + return true; + return false; +} + +static int path_name(const char *op, const struct cred *subj_cred, + struct aa_label *label, const struct path *path, int flags, char *buffer, const char **name, struct path_cond *cond, u32 request) { @@ -153,7 +170,8 @@ static int path_name(const char *op, struct aa_label *label, labels_profile(label)->disconnected); if (error) { fn_for_each_confined(label, profile, - aa_audit_file(profile, &nullperms, op, request, *name, + aa_audit_file(subj_cred, + profile, &nullperms, op, request, *name, NULL, NULL, cond->uid, info, error)); return error; } @@ -207,9 +225,9 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, return state; } -static int __aa_path_perm(const char *op, struct aa_profile *profile, - const char *name, u32 request, - struct path_cond *cond, int flags, +static int __aa_path_perm(const char *op, const struct cred *subj_cred, + struct aa_profile *profile, const char *name, + u32 request, struct path_cond *cond, int flags, struct aa_perms *perms) { struct aa_ruleset *rules = list_first_entry(&profile->rules, @@ -222,12 +240,14 @@ static int __aa_path_perm(const char *op, struct aa_profile *profile, name, cond, perms); if (request & ~perms->allow) e = -EACCES; - return aa_audit_file(profile, perms, op, request, name, NULL, NULL, + return aa_audit_file(subj_cred, + profile, perms, op, request, name, NULL, NULL, cond->uid, NULL, e); } -static int profile_path_perm(const char *op, struct aa_profile *profile, +static int profile_path_perm(const char *op, const struct cred *subj_cred, + struct aa_profile *profile, const struct path *path, char *buffer, u32 request, struct path_cond *cond, int flags, struct aa_perms *perms) @@ -238,18 +258,19 @@ static int profile_path_perm(const char *op, struct aa_profile *profile, if (profile_unconfined(profile)) return 0; - error = path_name(op, &profile->label, path, + error = path_name(op, subj_cred, &profile->label, path, flags | profile->path_flags, buffer, &name, cond, request); if (error) return error; - return __aa_path_perm(op, profile, name, request, cond, flags, - perms); + return __aa_path_perm(op, subj_cred, profile, name, request, cond, + flags, perms); } /** * aa_path_perm - do permissions check & audit for @path * @op: operation being checked + * @subj_cred: subject cred * @label: profile being enforced (NOT NULL) * @path: path to check permissions of (NOT NULL) * @flags: any additional path flags beyond what the profile specifies @@ -258,7 +279,8 @@ static int profile_path_perm(const char *op, struct aa_profile *profile, * * Returns: %0 else error if access denied or other error */ -int aa_path_perm(const char *op, struct aa_label *label, +int aa_path_perm(const char *op, const struct cred *subj_cred, + struct aa_label *label, const struct path *path, int flags, u32 request, struct path_cond *cond) { @@ -273,8 +295,8 @@ int aa_path_perm(const char *op, struct aa_label *label, if (!buffer) return -ENOMEM; error = fn_for_each_confined(label, profile, - profile_path_perm(op, profile, path, buffer, request, - cond, flags, &perms)); + profile_path_perm(op, subj_cred, profile, path, buffer, + request, cond, flags, &perms)); aa_put_buffer(buffer); @@ -301,7 +323,8 @@ static inline bool xindex_is_subset(u32 link, u32 target) return true; } -static int profile_path_link(struct aa_profile *profile, +static int profile_path_link(const struct cred *subj_cred, + struct aa_profile *profile, const struct path *link, char *buffer, const struct path *target, char *buffer2, struct path_cond *cond) @@ -315,13 +338,15 @@ static int profile_path_link(struct aa_profile *profile, aa_state_t state; int error; - error = path_name(OP_LINK, &profile->label, link, profile->path_flags, + error = path_name(OP_LINK, subj_cred, &profile->label, link, + profile->path_flags, buffer, &lname, cond, AA_MAY_LINK); if (error) goto audit; /* buffer2 freed below, tname is pointer in buffer2 */ - error = path_name(OP_LINK, &profile->label, target, profile->path_flags, + error = path_name(OP_LINK, subj_cred, &profile->label, target, + profile->path_flags, buffer2, &tname, cond, AA_MAY_LINK); if (error) goto audit; @@ -381,12 +406,14 @@ static int profile_path_link(struct aa_profile *profile, error = 0; audit: - return aa_audit_file(profile, &lperms, OP_LINK, request, lname, tname, + return aa_audit_file(subj_cred, + profile, &lperms, OP_LINK, request, lname, tname, NULL, cond->uid, info, error); } /** * aa_path_link - Handle hard link permission check + * @subj_cred: subject cred * @label: the label being enforced (NOT NULL) * @old_dentry: the target dentry (NOT NULL) * @new_dir: directory the new link will be created in (NOT NULL) @@ -403,7 +430,8 @@ static int profile_path_link(struct aa_profile *profile, * * Returns: %0 if allowed else error */ -int aa_path_link(struct aa_label *label, struct dentry *old_dentry, +int aa_path_link(const struct cred *subj_cred, + struct aa_label *label, struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) { struct path link = { .mnt = new_dir->mnt, .dentry = new_dentry }; @@ -424,8 +452,8 @@ int aa_path_link(struct aa_label *label, struct dentry *old_dentry, goto out; error = fn_for_each_confined(label, profile, - profile_path_link(profile, &link, buffer, &target, - buffer2, &cond)); + profile_path_link(subj_cred, profile, &link, buffer, + &target, buffer2, &cond)); out: aa_put_buffer(buffer); aa_put_buffer(buffer2); @@ -453,7 +481,8 @@ static void update_file_ctx(struct aa_file_ctx *fctx, struct aa_label *label, spin_unlock(&fctx->lock); } -static int __file_path_perm(const char *op, struct aa_label *label, +static int __file_path_perm(const char *op, const struct cred *subj_cred, + struct aa_label *label, struct aa_label *flabel, struct file *file, u32 request, u32 denied, bool in_atomic) { @@ -480,7 +509,8 @@ static int __file_path_perm(const char *op, struct aa_label *label, /* check every profile in task label not in current cache */ error = fn_for_each_not_in_set(flabel, label, profile, - profile_path_perm(op, profile, &file->f_path, buffer, + profile_path_perm(op, subj_cred, profile, + &file->f_path, buffer, request, &cond, flags, &perms)); if (denied && !error) { /* @@ -493,12 +523,14 @@ static int __file_path_perm(const char *op, struct aa_label *label, */ if (label == flabel) error = fn_for_each(label, profile, - profile_path_perm(op, profile, &file->f_path, + profile_path_perm(op, subj_cred, + profile, &file->f_path, buffer, request, &cond, flags, &perms)); else error = fn_for_each_not_in_set(label, flabel, profile, - profile_path_perm(op, profile, &file->f_path, + profile_path_perm(op, subj_cred, + profile, &file->f_path, buffer, request, &cond, flags, &perms)); } @@ -510,7 +542,8 @@ static int __file_path_perm(const char *op, struct aa_label *label, return error; } -static int __file_sock_perm(const char *op, struct aa_label *label, +static int __file_sock_perm(const char *op, const struct cred *subj_cred, + struct aa_label *label, struct aa_label *flabel, struct file *file, u32 request, u32 denied) { @@ -524,11 +557,12 @@ static int __file_sock_perm(const char *op, struct aa_label *label, return 0; /* TODO: improve to skip profiles cached in flabel */ - error = aa_sock_file_perm(label, op, request, sock); + error = aa_sock_file_perm(subj_cred, label, op, request, sock); if (denied) { /* TODO: improve to skip profiles checked above */ /* check every profile in file label to is cached */ - last_error(error, aa_sock_file_perm(flabel, op, request, sock)); + last_error(error, aa_sock_file_perm(subj_cred, flabel, op, + request, sock)); } if (!error) update_file_ctx(file_ctx(file), label, request); @@ -539,6 +573,7 @@ static int __file_sock_perm(const char *op, struct aa_label *label, /** * aa_file_perm - do permission revalidation check & audit for @file * @op: operation being checked + * @subj_cred: subject cred * @label: label being enforced (NOT NULL) * @file: file to revalidate access permissions on (NOT NULL) * @request: requested permissions @@ -546,7 +581,8 @@ static int __file_sock_perm(const char *op, struct aa_label *label, * * Returns: %0 if access allowed else error */ -int aa_file_perm(const char *op, struct aa_label *label, struct file *file, +int aa_file_perm(const char *op, const struct cred *subj_cred, + struct aa_label *label, struct file *file, u32 request, bool in_atomic) { struct aa_file_ctx *fctx; @@ -582,19 +618,19 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file, /* TODO: label cross check */ if (file->f_path.mnt && path_mediated_fs(file->f_path.dentry)) - error = __file_path_perm(op, label, flabel, file, request, - denied, in_atomic); + error = __file_path_perm(op, subj_cred, label, flabel, file, + request, denied, in_atomic); else if (S_ISSOCK(file_inode(file)->i_mode)) - error = __file_sock_perm(op, label, flabel, file, request, - denied); + error = __file_sock_perm(op, subj_cred, label, flabel, file, + request, denied); aa_put_label(flabel); done: return error; } -static void revalidate_tty(struct aa_label *label) +static void revalidate_tty(const struct cred *subj_cred, struct aa_label *label) { struct tty_struct *tty; int drop_tty = 0; @@ -612,8 +648,8 @@ static void revalidate_tty(struct aa_label *label) struct tty_file_private, list); file = file_priv->file; - if (aa_file_perm(OP_INHERIT, label, file, MAY_READ | MAY_WRITE, - IN_ATOMIC)) + if (aa_file_perm(OP_INHERIT, subj_cred, label, file, + MAY_READ | MAY_WRITE, IN_ATOMIC)) drop_tty = 1; } spin_unlock(&tty->files_lock); @@ -623,12 +659,17 @@ static void revalidate_tty(struct aa_label *label) no_tty(); } +struct cred_label { + const struct cred *cred; + struct aa_label *label; +}; + static int match_file(const void *p, struct file *file, unsigned int fd) { - struct aa_label *label = (struct aa_label *)p; + struct cred_label *cl = (struct cred_label *)p; - if (aa_file_perm(OP_INHERIT, label, file, aa_map_file_to_perms(file), - IN_ATOMIC)) + if (aa_file_perm(OP_INHERIT, cl->cred, cl->label, file, + aa_map_file_to_perms(file), IN_ATOMIC)) return fd + 1; return 0; } @@ -638,13 +679,17 @@ static int match_file(const void *p, struct file *file, unsigned int fd) void aa_inherit_files(const struct cred *cred, struct files_struct *files) { struct aa_label *label = aa_get_newest_cred_label(cred); + struct cred_label cl = { + .cred = cred, + .label = label, + }; struct file *devnull = NULL; unsigned int n; - revalidate_tty(label); + revalidate_tty(cred, label); /* Revalidate access to inherited open files. */ - n = iterate_fd(files, 0, match_file, label); + n = iterate_fd(files, 0, match_file, &cl); if (!n) /* none found? */ goto out; @@ -654,7 +699,7 @@ void aa_inherit_files(const struct cred *cred, struct files_struct *files) /* replace all the matching ones with this */ do { replace_fd(n - 1, devnull, 0); - } while ((n = iterate_fd(files, n, match_file, label)) != 0); + } while ((n = iterate_fd(files, n, match_file, &cl)) != 0); if (devnull) fput(devnull); out: diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 096f0a04af87..42d701fec5a6 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -109,6 +109,7 @@ struct apparmor_audit_data { int type; u16 class; const char *op; + const struct cred *subj_cred; struct aa_label *subj_label; const char *name; const char *info; diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h index d420e2d10b31..d6dcc604ec0c 100644 --- a/security/apparmor/include/capability.h +++ b/security/apparmor/include/capability.h @@ -36,7 +36,8 @@ struct aa_caps { extern struct aa_sfs_entry aa_sfs_entry_caps[]; -int aa_capable(struct aa_label *label, int cap, unsigned int opts); +int aa_capable(const struct cred *subj_cred, struct aa_label *label, + int cap, unsigned int opts); static inline void aa_free_cap_rules(struct aa_caps *caps) { diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h index 5be620af33ba..64dc6d1a7a05 100644 --- a/security/apparmor/include/file.h +++ b/security/apparmor/include/file.h @@ -108,7 +108,8 @@ struct path_cond { #define COMBINED_PERM_MASK(X) ((X).allow | (X).audit | (X).quiet | (X).kill) -int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms, +int aa_audit_file(const struct cred *cred, + struct aa_profile *profile, struct aa_perms *perms, const char *op, u32 request, const char *name, const char *target, struct aa_label *tlabel, kuid_t ouid, const char *info, int error); @@ -119,14 +120,16 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start, const char *name, struct path_cond *cond, struct aa_perms *perms); -int aa_path_perm(const char *op, struct aa_label *label, - const struct path *path, int flags, u32 request, - struct path_cond *cond); +int aa_path_perm(const char *op, const struct cred *subj_cred, + struct aa_label *label, const struct path *path, + int flags, u32 request, struct path_cond *cond); -int aa_path_link(struct aa_label *label, struct dentry *old_dentry, - const struct path *new_dir, struct dentry *new_dentry); +int aa_path_link(const struct cred *subj_cred, struct aa_label *label, + struct dentry *old_dentry, const struct path *new_dir, + struct dentry *new_dentry); -int aa_file_perm(const char *op, struct aa_label *label, struct file *file, +int aa_file_perm(const char *op, const struct cred *subj_cred, + struct aa_label *label, struct file *file, u32 request, bool in_atomic); void aa_inherit_files(const struct cred *cred, struct files_struct *files); diff --git a/security/apparmor/include/ipc.h b/security/apparmor/include/ipc.h index a1ac6ffb95e9..74d17052f76b 100644 --- a/security/apparmor/include/ipc.h +++ b/security/apparmor/include/ipc.h @@ -13,6 +13,8 @@ #include -int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig); +int aa_may_signal(const struct cred *subj_cred, struct aa_label *sender, + const struct cred *target_cred, struct aa_label *target, + int sig); #endif /* __AA_IPC_H */ diff --git a/security/apparmor/include/mount.h b/security/apparmor/include/mount.h index a710683b2496..10c76f906a65 100644 --- a/security/apparmor/include/mount.h +++ b/security/apparmor/include/mount.h @@ -25,26 +25,33 @@ #define AA_MS_IGNORE_MASK (MS_KERNMOUNT | MS_NOSEC | MS_ACTIVE | MS_BORN) -int aa_remount(struct aa_label *label, const struct path *path, +int aa_remount(const struct cred *subj_cred, + struct aa_label *label, const struct path *path, unsigned long flags, void *data); -int aa_bind_mount(struct aa_label *label, const struct path *path, +int aa_bind_mount(const struct cred *subj_cred, + struct aa_label *label, const struct path *path, const char *old_name, unsigned long flags); -int aa_mount_change_type(struct aa_label *label, const struct path *path, +int aa_mount_change_type(const struct cred *subj_cred, + struct aa_label *label, const struct path *path, unsigned long flags); -int aa_move_mount(struct aa_label *label, const struct path *path, +int aa_move_mount(const struct cred *subj_cred, + struct aa_label *label, const struct path *path, const char *old_name); -int aa_new_mount(struct aa_label *label, const char *dev_name, +int aa_new_mount(const struct cred *subj_cred, + struct aa_label *label, const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data); -int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags); +int aa_umount(const struct cred *subj_cred, + struct aa_label *label, struct vfsmount *mnt, int flags); -int aa_pivotroot(struct aa_label *label, const struct path *old_path, +int aa_pivotroot(const struct cred *subj_cred, + struct aa_label *label, const struct path *old_path, const struct path *new_path); #endif /* __AA_MOUNT_H */ diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h index a336e57864e8..aa8515af677f 100644 --- a/security/apparmor/include/net.h +++ b/security/apparmor/include/net.h @@ -93,7 +93,8 @@ void audit_net_cb(struct audit_buffer *ab, void *va); int aa_profile_af_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, u32 request, u16 family, int type); -int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, +int aa_af_perm(const struct cred *subj_cred, struct aa_label *label, + const char *op, u32 request, u16 family, int type, int protocol); static inline int aa_profile_af_sk_perm(struct aa_profile *profile, struct apparmor_audit_data *ad, @@ -105,7 +106,8 @@ static inline int aa_profile_af_sk_perm(struct aa_profile *profile, } int aa_sk_perm(const char *op, u32 request, struct sock *sk); -int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request, +int aa_sock_file_perm(const struct cred *subj_cred, struct aa_label *label, + const char *op, u32 request, struct socket *sock); int apparmor_secmark_check(struct aa_label *label, char *op, u32 request, diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 545f791cabda..fa15a5c7febb 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -370,9 +370,12 @@ static inline int AUDIT_MODE(struct aa_profile *profile) return profile->audit; } -bool aa_policy_view_capable(struct aa_label *label, struct aa_ns *ns); -bool aa_policy_admin_capable(struct aa_label *label, struct aa_ns *ns); -int aa_may_manage_policy(struct aa_label *label, struct aa_ns *ns, +bool aa_policy_view_capable(const struct cred *subj_cred, + struct aa_label *label, struct aa_ns *ns); +bool aa_policy_admin_capable(const struct cred *subj_cred, + struct aa_label *label, struct aa_ns *ns); +int aa_may_manage_policy(const struct cred *subj_cred, + struct aa_label *label, struct aa_ns *ns, u32 mask); bool aa_current_policy_view_capable(struct aa_ns *ns); bool aa_current_policy_admin_capable(struct aa_ns *ns); diff --git a/security/apparmor/include/resource.h b/security/apparmor/include/resource.h index 961d85d328ea..ad2c0da8e64f 100644 --- a/security/apparmor/include/resource.h +++ b/security/apparmor/include/resource.h @@ -33,7 +33,8 @@ struct aa_rlimit { extern struct aa_sfs_entry aa_sfs_entry_rlimit[]; int aa_map_resource(int resource); -int aa_task_setrlimit(struct aa_label *label, struct task_struct *task, +int aa_task_setrlimit(const struct cred *subj_cred, struct aa_label *label, + struct task_struct *task, unsigned int resource, struct rlimit *new_rlim); void __aa_transition_rlimits(struct aa_label *old, struct aa_label *new); diff --git a/security/apparmor/include/task.h b/security/apparmor/include/task.h index 13437d62c70f..29ba55107b7d 100644 --- a/security/apparmor/include/task.h +++ b/security/apparmor/include/task.h @@ -91,7 +91,8 @@ static inline void aa_clear_task_ctx_trans(struct aa_task_ctx *ctx) "segv usr2 pipe alrm term stkflt chld cont stop stp ttin ttou urg " \ "xcpu xfsz vtalrm prof winch io pwr sys emt lost" -int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee, +int aa_may_ptrace(const struct cred *tracer_cred, struct aa_label *tracer, + const struct cred *tracee_cred, struct aa_label *tracee, u32 request); diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index fd8306399b82..c0d0dbd7b4c4 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -75,7 +75,8 @@ static void audit_signal_cb(struct audit_buffer *ab, void *va) FLAGS_NONE, GFP_ATOMIC); } -static int profile_signal_perm(struct aa_profile *profile, +static int profile_signal_perm(const struct cred *cred, + struct aa_profile *profile, struct aa_label *peer, u32 request, struct apparmor_audit_data *ad) { @@ -88,6 +89,7 @@ static int profile_signal_perm(struct aa_profile *profile, !ANY_RULE_MEDIATES(&profile->rules, AA_CLASS_SIGNAL)) return 0; + ad->subj_cred = cred; ad->peer = peer; /* TODO: secondary cache check */ state = aa_dfa_next(rules->policy.dfa, @@ -98,7 +100,9 @@ static int profile_signal_perm(struct aa_profile *profile, return aa_check_perms(profile, &perms, request, ad, audit_signal_cb); } -int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig) +int aa_may_signal(const struct cred *subj_cred, struct aa_label *sender, + const struct cred *target_cred, struct aa_label *target, + int sig) { struct aa_profile *profile; DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_SIGNAL, OP_SIGNAL); @@ -106,6 +110,8 @@ int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig) ad.signal = map_signal_num(sig); ad.unmappedsig = sig; return xcheck_labels(sender, target, profile, - profile_signal_perm(profile, target, MAY_WRITE, &ad), - profile_signal_perm(profile, sender, MAY_READ, &ad)); + profile_signal_perm(subj_cred, profile, target, + MAY_WRITE, &ad), + profile_signal_perm(target_cred, profile, sender, + MAY_READ, &ad)); } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 359fbfbb4a66..60f95cc4532a 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -116,15 +116,17 @@ static int apparmor_ptrace_access_check(struct task_struct *child, unsigned int mode) { struct aa_label *tracer, *tracee; + const struct cred *cred; int error; + cred = get_task_cred(child); + tracee = cred_label(cred); /* ref count on cred */ tracer = __begin_current_label_crit_section(); - tracee = aa_get_task_label(child); - error = aa_may_ptrace(tracer, tracee, + error = aa_may_ptrace(current_cred(), tracer, cred, tracee, (mode & PTRACE_MODE_READ) ? AA_PTRACE_READ : AA_PTRACE_TRACE); - aa_put_label(tracee); __end_current_label_crit_section(tracer); + put_cred(cred); return error; } @@ -132,12 +134,15 @@ static int apparmor_ptrace_access_check(struct task_struct *child, static int apparmor_ptrace_traceme(struct task_struct *parent) { struct aa_label *tracer, *tracee; + const struct cred *cred; int error; tracee = __begin_current_label_crit_section(); - tracer = aa_get_task_label(parent); - error = aa_may_ptrace(tracer, tracee, AA_PTRACE_TRACE); - aa_put_label(tracer); + cred = get_task_cred(parent); + tracer = cred_label(cred); /* ref count on cred */ + error = aa_may_ptrace(cred, tracer, current_cred(), tracee, + AA_PTRACE_TRACE); + put_cred(cred); __end_current_label_crit_section(tracee); return error; @@ -188,7 +193,7 @@ static int apparmor_capable(const struct cred *cred, struct user_namespace *ns, label = aa_get_newest_cred_label(cred); if (!unconfined(label)) - error = aa_capable(label, cap, opts); + error = aa_capable(cred, label, cap, opts); aa_put_label(label); return error; @@ -211,7 +216,8 @@ static int common_perm(const char *op, const struct path *path, u32 mask, label = __begin_current_label_crit_section(); if (!unconfined(label)) - error = aa_path_perm(op, label, path, 0, mask, cond); + error = aa_path_perm(op, current_cred(), label, path, 0, mask, + cond); __end_current_label_crit_section(label); return error; @@ -357,7 +363,8 @@ static int apparmor_path_link(struct dentry *old_dentry, const struct path *new_ label = begin_current_label_crit_section(); if (!unconfined(label)) - error = aa_path_link(label, old_dentry, new_dir, new_dentry); + error = aa_path_link(current_cred(), label, old_dentry, new_dir, + new_dentry); end_current_label_crit_section(label); return error; @@ -396,23 +403,27 @@ static int apparmor_path_rename(const struct path *old_dir, struct dentry *old_d vfsuid = i_uid_into_vfsuid(idmap, d_backing_inode(old_dentry)); cond_exchange.uid = vfsuid_into_kuid(vfsuid); - error = aa_path_perm(OP_RENAME_SRC, label, &new_path, 0, + error = aa_path_perm(OP_RENAME_SRC, current_cred(), + label, &new_path, 0, MAY_READ | AA_MAY_GETATTR | MAY_WRITE | AA_MAY_SETATTR | AA_MAY_DELETE, &cond_exchange); if (!error) - error = aa_path_perm(OP_RENAME_DEST, label, &old_path, + error = aa_path_perm(OP_RENAME_DEST, current_cred(), + label, &old_path, 0, MAY_WRITE | AA_MAY_SETATTR | AA_MAY_CREATE, &cond_exchange); } if (!error) - error = aa_path_perm(OP_RENAME_SRC, label, &old_path, 0, + error = aa_path_perm(OP_RENAME_SRC, current_cred(), + label, &old_path, 0, MAY_READ | AA_MAY_GETATTR | MAY_WRITE | AA_MAY_SETATTR | AA_MAY_DELETE, &cond); if (!error) - error = aa_path_perm(OP_RENAME_DEST, label, &new_path, + error = aa_path_perm(OP_RENAME_DEST, current_cred(), + label, &new_path, 0, MAY_WRITE | AA_MAY_SETATTR | AA_MAY_CREATE, &cond); @@ -467,7 +478,8 @@ static int apparmor_file_open(struct file *file) vfsuid = i_uid_into_vfsuid(idmap, inode); cond.uid = vfsuid_into_kuid(vfsuid); - error = aa_path_perm(OP_OPEN, label, &file->f_path, 0, + error = aa_path_perm(OP_OPEN, file->f_cred, + label, &file->f_path, 0, aa_map_file_to_perms(file), &cond); /* todo cache full allowed permissions set and state */ fctx->allow = aa_map_file_to_perms(file); @@ -507,7 +519,7 @@ static int common_file_perm(const char *op, struct file *file, u32 mask, return -EACCES; label = __begin_current_label_crit_section(); - error = aa_file_perm(op, label, file, mask, in_atomic); + error = aa_file_perm(op, current_cred(), label, file, mask, in_atomic); __end_current_label_crit_section(label); return error; @@ -585,17 +597,21 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path, label = __begin_current_label_crit_section(); if (!unconfined(label)) { if (flags & MS_REMOUNT) - error = aa_remount(label, path, flags, data); + error = aa_remount(current_cred(), label, path, flags, + data); else if (flags & MS_BIND) - error = aa_bind_mount(label, path, dev_name, flags); + error = aa_bind_mount(current_cred(), label, path, + dev_name, flags); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) - error = aa_mount_change_type(label, path, flags); + error = aa_mount_change_type(current_cred(), label, + path, flags); else if (flags & MS_MOVE) - error = aa_move_mount(label, path, dev_name); + error = aa_move_mount(current_cred(), label, path, + dev_name); else - error = aa_new_mount(label, dev_name, path, type, - flags, data); + error = aa_new_mount(current_cred(), label, dev_name, + path, type, flags, data); } __end_current_label_crit_section(label); @@ -609,7 +625,7 @@ static int apparmor_sb_umount(struct vfsmount *mnt, int flags) label = __begin_current_label_crit_section(); if (!unconfined(label)) - error = aa_umount(label, mnt, flags); + error = aa_umount(current_cred(), label, mnt, flags); __end_current_label_crit_section(label); return error; @@ -623,7 +639,7 @@ static int apparmor_sb_pivotroot(const struct path *old_path, label = aa_get_current_label(); if (!unconfined(label)) - error = aa_pivotroot(label, old_path, new_path); + error = aa_pivotroot(current_cred(), label, old_path, new_path); aa_put_label(label); return error; @@ -785,7 +801,8 @@ static int apparmor_task_setrlimit(struct task_struct *task, int error = 0; if (!unconfined(label)) - error = aa_task_setrlimit(label, task, resource, new_rlim); + error = aa_task_setrlimit(current_cred(), label, task, + resource, new_rlim); __end_current_label_crit_section(label); return error; @@ -794,26 +811,27 @@ static int apparmor_task_setrlimit(struct task_struct *task, static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo *info, int sig, const struct cred *cred) { + const struct cred *tc; struct aa_label *cl, *tl; int error; + tc = get_task_cred(target); + tl = aa_get_newest_cred_label(tc); if (cred) { /* * Dealing with USB IO specific behavior */ cl = aa_get_newest_cred_label(cred); - tl = aa_get_task_label(target); - error = aa_may_signal(cl, tl, sig); + error = aa_may_signal(cred, cl, tc, tl, sig); aa_put_label(cl); - aa_put_label(tl); return error; + } else { + cl = __begin_current_label_crit_section(); + error = aa_may_signal(current_cred(), cl, tc, tl, sig); + __end_current_label_crit_section(cl); } - - cl = __begin_current_label_crit_section(); - tl = aa_get_task_label(target); - error = aa_may_signal(cl, tl, sig); aa_put_label(tl); - __end_current_label_crit_section(cl); + put_cred(tc); return error; } @@ -879,7 +897,8 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern) if (!(kern || unconfined(label))) error = af_select(family, create_perm(label, family, type, protocol), - aa_af_perm(label, OP_CREATE, AA_MAY_CREATE, + aa_af_perm(current_cred(), label, + OP_CREATE, AA_MAY_CREATE, family, type, protocol)); end_current_label_crit_section(label); diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index 3830bceff9c8..2bb77aacc49a 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -113,6 +113,7 @@ static void audit_cb(struct audit_buffer *ab, void *va) /** * audit_mount - handle the auditing of mount operations + * @subj_cred: cred of the subject * @profile: the profile being enforced (NOT NULL) * @op: operation being mediated (NOT NULL) * @name: name of object being mediated (MAYBE NULL) @@ -128,7 +129,8 @@ static void audit_cb(struct audit_buffer *ab, void *va) * * Returns: %0 or error on failure */ -static int audit_mount(struct aa_profile *profile, const char *op, +static int audit_mount(const struct cred *subj_cred, + struct aa_profile *profile, const char *op, const char *name, const char *src_name, const char *type, const char *trans, unsigned long flags, const void *data, u32 request, @@ -166,6 +168,7 @@ static int audit_mount(struct aa_profile *profile, const char *op, return error; } + ad.subj_cred = subj_cred; ad.name = name; ad.mnt.src_name = src_name; ad.mnt.type = type; @@ -284,6 +287,7 @@ static int path_flags(struct aa_profile *profile, const struct path *path) /** * match_mnt_path_str - handle path matching for mount + * @subj_cred: cred of confined subject * @profile: the confining profile * @mntpath: for the mntpnt (NOT NULL) * @buffer: buffer to be used to lookup mntpath @@ -296,7 +300,8 @@ static int path_flags(struct aa_profile *profile, const struct path *path) * * Returns: 0 on success else error */ -static int match_mnt_path_str(struct aa_profile *profile, +static int match_mnt_path_str(const struct cred *subj_cred, + struct aa_profile *profile, const struct path *mntpath, char *buffer, const char *devname, const char *type, unsigned long flags, void *data, bool binary, @@ -337,12 +342,14 @@ static int match_mnt_path_str(struct aa_profile *profile, error = 0; audit: - return audit_mount(profile, OP_MOUNT, mntpnt, devname, type, NULL, + return audit_mount(subj_cred, profile, OP_MOUNT, mntpnt, devname, + type, NULL, flags, data, AA_MAY_MOUNT, &perms, info, error); } /** * match_mnt - handle path matching for mount + * @subj_cred: cred of the subject * @profile: the confining profile * @path: for the mntpnt (NOT NULL) * @buffer: buffer to be used to lookup mntpath @@ -355,7 +362,8 @@ static int match_mnt_path_str(struct aa_profile *profile, * * Returns: 0 on success else error */ -static int match_mnt(struct aa_profile *profile, const struct path *path, +static int match_mnt(const struct cred *subj_cred, + struct aa_profile *profile, const struct path *path, char *buffer, const struct path *devpath, char *devbuffer, const char *type, unsigned long flags, void *data, bool binary) @@ -379,11 +387,12 @@ static int match_mnt(struct aa_profile *profile, const struct path *path, devname = ERR_PTR(error); } - return match_mnt_path_str(profile, path, buffer, devname, type, flags, - data, binary, info); + return match_mnt_path_str(subj_cred, profile, path, buffer, devname, + type, flags, data, binary, info); } -int aa_remount(struct aa_label *label, const struct path *path, +int aa_remount(const struct cred *subj_cred, + struct aa_label *label, const struct path *path, unsigned long flags, void *data) { struct aa_profile *profile; @@ -400,14 +409,16 @@ int aa_remount(struct aa_label *label, const struct path *path, if (!buffer) return -ENOMEM; error = fn_for_each_confined(label, profile, - match_mnt(profile, path, buffer, NULL, NULL, NULL, + match_mnt(subj_cred, profile, path, buffer, NULL, + NULL, NULL, flags, data, binary)); aa_put_buffer(buffer); return error; } -int aa_bind_mount(struct aa_label *label, const struct path *path, +int aa_bind_mount(const struct cred *subj_cred, + struct aa_label *label, const struct path *path, const char *dev_name, unsigned long flags) { struct aa_profile *profile; @@ -434,8 +445,8 @@ int aa_bind_mount(struct aa_label *label, const struct path *path, goto out; error = fn_for_each_confined(label, profile, - match_mnt(profile, path, buffer, &old_path, old_buffer, - NULL, flags, NULL, false)); + match_mnt(subj_cred, profile, path, buffer, &old_path, + old_buffer, NULL, flags, NULL, false)); out: aa_put_buffer(buffer); aa_put_buffer(old_buffer); @@ -444,7 +455,8 @@ int aa_bind_mount(struct aa_label *label, const struct path *path, return error; } -int aa_mount_change_type(struct aa_label *label, const struct path *path, +int aa_mount_change_type(const struct cred *subj_cred, + struct aa_label *label, const struct path *path, unsigned long flags) { struct aa_profile *profile; @@ -462,14 +474,16 @@ int aa_mount_change_type(struct aa_label *label, const struct path *path, if (!buffer) return -ENOMEM; error = fn_for_each_confined(label, profile, - match_mnt(profile, path, buffer, NULL, NULL, NULL, + match_mnt(subj_cred, profile, path, buffer, NULL, + NULL, NULL, flags, NULL, false)); aa_put_buffer(buffer); return error; } -int aa_move_mount(struct aa_label *label, const struct path *path, +int aa_move_mount(const struct cred *subj_cred, + struct aa_label *label, const struct path *path, const char *orig_name) { struct aa_profile *profile; @@ -493,7 +507,8 @@ int aa_move_mount(struct aa_label *label, const struct path *path, if (!buffer || !old_buffer) goto out; error = fn_for_each_confined(label, profile, - match_mnt(profile, path, buffer, &old_path, old_buffer, + match_mnt(subj_cred, profile, path, buffer, &old_path, + old_buffer, NULL, MS_MOVE, NULL, false)); out: aa_put_buffer(buffer); @@ -503,9 +518,9 @@ int aa_move_mount(struct aa_label *label, const struct path *path, return error; } -int aa_new_mount(struct aa_label *label, const char *dev_name, - const struct path *path, const char *type, unsigned long flags, - void *data) +int aa_new_mount(const struct cred *subj_cred, struct aa_label *label, + const char *dev_name, const struct path *path, + const char *type, unsigned long flags, void *data) { struct aa_profile *profile; char *buffer = NULL, *dev_buffer = NULL; @@ -550,12 +565,14 @@ int aa_new_mount(struct aa_label *label, const char *dev_name, goto out; } error = fn_for_each_confined(label, profile, - match_mnt(profile, path, buffer, dev_path, dev_buffer, + match_mnt(subj_cred, profile, path, buffer, + dev_path, dev_buffer, type, flags, data, binary)); } else { error = fn_for_each_confined(label, profile, - match_mnt_path_str(profile, path, buffer, dev_name, - type, flags, data, binary, NULL)); + match_mnt_path_str(subj_cred, profile, path, + buffer, dev_name, + type, flags, data, binary, NULL)); } out: @@ -567,7 +584,8 @@ int aa_new_mount(struct aa_label *label, const char *dev_name, return error; } -static int profile_umount(struct aa_profile *profile, const struct path *path, +static int profile_umount(const struct cred *subj_cred, + struct aa_profile *profile, const struct path *path, char *buffer) { struct aa_ruleset *rules = list_first_entry(&profile->rules, @@ -596,11 +614,13 @@ static int profile_umount(struct aa_profile *profile, const struct path *path, error = -EACCES; audit: - return audit_mount(profile, OP_UMOUNT, name, NULL, NULL, NULL, 0, NULL, + return audit_mount(subj_cred, profile, OP_UMOUNT, name, NULL, NULL, + NULL, 0, NULL, AA_MAY_UMOUNT, &perms, info, error); } -int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags) +int aa_umount(const struct cred *subj_cred, struct aa_label *label, + struct vfsmount *mnt, int flags) { struct aa_profile *profile; char *buffer = NULL; @@ -615,7 +635,7 @@ int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags) return -ENOMEM; error = fn_for_each_confined(label, profile, - profile_umount(profile, &path, buffer)); + profile_umount(subj_cred, profile, &path, buffer)); aa_put_buffer(buffer); return error; @@ -625,7 +645,8 @@ int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags) * * Returns: label for transition or ERR_PTR. Does not return NULL */ -static struct aa_label *build_pivotroot(struct aa_profile *profile, +static struct aa_label *build_pivotroot(const struct cred *subj_cred, + struct aa_profile *profile, const struct path *new_path, char *new_buffer, const struct path *old_path, @@ -670,7 +691,8 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, error = 0; audit: - error = audit_mount(profile, OP_PIVOTROOT, new_name, old_name, + error = audit_mount(subj_cred, profile, OP_PIVOTROOT, new_name, + old_name, NULL, trans_name, 0, NULL, AA_MAY_PIVOTROOT, &perms, info, error); if (error) @@ -679,7 +701,8 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, return aa_get_newest_label(&profile->label); } -int aa_pivotroot(struct aa_label *label, const struct path *old_path, +int aa_pivotroot(const struct cred *subj_cred, struct aa_label *label, + const struct path *old_path, const struct path *new_path) { struct aa_profile *profile; @@ -697,7 +720,8 @@ int aa_pivotroot(struct aa_label *label, const struct path *old_path, if (!old_buffer || !new_buffer) goto out; target = fn_label_build(label, profile, GFP_KERNEL, - build_pivotroot(profile, new_path, new_buffer, + build_pivotroot(subj_cred, profile, new_path, + new_buffer, old_path, old_buffer)); if (!target) { info = "label build failed"; @@ -723,7 +747,8 @@ int aa_pivotroot(struct aa_label *label, const struct path *old_path, fail: /* TODO: add back in auditing of new_name and old_name */ error = fn_for_each(label, profile, - audit_mount(profile, OP_PIVOTROOT, NULL /*new_name */, + audit_mount(subj_cred, profile, OP_PIVOTROOT, + NULL /*new_name */, NULL /* old_name */, NULL, NULL, 0, NULL, AA_MAY_PIVOTROOT, &nullperms, info, diff --git a/security/apparmor/net.c b/security/apparmor/net.c index 5e50f80e35db..704c171232ab 100644 --- a/security/apparmor/net.c +++ b/security/apparmor/net.c @@ -135,8 +135,8 @@ int aa_profile_af_perm(struct aa_profile *profile, return aa_check_perms(profile, &perms, request, ad, audit_net_cb); } -int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, - int type, int protocol) +int aa_af_perm(const struct cred *subj_cred, struct aa_label *label, + const char *op, u32 request, u16 family, int type, int protocol) { struct aa_profile *profile; DEFINE_AUDIT_NET(ad, op, NULL, family, type, protocol); @@ -146,7 +146,9 @@ int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, type)); } -static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request, +static int aa_label_sk_perm(const struct cred *subj_cred, + struct aa_label *label, + const char *op, u32 request, struct sock *sk) { struct aa_sk_ctx *ctx = SK_CTX(sk); @@ -159,6 +161,7 @@ static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request, struct aa_profile *profile; DEFINE_AUDIT_SK(ad, op, sk); + ad.subj_cred = subj_cred; error = fn_for_each_confined(label, profile, aa_profile_af_sk_perm(profile, &ad, request, sk)); } @@ -176,21 +179,21 @@ int aa_sk_perm(const char *op, u32 request, struct sock *sk) /* TODO: switch to begin_current_label ???? */ label = begin_current_label_crit_section(); - error = aa_label_sk_perm(label, op, request, sk); + error = aa_label_sk_perm(current_cred(), label, op, request, sk); end_current_label_crit_section(label); return error; } -int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request, - struct socket *sock) +int aa_sock_file_perm(const struct cred *subj_cred, struct aa_label *label, + const char *op, u32 request, struct socket *sock) { AA_BUG(!label); AA_BUG(!sock); AA_BUG(!sock->sk); - return aa_label_sk_perm(label, op, request, sock->sk); + return aa_label_sk_perm(subj_cred, label, op, request, sock->sk); } #ifdef CONFIG_NETWORK_SECMARK diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index e5f1ef83b0fd..8a07793ce103 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -762,21 +762,23 @@ static int audit_policy(struct aa_label *subj_label, const char *op, /* don't call out to other LSMs in the stack for apparmor policy admin * permissions */ -static int policy_ns_capable(struct aa_label *label, +static int policy_ns_capable(const struct cred *subj_cred, + struct aa_label *label, struct user_namespace *userns, int cap) { int err; /* check for MAC_ADMIN cap in cred */ - err = cap_capable(current_cred(), userns, cap, CAP_OPT_NONE); + err = cap_capable(subj_cred, userns, cap, CAP_OPT_NONE); if (!err) - err = aa_capable(label, cap, CAP_OPT_NONE); + err = aa_capable(subj_cred, label, cap, CAP_OPT_NONE); return err; } /** * aa_policy_view_capable - check if viewing policy in at @ns is allowed + * @subj_cred: cred of subject * @label: label that is trying to view policy in ns * @ns: namespace being viewed by @label (may be NULL if @label's ns) * @@ -785,9 +787,10 @@ static int policy_ns_capable(struct aa_label *label, * If @ns is NULL then the namespace being viewed is assumed to be the * tasks current namespace. */ -bool aa_policy_view_capable(struct aa_label *label, struct aa_ns *ns) +bool aa_policy_view_capable(const struct cred *subj_cred, + struct aa_label *label, struct aa_ns *ns) { - struct user_namespace *user_ns = current_user_ns(); + struct user_namespace *user_ns = subj_cred->user_ns; struct aa_ns *view_ns = labels_view(label); bool root_in_user_ns = uid_eq(current_euid(), make_kuid(user_ns, 0)) || in_egroup_p(make_kgid(user_ns, 0)); @@ -804,15 +807,17 @@ bool aa_policy_view_capable(struct aa_label *label, struct aa_ns *ns) return response; } -bool aa_policy_admin_capable(struct aa_label *label, struct aa_ns *ns) +bool aa_policy_admin_capable(const struct cred *subj_cred, + struct aa_label *label, struct aa_ns *ns) { - struct user_namespace *user_ns = current_user_ns(); - bool capable = policy_ns_capable(label, user_ns, CAP_MAC_ADMIN) == 0; + struct user_namespace *user_ns = subj_cred->user_ns; + bool capable = policy_ns_capable(subj_cred, label, user_ns, + CAP_MAC_ADMIN) == 0; AA_DEBUG("cap_mac_admin? %d\n", capable); AA_DEBUG("policy locked? %d\n", aa_g_lock_policy); - return aa_policy_view_capable(label, ns) && capable && + return aa_policy_view_capable(subj_cred, label, ns) && capable && !aa_g_lock_policy; } @@ -822,7 +827,7 @@ bool aa_current_policy_view_capable(struct aa_ns *ns) bool res; label = __begin_current_label_crit_section(); - res = aa_policy_view_capable(label, ns); + res = aa_policy_view_capable(current_cred(), label, ns); __end_current_label_crit_section(label); return res; @@ -834,7 +839,7 @@ bool aa_current_policy_admin_capable(struct aa_ns *ns) bool res; label = __begin_current_label_crit_section(); - res = aa_policy_admin_capable(label, ns); + res = aa_policy_admin_capable(current_cred(), label, ns); __end_current_label_crit_section(label); return res; @@ -842,13 +847,15 @@ bool aa_current_policy_admin_capable(struct aa_ns *ns) /** * aa_may_manage_policy - can the current task manage policy + * @subj_cred; subjects cred * @label: label to check if it can manage policy * @ns: namespace being managed by @label (may be NULL if @label's ns) * @mask: contains the policy manipulation operation being done * * Returns: 0 if the task is allowed to manipulate policy else error */ -int aa_may_manage_policy(struct aa_label *label, struct aa_ns *ns, u32 mask) +int aa_may_manage_policy(const struct cred *subj_cred, struct aa_label *label, + struct aa_ns *ns, u32 mask) { const char *op; @@ -864,7 +871,7 @@ int aa_may_manage_policy(struct aa_label *label, struct aa_ns *ns, u32 mask) return audit_policy(label, op, NULL, NULL, "policy_locked", -EACCES); - if (!aa_policy_admin_capable(label, ns)) + if (!aa_policy_admin_capable(subj_cred, label, ns)) return audit_policy(label, op, NULL, NULL, "not policy admin", -EACCES); diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index 73ba26c646a5..dcc94c3153d5 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -43,6 +43,7 @@ static void audit_cb(struct audit_buffer *ab, void *va) /** * audit_resource - audit setting resource limit + * @subj_cred: cred setting the resource * @profile: profile being enforced (NOT NULL) * @resource: rlimit being auditing * @value: value being set @@ -52,13 +53,15 @@ static void audit_cb(struct audit_buffer *ab, void *va) * * Returns: 0 or ad->error else other error code on failure */ -static int audit_resource(struct aa_profile *profile, unsigned int resource, +static int audit_resource(const struct cred *subj_cred, + struct aa_profile *profile, unsigned int resource, unsigned long value, struct aa_label *peer, const char *info, int error) { DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_RLIMITS, OP_SETRLIMIT); + ad.subj_cred = subj_cred; ad.rlim.rlim = resource; ad.rlim.max = value; ad.peer = peer; @@ -82,7 +85,8 @@ int aa_map_resource(int resource) return rlim_map[resource]; } -static int profile_setrlimit(struct aa_profile *profile, unsigned int resource, +static int profile_setrlimit(const struct cred *subj_cred, + struct aa_profile *profile, unsigned int resource, struct rlimit *new_rlim) { struct aa_ruleset *rules = list_first_entry(&profile->rules, @@ -92,12 +96,13 @@ static int profile_setrlimit(struct aa_profile *profile, unsigned int resource, if (rules->rlimits.mask & (1 << resource) && new_rlim->rlim_max > rules->rlimits.limits[resource].rlim_max) e = -EACCES; - return audit_resource(profile, resource, new_rlim->rlim_max, NULL, NULL, - e); + return audit_resource(subj_cred, profile, resource, new_rlim->rlim_max, + NULL, NULL, e); } /** * aa_task_setrlimit - test permission to set an rlimit + * @subj_cred: cred setting the limit * @label: label confining the task (NOT NULL) * @task: task the resource is being set on * @resource: the resource being set @@ -107,7 +112,8 @@ static int profile_setrlimit(struct aa_profile *profile, unsigned int resource, * * Returns: 0 or error code if setting resource failed */ -int aa_task_setrlimit(struct aa_label *label, struct task_struct *task, +int aa_task_setrlimit(const struct cred *subj_cred, struct aa_label *label, + struct task_struct *task, unsigned int resource, struct rlimit *new_rlim) { struct aa_profile *profile; @@ -126,14 +132,15 @@ int aa_task_setrlimit(struct aa_label *label, struct task_struct *task, */ if (label != peer && - aa_capable(label, CAP_SYS_RESOURCE, CAP_OPT_NOAUDIT) != 0) + aa_capable(subj_cred, label, CAP_SYS_RESOURCE, CAP_OPT_NOAUDIT) != 0) error = fn_for_each(label, profile, - audit_resource(profile, resource, + audit_resource(subj_cred, profile, resource, new_rlim->rlim_max, peer, "cap_sys_resource", -EACCES)); else error = fn_for_each_confined(label, profile, - profile_setrlimit(profile, resource, new_rlim)); + profile_setrlimit(subj_cred, profile, resource, + new_rlim)); aa_put_label(peer); return error; diff --git a/security/apparmor/task.c b/security/apparmor/task.c index 79850e832142..0d7af707cccd 100644 --- a/security/apparmor/task.c +++ b/security/apparmor/task.c @@ -226,14 +226,16 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va) /* assumes check for RULE_MEDIATES is already done */ /* TODO: conditionals */ -static int profile_ptrace_perm(struct aa_profile *profile, - struct aa_label *peer, u32 request, - struct apparmor_audit_data *ad) +static int profile_ptrace_perm(const struct cred *cred, + struct aa_profile *profile, + struct aa_label *peer, u32 request, + struct apparmor_audit_data *ad) { struct aa_ruleset *rules = list_first_entry(&profile->rules, typeof(*rules), list); struct aa_perms perms = { }; + ad->subj_cred = cred; ad->peer = peer; aa_profile_match_label(profile, rules, peer, AA_CLASS_PTRACE, request, &perms); @@ -241,7 +243,8 @@ static int profile_ptrace_perm(struct aa_profile *profile, return aa_check_perms(profile, &perms, request, ad, audit_ptrace_cb); } -static int profile_tracee_perm(struct aa_profile *tracee, +static int profile_tracee_perm(const struct cred *cred, + struct aa_profile *tracee, struct aa_label *tracer, u32 request, struct apparmor_audit_data *ad) { @@ -249,10 +252,11 @@ static int profile_tracee_perm(struct aa_profile *tracee, !ANY_RULE_MEDIATES(&tracee->rules, AA_CLASS_PTRACE)) return 0; - return profile_ptrace_perm(tracee, tracer, request, ad); + return profile_ptrace_perm(cred, tracee, tracer, request, ad); } -static int profile_tracer_perm(struct aa_profile *tracer, +static int profile_tracer_perm(const struct cred *cred, + struct aa_profile *tracer, struct aa_label *tracee, u32 request, struct apparmor_audit_data *ad) { @@ -260,7 +264,7 @@ static int profile_tracer_perm(struct aa_profile *tracer, return 0; if (ANY_RULE_MEDIATES(&tracer->rules, AA_CLASS_PTRACE)) - return profile_ptrace_perm(tracer, tracee, request, ad); + return profile_ptrace_perm(cred, tracer, tracee, request, ad); /* profile uses the old style capability check for ptrace */ if (&tracer->label == tracee) @@ -269,8 +273,8 @@ static int profile_tracer_perm(struct aa_profile *tracer, ad->subj_label = &tracer->label; ad->peer = tracee; ad->request = 0; - ad->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, - CAP_OPT_NONE); + ad->error = aa_capable(cred, &tracer->label, CAP_SYS_PTRACE, + CAP_OPT_NONE); return aa_audit(AUDIT_APPARMOR_AUTO, tracer, ad, audit_ptrace_cb); } @@ -283,7 +287,8 @@ static int profile_tracer_perm(struct aa_profile *tracer, * * Returns: %0 else error code if permission denied or error */ -int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee, +int aa_may_ptrace(const struct cred *tracer_cred, struct aa_label *tracer, + const struct cred *tracee_cred, struct aa_label *tracee, u32 request) { struct aa_profile *profile; @@ -291,6 +296,8 @@ int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee, DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_PTRACE, OP_PTRACE); return xcheck_labels(tracer, tracee, profile, - profile_tracer_perm(profile, tracee, request, &sa), - profile_tracee_perm(profile, tracer, xrequest, &sa)); + profile_tracer_perm(tracer_cred, profile, tracee, + request, &sa), + profile_tracee_perm(tracee_cred, profile, tracer, + xrequest, &sa)); } From 96af45154a0be30485ad07f70f852b1456cb13d7 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 10 Sep 2023 03:35:22 -0700 Subject: [PATCH 0397/1397] apparmor: Fix regression in mount mediation [ Upstream commit 157a3537d6bc28ceb9a11fc8cb67f2152d860146 ] commit 2db154b3ea8e ("vfs: syscall: Add move_mount(2) to move mounts around") introduced a new move_mount(2) system call and a corresponding new LSM security_move_mount hook but did not implement this hook for any existing LSM. This creates a regression for AppArmor mediation of mount. This patch provides a base mapping of the move_mount syscall to the existing mount mediation. In the future we may introduce additional mediations around the new mount calls. Fixes: 2db154b3ea8e ("vfs: syscall: Add move_mount(2) to move mounts around") CC: stable@vger.kernel.org Reported-by: Andreas Steinmetz Signed-off-by: John Johansen Signed-off-by: Sasha Levin --- security/apparmor/include/mount.h | 7 +++-- security/apparmor/lsm.c | 20 ++++++++++++-- security/apparmor/mount.c | 46 +++++++++++++++++++------------ 3 files changed, 51 insertions(+), 22 deletions(-) diff --git a/security/apparmor/include/mount.h b/security/apparmor/include/mount.h index 10c76f906a65..46834f828179 100644 --- a/security/apparmor/include/mount.h +++ b/security/apparmor/include/mount.h @@ -38,9 +38,12 @@ int aa_mount_change_type(const struct cred *subj_cred, struct aa_label *label, const struct path *path, unsigned long flags); +int aa_move_mount_old(const struct cred *subj_cred, + struct aa_label *label, const struct path *path, + const char *old_name); int aa_move_mount(const struct cred *subj_cred, - struct aa_label *label, const struct path *path, - const char *old_name); + struct aa_label *label, const struct path *from_path, + const struct path *to_path); int aa_new_mount(const struct cred *subj_cred, struct aa_label *label, const char *dev_name, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 60f95cc4532a..6fdab1b5ede5 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -607,8 +607,8 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path, error = aa_mount_change_type(current_cred(), label, path, flags); else if (flags & MS_MOVE) - error = aa_move_mount(current_cred(), label, path, - dev_name); + error = aa_move_mount_old(current_cred(), label, path, + dev_name); else error = aa_new_mount(current_cred(), label, dev_name, path, type, flags, data); @@ -618,6 +618,21 @@ static int apparmor_sb_mount(const char *dev_name, const struct path *path, return error; } +static int apparmor_move_mount(const struct path *from_path, + const struct path *to_path) +{ + struct aa_label *label; + int error = 0; + + label = __begin_current_label_crit_section(); + if (!unconfined(label)) + error = aa_move_mount(current_cred(), label, from_path, + to_path); + __end_current_label_crit_section(label); + + return error; +} + static int apparmor_sb_umount(struct vfsmount *mnt, int flags) { struct aa_label *label; @@ -1240,6 +1255,7 @@ static struct security_hook_list apparmor_hooks[] __ro_after_init = { LSM_HOOK_INIT(capget, apparmor_capget), LSM_HOOK_INIT(capable, apparmor_capable), + LSM_HOOK_INIT(move_mount, apparmor_move_mount), LSM_HOOK_INIT(sb_mount, apparmor_sb_mount), LSM_HOOK_INIT(sb_umount, apparmor_sb_umount), LSM_HOOK_INIT(sb_pivotroot, apparmor_sb_pivotroot), diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index 2bb77aacc49a..f2a114e54007 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -483,36 +483,46 @@ int aa_mount_change_type(const struct cred *subj_cred, } int aa_move_mount(const struct cred *subj_cred, - struct aa_label *label, const struct path *path, - const char *orig_name) + struct aa_label *label, const struct path *from_path, + const struct path *to_path) { struct aa_profile *profile; - char *buffer = NULL, *old_buffer = NULL; - struct path old_path; + char *to_buffer = NULL, *from_buffer = NULL; int error; AA_BUG(!label); - AA_BUG(!path); + AA_BUG(!from_path); + AA_BUG(!to_path); + + to_buffer = aa_get_buffer(false); + from_buffer = aa_get_buffer(false); + error = -ENOMEM; + if (!to_buffer || !from_buffer) + goto out; + error = fn_for_each_confined(label, profile, + match_mnt(subj_cred, profile, to_path, to_buffer, + from_path, from_buffer, + NULL, MS_MOVE, NULL, false)); +out: + aa_put_buffer(to_buffer); + aa_put_buffer(from_buffer); + + return error; +} + +int aa_move_mount_old(const struct cred *subj_cred, struct aa_label *label, + const struct path *path, const char *orig_name) +{ + struct path old_path; + int error; if (!orig_name || !*orig_name) return -EINVAL; - error = kern_path(orig_name, LOOKUP_FOLLOW, &old_path); if (error) return error; - buffer = aa_get_buffer(false); - old_buffer = aa_get_buffer(false); - error = -ENOMEM; - if (!buffer || !old_buffer) - goto out; - error = fn_for_each_confined(label, profile, - match_mnt(subj_cred, profile, path, buffer, &old_path, - old_buffer, - NULL, MS_MOVE, NULL, false)); -out: - aa_put_buffer(buffer); - aa_put_buffer(old_buffer); + error = aa_move_mount(subj_cred, label, &old_path, path); path_put(&old_path); return error; From 03ec9c28817d97be94cc68084e844a69b2529ea6 Mon Sep 17 00:00:00 2001 From: Masum Reza Date: Sun, 24 Sep 2023 16:46:55 +0530 Subject: [PATCH 0398/1397] Bluetooth: btusb: Add RTW8852BE device 13d3:3570 to device tables [ Upstream commit 02be109d3a405dbc4d53fb4b4473d7a113548088 ] This device is used in TP-Link TX20E WiFi+Bluetooth adapter. Relevant information in /sys/kernel/debug/usb/devices about the Bluetooth device is listed as the below. T: Bus=01 Lev=01 Prnt=01 Port=08 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=13d3 ProdID=3570 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Masum Reza Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: da06ff1f585e ("Bluetooth: btusb: Add 0bda:b85b for Fn-Link RTL8852BE") Signed-off-by: Sasha Levin --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 63ebd5677d26..e3f11ea2a9fc 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -543,6 +543,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x887b), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x13d3, 0x3570), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3571), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, From ac6d4cd9feeb7ad12f4ce672ac0bcb100ac70dfd Mon Sep 17 00:00:00 2001 From: Guan Wentao Date: Thu, 12 Oct 2023 19:21:17 +0800 Subject: [PATCH 0399/1397] Bluetooth: btusb: Add 0bda:b85b for Fn-Link RTL8852BE [ Upstream commit da06ff1f585ea784c79f80e7fab0e0c4ebb49c1c ] Add PID/VID 0bda:b85b for Realtek RTL8852BE USB bluetooth part. The PID/VID was reported by the patch last year. [1] Some SBCs like rockpi 5B A8 module contains the device. And it`s founded in website. [2] [3] Here is the device tables in /sys/kernel/debug/usb/devices . T: Bus=07 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=b85b Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Link: https://lore.kernel.org/all/20220420052402.19049-1-tangmeng@uniontech.com/ [1] Link: https://forum.radxa.com/t/bluetooth-on-ubuntu/13051/4 [2] Link: https://ubuntuforums.org/showthread.php?t=2489527 [3] Cc: stable@vger.kernel.org Signed-off-by: Meng Tang Signed-off-by: Guan Wentao Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index e3f11ea2a9fc..66080fae072f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -543,6 +543,8 @@ static const struct usb_device_id quirks_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x887b), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0bda, 0xb85b), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3570), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x13d3, 0x3571), .driver_info = BTUSB_REALTEK | From 54d72f6ab73e7a0d75c2a80f2906958112b7be40 Mon Sep 17 00:00:00 2001 From: Muhammad Ahmed Date: Mon, 18 Sep 2023 16:52:54 -0400 Subject: [PATCH 0400/1397] drm/amd/display: enable dsc_clk even if dsc_pg disabled [ Upstream commit 40255df370e94d44f0f0a924400d68db0ee31bec ] [why] need to enable dsc_clk regardless dsc_pg Reviewed-by: Charlene Liu Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Aurabindo Pillai Signed-off-by: Muhammad Ahmed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc.c | 8 ++++---- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 38abbd0c9d99..186936ad283a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1843,7 +1843,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c if (dc->hwss.subvp_pipe_control_lock) dc->hwss.subvp_pipe_control_lock(dc, context, true, true, NULL, subvp_prev_use); - if (dc->debug.enable_double_buffered_dsc_pg_support) + if (dc->hwss.update_dsc_pg) dc->hwss.update_dsc_pg(dc, context, false); disable_dangling_plane(dc, context); @@ -1950,7 +1950,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc->hwss.optimize_bandwidth(dc, context); } - if (dc->debug.enable_double_buffered_dsc_pg_support) + if (dc->hwss.update_dsc_pg) dc->hwss.update_dsc_pg(dc, context, true); if (dc->ctx->dce_version >= DCE_VERSION_MAX) @@ -2197,7 +2197,7 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) dc->hwss.optimize_bandwidth(dc, context); - if (dc->debug.enable_double_buffered_dsc_pg_support) + if (dc->hwss.update_dsc_pg) dc->hwss.update_dsc_pg(dc, context, true); } @@ -3533,7 +3533,7 @@ static void commit_planes_for_stream(struct dc *dc, if (get_seamless_boot_stream_count(context) == 0) dc->hwss.prepare_bandwidth(dc, context); - if (dc->debug.enable_double_buffered_dsc_pg_support) + if (dc->hwss.update_dsc_pg) dc->hwss.update_dsc_pg(dc, context, false); context_clock_trace(dc, context); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index be59e1c02f8a..c9140b50c345 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -77,6 +77,9 @@ void dcn32_dsc_pg_control( if (hws->ctx->dc->debug.disable_dsc_power_gate) return; + if (!hws->ctx->dc->debug.enable_double_buffered_dsc_pg_support) + return; + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); From 1443ec850448d98005ffbd1539dceced1a754c42 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 26 Jul 2023 13:57:03 -0700 Subject: [PATCH 0401/1397] torture: Make torture_hrtimeout_ns() take an hrtimer mode parameter [ Upstream commit a741deac787f0d2d7068638c067db20af9e63752 ] The current torture-test sleeps are waiting for a duration, but there are situations where it is better to wait for an absolute time, for example, when ending a stutter interval. This commit therefore adds an hrtimer mode parameter to torture_hrtimeout_ns(). Why not also the other torture_hrtimeout_*() functions? The theory is that most absolute times will be in nanoseconds, especially not (say) jiffies. Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Stable-dep-of: cca42bd8eb1b ("rcutorture: Fix stuttering races and other issues") Signed-off-by: Sasha Levin --- include/linux/torture.h | 3 ++- kernel/torture.c | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/linux/torture.h b/include/linux/torture.h index bb466eec01e4..017f0f710815 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -81,7 +81,8 @@ static inline void torture_random_init(struct torture_random_state *trsp) } /* Definitions for high-resolution-timer sleeps. */ -int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp); +int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode, + struct torture_random_state *trsp); int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state *trsp); int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state *trsp); int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp); diff --git a/kernel/torture.c b/kernel/torture.c index b28b05bbef02..e851b8e9390b 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -87,14 +87,15 @@ EXPORT_SYMBOL_GPL(verbose_torout_sleep); * nanosecond random fuzz. This function and its friends desynchronize * testing from the timer wheel. */ -int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_state *trsp) +int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, const enum hrtimer_mode mode, + struct torture_random_state *trsp) { ktime_t hto = baset_ns; if (trsp) hto += torture_random(trsp) % fuzzt_ns; set_current_state(TASK_IDLE); - return schedule_hrtimeout(&hto, HRTIMER_MODE_REL); + return schedule_hrtimeout(&hto, mode); } EXPORT_SYMBOL_GPL(torture_hrtimeout_ns); @@ -106,7 +107,7 @@ int torture_hrtimeout_us(u32 baset_us, u32 fuzzt_ns, struct torture_random_state { ktime_t baset_ns = baset_us * NSEC_PER_USEC; - return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp); + return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_us); @@ -123,7 +124,7 @@ int torture_hrtimeout_ms(u32 baset_ms, u32 fuzzt_us, struct torture_random_state fuzzt_ns = (u32)~0U; else fuzzt_ns = fuzzt_us * NSEC_PER_USEC; - return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp); + return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_ms); @@ -136,7 +137,7 @@ int torture_hrtimeout_jiffies(u32 baset_j, struct torture_random_state *trsp) { ktime_t baset_ns = jiffies_to_nsecs(baset_j); - return torture_hrtimeout_ns(baset_ns, jiffies_to_nsecs(1), trsp); + return torture_hrtimeout_ns(baset_ns, jiffies_to_nsecs(1), HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_jiffies); @@ -153,7 +154,7 @@ int torture_hrtimeout_s(u32 baset_s, u32 fuzzt_ms, struct torture_random_state * fuzzt_ns = (u32)~0U; else fuzzt_ns = fuzzt_ms * NSEC_PER_MSEC; - return torture_hrtimeout_ns(baset_ns, fuzzt_ns, trsp); + return torture_hrtimeout_ns(baset_ns, fuzzt_ns, HRTIMER_MODE_REL, trsp); } EXPORT_SYMBOL_GPL(torture_hrtimeout_s); From 5268d95c06152dc73e673de284d128c76e9ce895 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 29 Jul 2023 14:27:31 +0000 Subject: [PATCH 0402/1397] rcutorture: Fix stuttering races and other issues [ Upstream commit cca42bd8eb1b54a4c9bbf48c79d120e66619a3e4 ] The stuttering code isn't functioning as expected. Ideally, it should pause the torture threads for a designated period before resuming. Yet, it fails to halt the test for the correct duration. Additionally, a race condition exists, potentially causing the stuttering code to pause for an extended period if the 'spt' variable is non-zero due to the stutter orchestration thread's inadequate CPU time. Moreover, over-stuttering can hinder RCU's progress on TREE07 kernels. This happens as the stuttering code may run within a softirq due to RCU callbacks. Consequently, ksoftirqd keeps a CPU busy for several seconds, thus obstructing RCU's progress. This situation triggers a warning message in the logs: [ 2169.481783] rcu_torture_writer: rtort_pipe_count: 9 This warning suggests that an RCU torture object, although invisible to RCU readers, couldn't make it past the pipe array and be freed -- a strong indication that there weren't enough grace periods during the stutter interval. To address these issues, this patch sets the "stutter end" time to an absolute point in the future set by the main stutter thread. This is then used for waiting in stutter_wait(). While the stutter thread still defines this absolute time, the waiters' waiting logic doesn't rely on the stutter thread receiving sufficient CPU time to halt the stuttering as the halting is now self-controlled. Cc: stable@vger.kernel.org Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Sasha Levin --- kernel/torture.c | 45 ++++++++++++--------------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/kernel/torture.c b/kernel/torture.c index e851b8e9390b..c7b475883b9a 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -721,7 +721,7 @@ static void torture_shutdown_cleanup(void) * suddenly applied to or removed from the system. */ static struct task_struct *stutter_task; -static int stutter_pause_test; +static ktime_t stutter_till_abs_time; static int stutter; static int stutter_gap; @@ -731,30 +731,16 @@ static int stutter_gap; */ bool stutter_wait(const char *title) { - unsigned int i = 0; bool ret = false; - int spt; + ktime_t till_ns; cond_resched_tasks_rcu_qs(); - spt = READ_ONCE(stutter_pause_test); - for (; spt; spt = READ_ONCE(stutter_pause_test)) { - if (!ret && !rt_task(current)) { - sched_set_normal(current, MAX_NICE); - ret = true; - } - if (spt == 1) { - torture_hrtimeout_jiffies(1, NULL); - } else if (spt == 2) { - while (READ_ONCE(stutter_pause_test)) { - if (!(i++ & 0xffff)) - torture_hrtimeout_us(10, 0, NULL); - cond_resched(); - } - } else { - torture_hrtimeout_jiffies(round_jiffies_relative(HZ), NULL); - } - torture_shutdown_absorb(title); + till_ns = READ_ONCE(stutter_till_abs_time); + if (till_ns && ktime_before(ktime_get(), till_ns)) { + torture_hrtimeout_ns(till_ns, 0, HRTIMER_MODE_ABS, NULL); + ret = true; } + torture_shutdown_absorb(title); return ret; } EXPORT_SYMBOL_GPL(stutter_wait); @@ -765,23 +751,16 @@ EXPORT_SYMBOL_GPL(stutter_wait); */ static int torture_stutter(void *arg) { - DEFINE_TORTURE_RANDOM(rand); - int wtime; + ktime_t till_ns; VERBOSE_TOROUT_STRING("torture_stutter task started"); do { if (!torture_must_stop() && stutter > 1) { - wtime = stutter; - if (stutter > 2) { - WRITE_ONCE(stutter_pause_test, 1); - wtime = stutter - 3; - torture_hrtimeout_jiffies(wtime, &rand); - wtime = 2; - } - WRITE_ONCE(stutter_pause_test, 2); - torture_hrtimeout_jiffies(wtime, NULL); + till_ns = ktime_add_ns(ktime_get(), + jiffies_to_nsecs(stutter)); + WRITE_ONCE(stutter_till_abs_time, till_ns); + torture_hrtimeout_jiffies(stutter - 1, NULL); } - WRITE_ONCE(stutter_pause_test, 0); if (!torture_must_stop()) torture_hrtimeout_jiffies(stutter_gap, NULL); torture_shutdown_absorb("torture_stutter"); From fd7a4c555634e3bad0445e105f0428d51b732b58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 4 Sep 2023 12:53:34 +0300 Subject: [PATCH 0403/1397] selftests/resctrl: Remove bw_report and bm_type from main() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 47e36f16c7846bf3627ff68525e02555c53dc99e ] bw_report is always set to "reads" and bm_type is set to "fill_buf" but is never used. Set bw_report directly to "reads" in MBA/MBM test and remove bm_type. Signed-off-by: Ilpo Järvinen Tested-by: Shaopeng Tan Reviewed-by: Reinette Chatre Reviewed-by: Shaopeng Tan Reviewed-by: "Wieczor-Retman, Maciej" Signed-off-by: Shuah Khan Stable-dep-of: 3aff51464455 ("selftests/resctrl: Extend signal handler coverage to unmount on receiving signal") Signed-off-by: Sasha Levin --- tools/testing/selftests/resctrl/mba_test.c | 4 ++-- tools/testing/selftests/resctrl/mbm_test.c | 4 ++-- tools/testing/selftests/resctrl/resctrl.h | 4 ++-- .../testing/selftests/resctrl/resctrl_tests.c | 18 +++++++----------- 4 files changed, 13 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index c7d1ec6d81ea..c5c0588779d2 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -141,7 +141,7 @@ void mba_test_cleanup(void) remove(RESULT_FILE_NAME); } -int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd) +int mba_schemata_change(int cpu_no, char **benchmark_cmd) { struct resctrl_val_param param = { .resctrl_val = MBA_STR, @@ -149,7 +149,7 @@ int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd) .mongrp = "m1", .cpu_no = cpu_no, .filename = RESULT_FILE_NAME, - .bw_report = bw_report, + .bw_report = "reads", .setup = mba_setup }; int ret; diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index d0c26a5e89fb..724412186d63 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -109,7 +109,7 @@ void mbm_test_cleanup(void) remove(RESULT_FILE_NAME); } -int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd) +int mbm_bw_change(size_t span, int cpu_no, char **benchmark_cmd) { struct resctrl_val_param param = { .resctrl_val = MBM_STR, @@ -118,7 +118,7 @@ int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd .span = span, .cpu_no = cpu_no, .filename = RESULT_FILE_NAME, - .bw_report = bw_report, + .bw_report = "reads", .setup = mbm_setup }; int ret; diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 9e46fa90e7be..df75c8906032 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -93,10 +93,10 @@ int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); int run_fill_buf(size_t span, int memflush, int op, bool once); int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param); -int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd); +int mbm_bw_change(size_t span, int cpu_no, char **benchmark_cmd); void tests_cleanup(void); void mbm_test_cleanup(void); -int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd); +int mba_schemata_change(int cpu_no, char **benchmark_cmd); void mba_test_cleanup(void); int get_cbm_mask(char *cache_type, char *cbm_mask); int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 59a361660d8c..116f67d833f7 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -70,8 +70,7 @@ void tests_cleanup(void) cat_test_cleanup(); } -static void run_mbm_test(char **benchmark_cmd, size_t span, - int cpu_no, char *bw_report) +static void run_mbm_test(char **benchmark_cmd, size_t span, int cpu_no) { int res; @@ -90,7 +89,7 @@ static void run_mbm_test(char **benchmark_cmd, size_t span, goto umount; } - res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd); + res = mbm_bw_change(span, cpu_no, benchmark_cmd); ksft_test_result(!res, "MBM: bw change\n"); if ((get_vendor() == ARCH_INTEL) && res) ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); @@ -99,7 +98,7 @@ static void run_mbm_test(char **benchmark_cmd, size_t span, umount_resctrlfs(); } -static void run_mba_test(char **benchmark_cmd, int cpu_no, char *bw_report) +static void run_mba_test(char **benchmark_cmd, int cpu_no) { int res; @@ -118,7 +117,7 @@ static void run_mba_test(char **benchmark_cmd, int cpu_no, char *bw_report) goto umount; } - res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd); + res = mba_schemata_change(cpu_no, benchmark_cmd); ksft_test_result(!res, "MBA: schemata change\n"); umount: @@ -179,9 +178,9 @@ static void run_cat_test(int cpu_no, int no_of_bits) int main(int argc, char **argv) { bool has_ben = false, mbm_test = true, mba_test = true, cmt_test = true; - char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64]; char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE]; int c, cpu_no = 1, argc_new = argc, i, no_of_bits = 0; + char *benchmark_cmd[BENCHMARK_ARGS]; int ben_ind, ben_count, tests = 0; size_t span = 250 * MB; bool cat_test = true; @@ -284,9 +283,6 @@ int main(int argc, char **argv) benchmark_cmd[5] = NULL; } - sprintf(bw_report, "reads"); - sprintf(bm_type, "fill_buf"); - if (!check_resctrlfs_support()) return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n"); @@ -298,10 +294,10 @@ int main(int argc, char **argv) ksft_set_plan(tests ? : 4); if (mbm_test) - run_mbm_test(benchmark_cmd, span, cpu_no, bw_report); + run_mbm_test(benchmark_cmd, span, cpu_no); if (mba_test) - run_mba_test(benchmark_cmd, cpu_no, bw_report); + run_mba_test(benchmark_cmd, cpu_no); if (cmt_test) run_cmt_test(benchmark_cmd, cpu_no); From ef8454af673735ec44b851d6f78e6f6f3bcba30b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 4 Sep 2023 12:53:35 +0300 Subject: [PATCH 0404/1397] selftests/resctrl: Simplify span lifetime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b1a901e078c4ee4a6fe13021c4577ef5f3155251 ] struct resctrl_val_param contains span member. resctrl_val(), however, never uses it because the value of span is embedded into the default benchmark command and parsed from it by run_benchmark(). Remove span from resctrl_val_param. Provide DEFAULT_SPAN for the code that needs it. CMT and CAT tests communicate span that is different from the DEFAULT_SPAN between their internal functions which is converted into passing it directly as a parameter. Signed-off-by: Ilpo Järvinen Tested-by: Shaopeng Tan Reviewed-by: Reinette Chatre Reviewed-by: Shaopeng Tan Reviewed-by: "Wieczor-Retman, Maciej" Signed-off-by: Shuah Khan Stable-dep-of: 3aff51464455 ("selftests/resctrl: Extend signal handler coverage to unmount on receiving signal") Signed-off-by: Sasha Levin --- tools/testing/selftests/resctrl/cache.c | 5 +++-- tools/testing/selftests/resctrl/cat_test.c | 13 +++++++------ tools/testing/selftests/resctrl/cmt_test.c | 11 ++++++----- tools/testing/selftests/resctrl/mbm_test.c | 5 ++--- tools/testing/selftests/resctrl/resctrl.h | 8 ++++---- tools/testing/selftests/resctrl/resctrl_tests.c | 9 ++++----- 6 files changed, 26 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c index d3cbb829ff6a..a0318bd3a63d 100644 --- a/tools/testing/selftests/resctrl/cache.c +++ b/tools/testing/selftests/resctrl/cache.c @@ -205,10 +205,11 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid) * cache_val: execute benchmark and measure LLC occupancy resctrl * and perf cache miss for the benchmark * @param: parameters passed to cache_val() + * @span: buffer size for the benchmark * * Return: 0 on success. non-zero on failure. */ -int cat_val(struct resctrl_val_param *param) +int cat_val(struct resctrl_val_param *param, size_t span) { int memflush = 1, operation = 0, ret = 0; char *resctrl_val = param->resctrl_val; @@ -245,7 +246,7 @@ int cat_val(struct resctrl_val_param *param) if (ret) break; - if (run_fill_buf(param->span, memflush, operation, true)) { + if (run_fill_buf(span, memflush, operation, true)) { fprintf(stderr, "Error-running fill buffer\n"); ret = -1; goto pe_close; diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index 3848dfb46aba..97b87285ab2a 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -41,7 +41,7 @@ static int cat_setup(struct resctrl_val_param *p) return ret; } -static int check_results(struct resctrl_val_param *param) +static int check_results(struct resctrl_val_param *param, size_t span) { char *token_array[8], temp[512]; unsigned long sum_llc_perf_miss = 0; @@ -76,7 +76,7 @@ static int check_results(struct resctrl_val_param *param) fclose(fp); no_of_bits = count_bits(param->mask); - return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64, + return show_cache_info(sum_llc_perf_miss, no_of_bits, span / 64, MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, get_vendor() == ARCH_INTEL, false); } @@ -96,6 +96,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) char cbm_mask[256]; int count_of_bits; char pipe_message; + size_t span; /* Get default cbm mask for L3/L2 cache */ ret = get_cbm_mask(cache_type, cbm_mask); @@ -140,7 +141,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) /* Set param values for parent thread which will be allocated bitmask * with (max_bits - n) bits */ - param.span = cache_size * (count_of_bits - n) / count_of_bits; + span = cache_size * (count_of_bits - n) / count_of_bits; strcpy(param.ctrlgrp, "c2"); strcpy(param.mongrp, "m2"); strcpy(param.filename, RESULT_FILE_NAME2); @@ -162,7 +163,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) param.mask = l_mask_1; strcpy(param.ctrlgrp, "c1"); strcpy(param.mongrp, "m1"); - param.span = cache_size * n / count_of_bits; + span = cache_size * n / count_of_bits; strcpy(param.filename, RESULT_FILE_NAME1); param.num_of_runs = 0; param.cpu_no = sibling_cpu_no; @@ -176,9 +177,9 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) remove(param.filename); - ret = cat_val(¶m); + ret = cat_val(¶m, span); if (ret == 0) - ret = check_results(¶m); + ret = check_results(¶m, span); if (bm_pid == 0) { /* Tell parent that child is ready */ diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index cc93cb5ae7ee..33dbe51e7712 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -27,7 +27,7 @@ static int cmt_setup(struct resctrl_val_param *p) return 0; } -static int check_results(struct resctrl_val_param *param, int no_of_bits) +static int check_results(struct resctrl_val_param *param, size_t span, int no_of_bits) { char *token_array[8], temp[512]; unsigned long sum_llc_occu_resc = 0; @@ -58,7 +58,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits) } fclose(fp); - return show_cache_info(sum_llc_occu_resc, no_of_bits, param->span, + return show_cache_info(sum_llc_occu_resc, no_of_bits, span, MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, true, true); } @@ -74,6 +74,7 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) unsigned long long_mask; char cbm_mask[256]; int count_of_bits; + size_t span; int ret; ret = get_cbm_mask("L3", cbm_mask); @@ -102,13 +103,13 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) .cpu_no = cpu_no, .filename = RESULT_FILE_NAME, .mask = ~(long_mask << n) & long_mask, - .span = cache_size * n / count_of_bits, .num_of_runs = 0, .setup = cmt_setup, }; + span = cache_size * n / count_of_bits; if (strcmp(benchmark_cmd[0], "fill_buf") == 0) - sprintf(benchmark_cmd[1], "%zu", param.span); + sprintf(benchmark_cmd[1], "%zu", span); remove(RESULT_FILE_NAME); @@ -116,7 +117,7 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) if (ret) goto out; - ret = check_results(¶m, n); + ret = check_results(¶m, span, n); out: cmt_test_cleanup(); diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index 724412186d63..445aea1c64e8 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -109,13 +109,12 @@ void mbm_test_cleanup(void) remove(RESULT_FILE_NAME); } -int mbm_bw_change(size_t span, int cpu_no, char **benchmark_cmd) +int mbm_bw_change(int cpu_no, char **benchmark_cmd) { struct resctrl_val_param param = { .resctrl_val = MBM_STR, .ctrlgrp = "c1", .mongrp = "m1", - .span = span, .cpu_no = cpu_no, .filename = RESULT_FILE_NAME, .bw_report = "reads", @@ -129,7 +128,7 @@ int mbm_bw_change(size_t span, int cpu_no, char **benchmark_cmd) if (ret) goto out; - ret = check_results(span); + ret = check_results(DEFAULT_SPAN); out: mbm_test_cleanup(); diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index df75c8906032..d33452fde5b9 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -33,6 +33,8 @@ #define END_OF_TESTS 1 +#define DEFAULT_SPAN (250 * MB) + #define PARENT_EXIT(err_msg) \ do { \ perror(err_msg); \ @@ -47,7 +49,6 @@ * @ctrlgrp: Name of the control monitor group (con_mon grp) * @mongrp: Name of the monitor group (mon grp) * @cpu_no: CPU number to which the benchmark would be binded - * @span: Memory bytes accessed in each benchmark iteration * @filename: Name of file to which the o/p should be written * @bw_report: Bandwidth report type (reads vs writes) * @setup: Call back function to setup test environment @@ -57,7 +58,6 @@ struct resctrl_val_param { char ctrlgrp[64]; char mongrp[64]; int cpu_no; - size_t span; char filename[64]; char *bw_report; unsigned long mask; @@ -93,7 +93,7 @@ int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); int run_fill_buf(size_t span, int memflush, int op, bool once); int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param); -int mbm_bw_change(size_t span, int cpu_no, char **benchmark_cmd); +int mbm_bw_change(int cpu_no, char **benchmark_cmd); void tests_cleanup(void); void mbm_test_cleanup(void); int mba_schemata_change(int cpu_no, char **benchmark_cmd); @@ -103,7 +103,7 @@ int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size); void ctrlc_handler(int signum, siginfo_t *info, void *ptr); int signal_handler_register(void); void signal_handler_unregister(void); -int cat_val(struct resctrl_val_param *param); +int cat_val(struct resctrl_val_param *param, size_t span); void cat_test_cleanup(void); int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type); int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 116f67d833f7..1826b674ea30 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -70,7 +70,7 @@ void tests_cleanup(void) cat_test_cleanup(); } -static void run_mbm_test(char **benchmark_cmd, size_t span, int cpu_no) +static void run_mbm_test(char **benchmark_cmd, int cpu_no) { int res; @@ -89,7 +89,7 @@ static void run_mbm_test(char **benchmark_cmd, size_t span, int cpu_no) goto umount; } - res = mbm_bw_change(span, cpu_no, benchmark_cmd); + res = mbm_bw_change(cpu_no, benchmark_cmd); ksft_test_result(!res, "MBM: bw change\n"); if ((get_vendor() == ARCH_INTEL) && res) ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); @@ -182,7 +182,6 @@ int main(int argc, char **argv) int c, cpu_no = 1, argc_new = argc, i, no_of_bits = 0; char *benchmark_cmd[BENCHMARK_ARGS]; int ben_ind, ben_count, tests = 0; - size_t span = 250 * MB; bool cat_test = true; for (i = 0; i < argc; i++) { @@ -276,7 +275,7 @@ int main(int argc, char **argv) benchmark_cmd[i] = benchmark_cmd_area[i]; strcpy(benchmark_cmd[0], "fill_buf"); - sprintf(benchmark_cmd[1], "%zu", span); + sprintf(benchmark_cmd[1], "%u", DEFAULT_SPAN); strcpy(benchmark_cmd[2], "1"); strcpy(benchmark_cmd[3], "0"); strcpy(benchmark_cmd[4], "false"); @@ -294,7 +293,7 @@ int main(int argc, char **argv) ksft_set_plan(tests ? : 4); if (mbm_test) - run_mbm_test(benchmark_cmd, span, cpu_no); + run_mbm_test(benchmark_cmd, cpu_no); if (mba_test) run_mba_test(benchmark_cmd, cpu_no); From b1d34cb556292014b04593428190352662020117 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 4 Sep 2023 12:53:37 +0300 Subject: [PATCH 0405/1397] selftests/resctrl: Make benchmark command const and build it with pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e33cb5702a9f287d829b0e9e6abe57f6a4aba6d2 ] Benchmark command is used in multiple tests so it should not be mutated by the tests but CMT test alters span argument. Due to the order of tests (CMT test runs last), mutating the span argument in CMT test does not trigger any real problems currently. Mark benchmark_cmd strings as const and setup the benchmark command using pointers. Because the benchmark command becomes const, the input arguments can be used directly. Besides being simpler, using the input arguments directly also removes the internal size restriction. CMT test has to create a copy of the benchmark command before altering the benchmark command. Signed-off-by: Ilpo Järvinen Tested-by: Shaopeng Tan Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Reviewed-by: "Wieczor-Retman, Maciej" Signed-off-by: Shuah Khan Stable-dep-of: 3aff51464455 ("selftests/resctrl: Extend signal handler coverage to unmount on receiving signal") Signed-off-by: Sasha Levin --- tools/testing/selftests/resctrl/cmt_test.c | 25 +++++++++--- tools/testing/selftests/resctrl/mba_test.c | 2 +- tools/testing/selftests/resctrl/mbm_test.c | 2 +- tools/testing/selftests/resctrl/resctrl.h | 10 +++-- .../testing/selftests/resctrl/resctrl_tests.c | 39 ++++++++----------- tools/testing/selftests/resctrl/resctrl_val.c | 10 ++++- 6 files changed, 53 insertions(+), 35 deletions(-) diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index 33dbe51e7712..50bdbce9fba9 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -68,14 +68,17 @@ void cmt_test_cleanup(void) remove(RESULT_FILE_NAME); } -int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) +int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd) { + const char * const *cmd = benchmark_cmd; + const char *new_cmd[BENCHMARK_ARGS]; unsigned long cache_size = 0; unsigned long long_mask; + char *span_str = NULL; char cbm_mask[256]; int count_of_bits; size_t span; - int ret; + int ret, i; ret = get_cbm_mask("L3", cbm_mask); if (ret) @@ -108,12 +111,23 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) }; span = cache_size * n / count_of_bits; - if (strcmp(benchmark_cmd[0], "fill_buf") == 0) - sprintf(benchmark_cmd[1], "%zu", span); + + if (strcmp(cmd[0], "fill_buf") == 0) { + /* Duplicate the command to be able to replace span in it */ + for (i = 0; benchmark_cmd[i]; i++) + new_cmd[i] = benchmark_cmd[i]; + new_cmd[i] = NULL; + + ret = asprintf(&span_str, "%zu", span); + if (ret < 0) + return -1; + new_cmd[1] = span_str; + cmd = new_cmd; + } remove(RESULT_FILE_NAME); - ret = resctrl_val(benchmark_cmd, ¶m); + ret = resctrl_val(cmd, ¶m); if (ret) goto out; @@ -121,6 +135,7 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) out: cmt_test_cleanup(); + free(span_str); return ret; } diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index c5c0588779d2..d3bf4368341e 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -141,7 +141,7 @@ void mba_test_cleanup(void) remove(RESULT_FILE_NAME); } -int mba_schemata_change(int cpu_no, char **benchmark_cmd) +int mba_schemata_change(int cpu_no, const char * const *benchmark_cmd) { struct resctrl_val_param param = { .resctrl_val = MBA_STR, diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index 445aea1c64e8..d3c0d30c676a 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -109,7 +109,7 @@ void mbm_test_cleanup(void) remove(RESULT_FILE_NAME); } -int mbm_bw_change(int cpu_no, char **benchmark_cmd) +int mbm_bw_change(int cpu_no, const char * const *benchmark_cmd) { struct resctrl_val_param param = { .resctrl_val = MBM_STR, diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index d33452fde5b9..8578a8b4e145 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -33,6 +33,8 @@ #define END_OF_TESTS 1 +#define BENCHMARK_ARGS 64 + #define DEFAULT_SPAN (250 * MB) #define PARENT_EXIT(err_msg) \ @@ -92,11 +94,11 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); int run_fill_buf(size_t span, int memflush, int op, bool once); -int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param); -int mbm_bw_change(int cpu_no, char **benchmark_cmd); +int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param); +int mbm_bw_change(int cpu_no, const char * const *benchmark_cmd); void tests_cleanup(void); void mbm_test_cleanup(void); -int mba_schemata_change(int cpu_no, char **benchmark_cmd); +int mba_schemata_change(int cpu_no, const char * const *benchmark_cmd); void mba_test_cleanup(void); int get_cbm_mask(char *cache_type, char *cbm_mask); int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size); @@ -106,7 +108,7 @@ void signal_handler_unregister(void); int cat_val(struct resctrl_val_param *param, size_t span); void cat_test_cleanup(void); int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type); -int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd); +int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd); unsigned int count_bits(unsigned long n); void cmt_test_cleanup(void); int get_core_sibling(int cpu_no); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 1826b674ea30..1ac22c6d8ce8 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -10,9 +10,6 @@ */ #include "resctrl.h" -#define BENCHMARK_ARGS 64 -#define BENCHMARK_ARG_SIZE 64 - static int detect_vendor(void) { FILE *inf = fopen("/proc/cpuinfo", "r"); @@ -70,7 +67,7 @@ void tests_cleanup(void) cat_test_cleanup(); } -static void run_mbm_test(char **benchmark_cmd, int cpu_no) +static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no) { int res; @@ -98,7 +95,7 @@ static void run_mbm_test(char **benchmark_cmd, int cpu_no) umount_resctrlfs(); } -static void run_mba_test(char **benchmark_cmd, int cpu_no) +static void run_mba_test(const char * const *benchmark_cmd, int cpu_no) { int res; @@ -124,7 +121,7 @@ static void run_mba_test(char **benchmark_cmd, int cpu_no) umount_resctrlfs(); } -static void run_cmt_test(char **benchmark_cmd, int cpu_no) +static void run_cmt_test(const char * const *benchmark_cmd, int cpu_no) { int res; @@ -178,11 +175,12 @@ static void run_cat_test(int cpu_no, int no_of_bits) int main(int argc, char **argv) { bool has_ben = false, mbm_test = true, mba_test = true, cmt_test = true; - char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE]; int c, cpu_no = 1, argc_new = argc, i, no_of_bits = 0; - char *benchmark_cmd[BENCHMARK_ARGS]; + const char *benchmark_cmd[BENCHMARK_ARGS]; int ben_ind, ben_count, tests = 0; + char *span_str = NULL; bool cat_test = true; + int ret; for (i = 0; i < argc; i++) { if (strcmp(argv[i], "-b") == 0) { @@ -262,23 +260,19 @@ int main(int argc, char **argv) ksft_exit_fail_msg("Too long benchmark command.\n"); /* Extract benchmark command from command line. */ - for (i = ben_ind; i < argc; i++) { - benchmark_cmd[i - ben_ind] = benchmark_cmd_area[i]; - if (strlen(argv[i]) >= BENCHMARK_ARG_SIZE) - ksft_exit_fail_msg("Too long benchmark command argument.\n"); - sprintf(benchmark_cmd[i - ben_ind], "%s", argv[i]); - } + for (i = 0; i < argc - ben_ind; i++) + benchmark_cmd[i] = argv[i + ben_ind]; benchmark_cmd[ben_count] = NULL; } else { /* If no benchmark is given by "-b" argument, use fill_buf. */ - for (i = 0; i < 5; i++) - benchmark_cmd[i] = benchmark_cmd_area[i]; - - strcpy(benchmark_cmd[0], "fill_buf"); - sprintf(benchmark_cmd[1], "%u", DEFAULT_SPAN); - strcpy(benchmark_cmd[2], "1"); - strcpy(benchmark_cmd[3], "0"); - strcpy(benchmark_cmd[4], "false"); + benchmark_cmd[0] = "fill_buf"; + ret = asprintf(&span_str, "%u", DEFAULT_SPAN); + if (ret < 0) + ksft_exit_fail_msg("Out of memory!\n"); + benchmark_cmd[1] = span_str; + benchmark_cmd[2] = "1"; + benchmark_cmd[3] = "0"; + benchmark_cmd[4] = "false"; benchmark_cmd[5] = NULL; } @@ -304,5 +298,6 @@ int main(int argc, char **argv) if (cat_test) run_cat_test(cpu_no, no_of_bits); + free(span_str); ksft_finished(); } diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index ee07e0943f58..01bbe11a8983 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -629,7 +629,7 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start) * * Return: 0 on success. non-zero on failure. */ -int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) +int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param) { char *resctrl_val = param->resctrl_val; unsigned long bw_resc_start = 0; @@ -710,7 +710,13 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) if (ret) goto out; - value.sival_ptr = benchmark_cmd; + /* + * The cast removes constness but nothing mutates benchmark_cmd within + * the context of this process. At the receiving process, it becomes + * argv, which is mutable, on exec() but that's after fork() so it + * doesn't matter for the process running the tests. + */ + value.sival_ptr = (void *)benchmark_cmd; /* Taskset benchmark to specified cpu */ ret = taskset_benchmark(bm_pid, param->cpu_no); From 7989f7ad1f0a94bf47ab9d2bf0e96e1b974b6529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 2 Oct 2023 12:48:08 +0300 Subject: [PATCH 0406/1397] selftests/resctrl: Extend signal handler coverage to unmount on receiving signal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3aff5146445582454c35900f3c0c972987cdd595 ] Unmounting resctrl FS has been moved into the per test functions in resctrl_tests.c by commit caddc0fbe495 ("selftests/resctrl: Move resctrl FS mount/umount to higher level"). In case a signal (SIGINT, SIGTERM, or SIGHUP) is received, the running selftest is aborted by ctrlc_handler() which then unmounts resctrl fs before exiting. The current section between signal_handler_register() and signal_handler_unregister(), however, does not cover the entire duration when resctrl FS is mounted. Move signal_handler_register() and signal_handler_unregister() calls from per test files into resctrl_tests.c to properly unmount resctrl fs. In order to not add signal_handler_register()/unregister() n times, create helpers test_prepare() and test_cleanup(). Do not call ksft_exit_fail_msg() in test_prepare() but only in the per test function to keep the control flow cleaner without adding calls to exit() deep into the call chain. Adjust child process kill() call in ctrlc_handler() to only be invoked if the child was already forked. Fixes: caddc0fbe495 ("selftests/resctrl: Move resctrl FS mount/umount to higher level") Signed-off-by: Ilpo Järvinen Tested-by: Shaopeng Tan Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Cc: Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/resctrl/cat_test.c | 8 --- .../testing/selftests/resctrl/resctrl_tests.c | 69 ++++++++++++------- tools/testing/selftests/resctrl/resctrl_val.c | 22 +++--- 3 files changed, 55 insertions(+), 44 deletions(-) diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index 97b87285ab2a..224ba8544d8a 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -167,12 +167,6 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) strcpy(param.filename, RESULT_FILE_NAME1); param.num_of_runs = 0; param.cpu_no = sibling_cpu_no; - } else { - ret = signal_handler_register(); - if (ret) { - kill(bm_pid, SIGKILL); - goto out; - } } remove(param.filename); @@ -209,10 +203,8 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) } close(pipefd[0]); kill(bm_pid, SIGKILL); - signal_handler_unregister(); } -out: cat_test_cleanup(); return ret; diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 1ac22c6d8ce8..31373b69e675 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -67,15 +67,39 @@ void tests_cleanup(void) cat_test_cleanup(); } -static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no) +static int test_prepare(void) { int res; - ksft_print_msg("Starting MBM BW change ...\n"); + res = signal_handler_register(); + if (res) { + ksft_print_msg("Failed to register signal handler\n"); + return res; + } res = mount_resctrlfs(); if (res) { - ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + signal_handler_unregister(); + ksft_print_msg("Failed to mount resctrl FS\n"); + return res; + } + return 0; +} + +static void test_cleanup(void) +{ + umount_resctrlfs(); + signal_handler_unregister(); +} + +static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no) +{ + int res; + + ksft_print_msg("Starting MBM BW change ...\n"); + + if (test_prepare()) { + ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); return; } @@ -83,7 +107,7 @@ static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no) !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") || (get_vendor() != ARCH_INTEL)) { ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n"); - goto umount; + goto cleanup; } res = mbm_bw_change(cpu_no, benchmark_cmd); @@ -91,8 +115,8 @@ static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no) if ((get_vendor() == ARCH_INTEL) && res) ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); -umount: - umount_resctrlfs(); +cleanup: + test_cleanup(); } static void run_mba_test(const char * const *benchmark_cmd, int cpu_no) @@ -101,9 +125,8 @@ static void run_mba_test(const char * const *benchmark_cmd, int cpu_no) ksft_print_msg("Starting MBA Schemata change ...\n"); - res = mount_resctrlfs(); - if (res) { - ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + if (test_prepare()) { + ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); return; } @@ -111,14 +134,14 @@ static void run_mba_test(const char * const *benchmark_cmd, int cpu_no) !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") || (get_vendor() != ARCH_INTEL)) { ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n"); - goto umount; + goto cleanup; } res = mba_schemata_change(cpu_no, benchmark_cmd); ksft_test_result(!res, "MBA: schemata change\n"); -umount: - umount_resctrlfs(); +cleanup: + test_cleanup(); } static void run_cmt_test(const char * const *benchmark_cmd, int cpu_no) @@ -127,16 +150,15 @@ static void run_cmt_test(const char * const *benchmark_cmd, int cpu_no) ksft_print_msg("Starting CMT test ...\n"); - res = mount_resctrlfs(); - if (res) { - ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + if (test_prepare()) { + ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); return; } if (!validate_resctrl_feature_request("L3_MON", "llc_occupancy") || !validate_resctrl_feature_request("L3", NULL)) { ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n"); - goto umount; + goto cleanup; } res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd); @@ -144,8 +166,8 @@ static void run_cmt_test(const char * const *benchmark_cmd, int cpu_no) if ((get_vendor() == ARCH_INTEL) && res) ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); -umount: - umount_resctrlfs(); +cleanup: + test_cleanup(); } static void run_cat_test(int cpu_no, int no_of_bits) @@ -154,22 +176,21 @@ static void run_cat_test(int cpu_no, int no_of_bits) ksft_print_msg("Starting CAT test ...\n"); - res = mount_resctrlfs(); - if (res) { - ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + if (test_prepare()) { + ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); return; } if (!validate_resctrl_feature_request("L3", NULL)) { ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n"); - goto umount; + goto cleanup; } res = cat_perf_miss_val(cpu_no, no_of_bits, "L3"); ksft_test_result(!res, "CAT: test\n"); -umount: - umount_resctrlfs(); +cleanup: + test_cleanup(); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 01bbe11a8983..b8ca6fa40b3b 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -468,7 +468,9 @@ pid_t bm_pid, ppid; void ctrlc_handler(int signum, siginfo_t *info, void *ptr) { - kill(bm_pid, SIGKILL); + /* Only kill child after bm_pid is set after fork() */ + if (bm_pid) + kill(bm_pid, SIGKILL); umount_resctrlfs(); tests_cleanup(); ksft_print_msg("Ending\n\n"); @@ -485,6 +487,8 @@ int signal_handler_register(void) struct sigaction sigact = {}; int ret = 0; + bm_pid = 0; + sigact.sa_sigaction = ctrlc_handler; sigemptyset(&sigact.sa_mask); sigact.sa_flags = SA_SIGINFO; @@ -706,10 +710,6 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par ksft_print_msg("Benchmark PID: %d\n", bm_pid); - ret = signal_handler_register(); - if (ret) - goto out; - /* * The cast removes constness but nothing mutates benchmark_cmd within * the context of this process. At the receiving process, it becomes @@ -721,19 +721,19 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par /* Taskset benchmark to specified cpu */ ret = taskset_benchmark(bm_pid, param->cpu_no); if (ret) - goto unregister; + goto out; /* Write benchmark to specified control&monitoring grp in resctrl FS */ ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp, resctrl_val); if (ret) - goto unregister; + goto out; if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { ret = initialize_mem_bw_imc(); if (ret) - goto unregister; + goto out; initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp, param->cpu_no, resctrl_val); @@ -748,7 +748,7 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par sizeof(pipe_message)) { perror("# failed reading message from child process"); close(pipefd[0]); - goto unregister; + goto out; } } close(pipefd[0]); @@ -757,7 +757,7 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par if (sigqueue(bm_pid, SIGUSR1, value) == -1) { perror("# sigqueue SIGUSR1 to child"); ret = errno; - goto unregister; + goto out; } /* Give benchmark enough time to fully run */ @@ -786,8 +786,6 @@ int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *par } } -unregister: - signal_handler_unregister(); out: kill(bm_pid, SIGKILL); From e5f12229d9541c38a813002a0a8faaf637430b77 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 10 Nov 2023 16:13:15 +0100 Subject: [PATCH 0407/1397] parisc: Prevent booting 64-bit kernels on PA1.x machines commit a406b8b424fa01f244c1aab02ba186258448c36b upstream. Bail out early with error message when trying to boot a 64-bit kernel on 32-bit machines. This fixes the previous commit to include the check for true 64-bit kernels as well. Signed-off-by: Helge Deller Fixes: 591d2108f3abc ("parisc: Add runtime check to prevent PA2.0 kernels on PA1.x machines") Cc: # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/head.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index a171bf3c6b31..96e0264ac961 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -70,9 +70,8 @@ $bss_loop: stw,ma %arg2,4(%r1) stw,ma %arg3,4(%r1) -#if !defined(CONFIG_64BIT) && defined(CONFIG_PA20) - /* This 32-bit kernel was compiled for PA2.0 CPUs. Check current CPU - * and halt kernel if we detect a PA1.x CPU. */ +#if defined(CONFIG_PA20) + /* check for 64-bit capable CPU as required by current kernel */ ldi 32,%r10 mtctl %r10,%cr11 .level 2.0 From 5ff2daf7737fe9da0cf1989de2cebf9a02b9e07d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 7 Nov 2023 14:33:32 +0100 Subject: [PATCH 0408/1397] parisc/pgtable: Do not drop upper 5 address bits of physical address commit 166b0110d1ee53290bd11618df6e3991c117495a upstream. When calculating the pfn for the iitlbt/idtlbt instruction, do not drop the upper 5 address bits. This doesn't seem to have an effect on physical hardware which uses less physical address bits, but in qemu the missing bits are visible. Signed-off-by: Helge Deller Cc: Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/entry.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index cab1ec23e0d7..ab23e61a6f01 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -475,13 +475,13 @@ * to a CPU TLB 4k PFN (4k => 12 bits to shift) */ #define PAGE_ADD_SHIFT (PAGE_SHIFT-12) #define PAGE_ADD_HUGE_SHIFT (REAL_HPAGE_SHIFT-12) + #define PFN_START_BIT (63-ASM_PFN_PTE_SHIFT+(63-58)-PAGE_ADD_SHIFT) /* Drop prot bits and convert to page addr for iitlbt and idtlbt */ .macro convert_for_tlb_insert20 pte,tmp #ifdef CONFIG_HUGETLB_PAGE copy \pte,\tmp - extrd,u \tmp,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\ - 64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte + extrd,u \tmp,PFN_START_BIT,PFN_START_BIT+1,\pte depdi _PAGE_SIZE_ENCODING_DEFAULT,63,\ (63-58)+PAGE_ADD_SHIFT,\pte @@ -489,8 +489,7 @@ depdi _HUGE_PAGE_SIZE_ENCODING_DEFAULT,63,\ (63-58)+PAGE_ADD_HUGE_SHIFT,\pte #else /* Huge pages disabled */ - extrd,u \pte,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\ - 64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte + extrd,u \pte,PFN_START_BIT,PFN_START_BIT+1,\pte depdi _PAGE_SIZE_ENCODING_DEFAULT,63,\ (63-58)+PAGE_ADD_SHIFT,\pte #endif From c0940ebe428777c9fe5856d6b305d75ca3e610ae Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 17 Nov 2023 16:43:52 +0100 Subject: [PATCH 0409/1397] parisc/power: Fix power soft-off when running on qemu commit 6ad6e15a9c46b8f0932cd99724f26f3db4db1cdf upstream. Firmware returns the physical address of the power switch, so need to use gsc_writel() instead of direct memory access. Fixes: d0c219472980 ("parisc/power: Add power soft-off when running on qemu") Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Greg Kroah-Hartman --- drivers/parisc/power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index 6ee0717e34ec..332bcc0053a5 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -201,7 +201,7 @@ static struct notifier_block parisc_panic_block = { static int qemu_power_off(struct sys_off_data *data) { /* this turns the system off via SeaBIOS */ - *(int *)data->cb_data = 0; + gsc_writel(0, (unsigned long) data->cb_data); pdc_soft_power_button(1); return NOTIFY_DONE; } From 2345ef960d6663c9b52f8f73fb5ca05732c56144 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 13 Nov 2023 11:12:57 +0100 Subject: [PATCH 0410/1397] parisc: fix mmap_base calculation when stack grows upwards commit 5f74f820f6fc844b95f9e5e406e0a07d97510420 upstream. Matoro reported various userspace crashes on the parisc platform with kernel 6.6 and bisected it to commit 3033cd430768 ("parisc: Use generic mmap top-down layout and brk randomization"). That commit switched parisc to use the common infrastructure to calculate mmap_base, but missed that the mmap_base() function takes care for architectures where the stack grows downwards only. Fix the mmap_base() calculation to include the stack-grows-upwards case and thus fix the userspace crashes on parisc. Link: https://lkml.kernel.org/r/ZVH2qeS1bG7/1J/l@p100 Fixes: 3033cd430768 ("parisc: Use generic mmap top-down layout and brk randomization") Signed-off-by: Helge Deller Reported-by: matoro Tested-by: matoro Cc: [6.6+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/parisc/Kconfig | 6 +++--- arch/parisc/include/asm/elf.h | 10 +--------- arch/parisc/include/asm/processor.h | 2 ++ arch/parisc/kernel/sys_parisc.c | 2 +- mm/util.c | 10 ++++++++++ 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index a15ab147af2e..68cbe666510a 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -138,11 +138,11 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN default 8 config ARCH_MMAP_RND_BITS_MAX - default 24 if 64BIT - default 17 + default 18 if 64BIT + default 13 config ARCH_MMAP_RND_COMPAT_BITS_MAX - default 17 + default 13 # unless you want to implement ACPI on PA-RISC ... ;-) config PM diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 140eaa97bf21..2d73d3c3cd37 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -349,15 +349,7 @@ struct pt_regs; /* forward declaration... */ #define ELF_HWCAP 0 -/* Masks for stack and mmap randomization */ -#define BRK_RND_MASK (is_32bit_task() ? 0x07ffUL : 0x3ffffUL) -#define MMAP_RND_MASK (is_32bit_task() ? 0x1fffUL : 0x3ffffUL) -#define STACK_RND_MASK MMAP_RND_MASK - -struct mm_struct; -extern unsigned long arch_randomize_brk(struct mm_struct *); -#define arch_randomize_brk arch_randomize_brk - +#define STACK_RND_MASK 0x7ff /* 8MB of VA */ #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index ff6cbdb6903b..ece4b3046515 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -47,6 +47,8 @@ #ifndef __ASSEMBLY__ +struct rlimit; +unsigned long mmap_upper_limit(struct rlimit *rlim_stack); unsigned long calc_max_stack_size(unsigned long stack_max); /* diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index ab896eff7a1d..98af719d5f85 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -77,7 +77,7 @@ unsigned long calc_max_stack_size(unsigned long stack_max) * indicating that "current" should be used instead of a passed-in * value from the exec bprm as done with arch_pick_mmap_layout(). */ -static unsigned long mmap_upper_limit(struct rlimit *rlim_stack) +unsigned long mmap_upper_limit(struct rlimit *rlim_stack) { unsigned long stack_base; diff --git a/mm/util.c b/mm/util.c index 8cbbfd3a3d59..be798981acc7 100644 --- a/mm/util.c +++ b/mm/util.c @@ -414,6 +414,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack) static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { +#ifdef CONFIG_STACK_GROWSUP + /* + * For an upwards growing stack the calculation is much simpler. + * Memory for the maximum stack size is reserved at the top of the + * task. mmap_base starts directly below the stack and grows + * downwards. + */ + return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd); +#else unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_guard_gap; @@ -431,6 +440,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) gap = MAX_GAP; return PAGE_ALIGN(STACK_TOP - gap - rnd); +#endif } void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) From 429eff0936e39c87f1799a74e7dc5b098301a163 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Thu, 19 Oct 2023 13:29:20 +0300 Subject: [PATCH 0411/1397] xhci: Enable RPM on controllers that support low-power states commit a5d6264b638efeca35eff72177fd28d149e0764b upstream. Use the low-power states of the underlying platform to enable runtime PM. If the platform doesn't support runtime D3, then enabling default RPM will result in the controller malfunctioning, as in the case of hotplug devices not being detected because of a failed interrupt generation. Cc: Mario Limonciello Signed-off-by: Basavaraj Natikar Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20231019102924.2797346-16-mathias.nyman@linux.intel.com Cc: Oleksandr Natalenko Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index bde43cef8846..95ed9404f6f8 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -695,7 +695,9 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) /* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */ pm_runtime_put_noidle(&dev->dev); - if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW) + if (pci_choose_state(dev, PMSG_SUSPEND) == PCI_D0) + pm_runtime_forbid(&dev->dev); + else if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW) pm_runtime_allow(&dev->dev); dma_set_max_seg_size(&dev->dev, UINT_MAX); From d612032717662a3a500963a10a7572a1ec1822c9 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 19 Oct 2023 23:01:49 -0500 Subject: [PATCH 0412/1397] smb3: fix creating FIFOs when mounting with "sfu" mount option commit 72bc63f5e23a38b65ff2a201bdc11401d4223fa9 upstream. Fixes some xfstests including generic/564 and generic/157 The "sfu" mount option can be useful for creating special files (character and block devices in particular) but could not create FIFOs. It did recognize existing empty files with the "system" attribute flag as FIFOs but this is too general, so to support creating FIFOs more safely use a new tag (but the same length as those for char and block devices ie "IntxLNK" and "IntxBLK") "LnxFIFO" to indicate that the file should be treated as a FIFO (when mounted with the "sfu"). For some additional context note that "sfu" followed the way that "Services for Unix" on Windows handled these special files (at least for character and block devices and symlinks), which is different than newer Windows which can handle special files as reparse points (which isn't an option to many servers). Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifspdu.h | 2 +- fs/smb/client/inode.c | 4 ++++ fs/smb/client/smb2ops.c | 8 +++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index e17222fec9d2..a75220db5c1e 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -2570,7 +2570,7 @@ typedef struct { struct win_dev { - unsigned char type[8]; /* IntxCHR or IntxBLK */ + unsigned char type[8]; /* IntxCHR or IntxBLK or LnxFIFO*/ __le64 major; __le64 minor; } __attribute__((packed)); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index d7c302442c1e..c03a286ed418 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -592,6 +592,10 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, cifs_dbg(FYI, "Symlink\n"); fattr->cf_mode |= S_IFLNK; fattr->cf_dtype = DT_LNK; + } else if (memcmp("LnxFIFO", pbuf, 8) == 0) { + cifs_dbg(FYI, "FIFO\n"); + fattr->cf_mode |= S_IFIFO; + fattr->cf_dtype = DT_FIFO; } else { fattr->cf_mode |= S_IFREG; /* file? */ fattr->cf_dtype = DT_REG; diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 9aeecee6b91b..4af0085239b7 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5087,7 +5087,7 @@ smb2_make_node(unsigned int xid, struct inode *inode, * over SMB2/SMB3 and Samba will do this with SMB3.1.1 POSIX Extensions */ - if (!S_ISCHR(mode) && !S_ISBLK(mode)) + if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode)) return rc; cifs_dbg(FYI, "sfu compat create special file\n"); @@ -5135,6 +5135,12 @@ smb2_make_node(unsigned int xid, struct inode *inode, pdev->minor = cpu_to_le64(MINOR(dev)); rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, &bytes_written, iov, 1); + } else if (S_ISFIFO(mode)) { + memcpy(pdev->type, "LnxFIFO", 8); + pdev->major = 0; + pdev->minor = 0; + rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, + &bytes_written, iov, 1); } tcon->ses->server->ops->close(xid, tcon, &fid); d_drop(dentry); From b8c0124b2340a210227e735ec0d9487e8f5a61b5 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 16 Oct 2023 12:18:23 -0500 Subject: [PATCH 0413/1397] smb3: fix touch -h of symlink commit 475efd9808a3094944a56240b2711349e433fb66 upstream. For example: touch -h -t 02011200 testfile where testfile is a symlink would not change the timestamp, but touch -t 02011200 testfile does work to change the timestamp of the target Suggested-by: David Howells Reported-by: Micah Veilleux Closes: https://bugzilla.samba.org/show_bug.cgi?id=14476 Cc: stable@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifsfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 22869cda1356..ea3a7a668b45 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1191,6 +1191,7 @@ const char *cifs_get_link(struct dentry *dentry, struct inode *inode, const struct inode_operations cifs_symlink_inode_ops = { .get_link = cifs_get_link, + .setattr = cifs_setattr, .permission = cifs_permission, .listxattr = cifs_listxattr, }; From 67062c84922b439071c301afffaf14457fa3569a Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 9 Nov 2023 15:28:12 -0600 Subject: [PATCH 0414/1397] smb3: allow dumping session and tcon id to improve stats analysis and debugging commit de4eceab578ead12a71e5b5588a57e142bbe8ceb upstream. When multiple mounts are to the same share from the same client it was not possible to determine which section of /proc/fs/cifs/Stats (and DebugData) correspond to that mount. In some recent examples this turned out to be a significant problem when trying to analyze performance data - since there are many cases where unless we know the tree id and session id we can't figure out which stats (e.g. number of SMB3.1.1 requests by type, the total time they take, which is slowest, how many fail etc.) apply to which mount. The only existing loosely related ioctl CIFS_IOC_GET_MNT_INFO does not return the information needed to uniquely identify which tcon is which mount although it does return various flags and device info. Add a cifs.ko ioctl CIFS_IOC_GET_TCON_INFO (0x800ccf0c) to return tid, session id, tree connect count. Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifs_ioctl.h | 6 ++++++ fs/smb/client/ioctl.c | 25 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h index 332588e77c31..26327442e383 100644 --- a/fs/smb/client/cifs_ioctl.h +++ b/fs/smb/client/cifs_ioctl.h @@ -26,6 +26,11 @@ struct smb_mnt_fs_info { __u64 cifs_posix_caps; } __packed; +struct smb_mnt_tcon_info { + __u32 tid; + __u64 session_id; +} __packed; + struct smb_snapshot_array { __u32 number_of_snapshots; __u32 number_of_snapshots_returned; @@ -108,6 +113,7 @@ struct smb3_notify_info { #define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify) #define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info) #define CIFS_IOC_NOTIFY_INFO _IOWR(CIFS_IOCTL_MAGIC, 11, struct smb3_notify_info) +#define CIFS_IOC_GET_TCON_INFO _IOR(CIFS_IOCTL_MAGIC, 12, struct smb_mnt_tcon_info) #define CIFS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index f7160003e0ed..73ededa8eba5 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -117,6 +117,20 @@ static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file, return rc; } +static long smb_mnt_get_tcon_info(struct cifs_tcon *tcon, void __user *arg) +{ + int rc = 0; + struct smb_mnt_tcon_info tcon_inf; + + tcon_inf.tid = tcon->tid; + tcon_inf.session_id = tcon->ses->Suid; + + if (copy_to_user(arg, &tcon_inf, sizeof(struct smb_mnt_tcon_info))) + rc = -EFAULT; + + return rc; +} + static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, void __user *arg) { @@ -414,6 +428,17 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) tcon = tlink_tcon(pSMBFile->tlink); rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg); break; + case CIFS_IOC_GET_TCON_INFO: + cifs_sb = CIFS_SB(inode->i_sb); + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + break; + } + tcon = tlink_tcon(tlink); + rc = smb_mnt_get_tcon_info(tcon, (void __user *)arg); + cifs_put_tlink(tlink); + break; case CIFS_ENUMERATE_SNAPSHOTS: if (pSMBFile == NULL) break; From 318e1c7e61c42a2066f511bf850c943ffb09f73c Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 7 Nov 2023 21:38:13 -0600 Subject: [PATCH 0415/1397] smb3: fix caching of ctime on setxattr commit 5923d6686a100c2b4cabd4c2ca9d5a12579c7614 upstream. Fixes xfstest generic/728 which had been failing due to incorrect ctime after setxattr and removexattr Update ctime on successful set of xattr Cc: stable@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/xattr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index 4ad5531686d8..c2bf829310be 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -150,10 +150,13 @@ static int cifs_xattr_set(const struct xattr_handler *handler, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto out; - if (pTcon->ses->server->ops->set_EA) + if (pTcon->ses->server->ops->set_EA) { rc = pTcon->ses->server->ops->set_EA(xid, pTcon, full_path, name, value, (__u16)size, cifs_sb->local_nls, cifs_sb); + if (rc == 0) + inode_set_ctime_current(inode); + } break; case XATTR_CIFS_ACL: From 0ab6f842452ce2cae04209d4671ac6289d0aef8a Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 24 Oct 2023 13:49:15 -0300 Subject: [PATCH 0416/1397] smb: client: fix use-after-free bug in cifs_debug_data_proc_show() commit d328c09ee9f15ee5a26431f5aad7c9239fa85e62 upstream. Skip SMB sessions that are being teared down (e.g. @ses->ses_status == SES_EXITING) in cifs_debug_data_proc_show() to avoid use-after-free in @ses. This fixes the following GPF when reading from /proc/fs/cifs/DebugData while mounting and umounting [ 816.251274] general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6d81: 0000 [#1] PREEMPT SMP NOPTI ... [ 816.260138] Call Trace: [ 816.260329] [ 816.260499] ? die_addr+0x36/0x90 [ 816.260762] ? exc_general_protection+0x1b3/0x410 [ 816.261126] ? asm_exc_general_protection+0x26/0x30 [ 816.261502] ? cifs_debug_tcon+0xbd/0x240 [cifs] [ 816.261878] ? cifs_debug_tcon+0xab/0x240 [cifs] [ 816.262249] cifs_debug_data_proc_show+0x516/0xdb0 [cifs] [ 816.262689] ? seq_read_iter+0x379/0x470 [ 816.262995] seq_read_iter+0x118/0x470 [ 816.263291] proc_reg_read_iter+0x53/0x90 [ 816.263596] ? srso_alias_return_thunk+0x5/0x7f [ 816.263945] vfs_read+0x201/0x350 [ 816.264211] ksys_read+0x75/0x100 [ 816.264472] do_syscall_64+0x3f/0x90 [ 816.264750] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 816.265135] RIP: 0033:0x7fd5e669d381 Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifs_debug.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 76922fcc4bc6..9a0ccd87468e 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -452,6 +452,11 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\n\n\tSessions: "); i = 0; list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + spin_lock(&ses->ses_lock); + if (ses->ses_status == SES_EXITING) { + spin_unlock(&ses->ses_lock); + continue; + } i++; if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) || @@ -472,6 +477,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) ses->ses_count, ses->serverOS, ses->serverNOS, ses->capabilities, ses->ses_status); } + spin_unlock(&ses->ses_lock); seq_printf(m, "\n\tSecurity type: %s ", get_security_type_str(server->ops->select_sectype(server, ses->sectype))); From 93877b9afc2994c89362007aac480a7b150f386f Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 30 Oct 2023 17:19:56 -0300 Subject: [PATCH 0417/1397] smb: client: fix use-after-free in smb2_query_info_compound() commit 5c86919455c1edec99ebd3338ad213b59271a71b upstream. The following UAF was triggered when running fstests generic/072 with KASAN enabled against Windows Server 2022 and mount options 'multichannel,max_channels=2,vers=3.1.1,mfsymlinks,noperm' BUG: KASAN: slab-use-after-free in smb2_query_info_compound+0x423/0x6d0 [cifs] Read of size 8 at addr ffff888014941048 by task xfs_io/27534 CPU: 0 PID: 27534 Comm: xfs_io Not tainted 6.6.0-rc7 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 Call Trace: dump_stack_lvl+0x4a/0x80 print_report+0xcf/0x650 ? srso_alias_return_thunk+0x5/0x7f ? srso_alias_return_thunk+0x5/0x7f ? __phys_addr+0x46/0x90 kasan_report+0xda/0x110 ? smb2_query_info_compound+0x423/0x6d0 [cifs] ? smb2_query_info_compound+0x423/0x6d0 [cifs] smb2_query_info_compound+0x423/0x6d0 [cifs] ? __pfx_smb2_query_info_compound+0x10/0x10 [cifs] ? srso_alias_return_thunk+0x5/0x7f ? __stack_depot_save+0x39/0x480 ? kasan_save_stack+0x33/0x60 ? kasan_set_track+0x25/0x30 ? ____kasan_slab_free+0x126/0x170 smb2_queryfs+0xc2/0x2c0 [cifs] ? __pfx_smb2_queryfs+0x10/0x10 [cifs] ? __pfx___lock_acquire+0x10/0x10 smb311_queryfs+0x210/0x220 [cifs] ? __pfx_smb311_queryfs+0x10/0x10 [cifs] ? srso_alias_return_thunk+0x5/0x7f ? __lock_acquire+0x480/0x26c0 ? lock_release+0x1ed/0x640 ? srso_alias_return_thunk+0x5/0x7f ? do_raw_spin_unlock+0x9b/0x100 cifs_statfs+0x18c/0x4b0 [cifs] statfs_by_dentry+0x9b/0xf0 fd_statfs+0x4e/0xb0 __do_sys_fstatfs+0x7f/0xe0 ? __pfx___do_sys_fstatfs+0x10/0x10 ? srso_alias_return_thunk+0x5/0x7f ? lockdep_hardirqs_on_prepare+0x136/0x200 ? srso_alias_return_thunk+0x5/0x7f do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Allocated by task 27534: kasan_save_stack+0x33/0x60 kasan_set_track+0x25/0x30 __kasan_kmalloc+0x8f/0xa0 open_cached_dir+0x71b/0x1240 [cifs] smb2_query_info_compound+0x5c3/0x6d0 [cifs] smb2_queryfs+0xc2/0x2c0 [cifs] smb311_queryfs+0x210/0x220 [cifs] cifs_statfs+0x18c/0x4b0 [cifs] statfs_by_dentry+0x9b/0xf0 fd_statfs+0x4e/0xb0 __do_sys_fstatfs+0x7f/0xe0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Freed by task 27534: kasan_save_stack+0x33/0x60 kasan_set_track+0x25/0x30 kasan_save_free_info+0x2b/0x50 ____kasan_slab_free+0x126/0x170 slab_free_freelist_hook+0xd0/0x1e0 __kmem_cache_free+0x9d/0x1b0 open_cached_dir+0xff5/0x1240 [cifs] smb2_query_info_compound+0x5c3/0x6d0 [cifs] smb2_queryfs+0xc2/0x2c0 [cifs] This is a race between open_cached_dir() and cached_dir_lease_break() where the cache entry for the open directory handle receives a lease break while creating it. And before returning from open_cached_dir(), we put the last reference of the new @cfid because of !@cfid->has_lease. Besides the UAF, while running xfstests a lot of missed lease breaks have been noticed in tests that run several concurrent statfs(2) calls on those cached fids CIFS: VFS: \\w22-root1.gandalf.test No task to wake, unknown frame... CIFS: VFS: \\w22-root1.gandalf.test Cmd: 18 Err: 0x0 Flags: 0x1... CIFS: VFS: \\w22-root1.gandalf.test smb buf 00000000715bfe83 len 108 CIFS: VFS: Dump pending requests: CIFS: VFS: \\w22-root1.gandalf.test No task to wake, unknown frame... CIFS: VFS: \\w22-root1.gandalf.test Cmd: 18 Err: 0x0 Flags: 0x1... CIFS: VFS: \\w22-root1.gandalf.test smb buf 000000005aa7316e len 108 ... To fix both, in open_cached_dir() ensure that @cfid->has_lease is set right before sending out compounded request so that any potential lease break will be get processed by demultiplex thread while we're still caching @cfid. And, if open failed for some reason, re-check @cfid->has_lease to decide whether or not put lease reference. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cached_dir.c | 84 ++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index fe1bf5b6e0cb..59f6b8e32cc9 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -32,7 +32,7 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, * fully cached or it may be in the process of * being deleted due to a lease break. */ - if (!cfid->has_lease) { + if (!cfid->time || !cfid->has_lease) { spin_unlock(&cfids->cfid_list_lock); return NULL; } @@ -193,10 +193,20 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, npath = path_no_prefix(cifs_sb, path); if (IS_ERR(npath)) { rc = PTR_ERR(npath); - kfree(utf16_path); - return rc; + goto out; } + if (!npath[0]) { + dentry = dget(cifs_sb->root); + } else { + dentry = path_to_dentry(cifs_sb, npath); + if (IS_ERR(dentry)) { + rc = -ENOENT; + goto out; + } + } + cfid->dentry = dentry; + /* * We do not hold the lock for the open because in case * SMB2_open needs to reconnect. @@ -249,6 +259,15 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, smb2_set_related(&rqst[1]); + /* + * Set @cfid->has_lease to true before sending out compounded request so + * its lease reference can be put in cached_dir_lease_break() due to a + * potential lease break right after the request is sent or while @cfid + * is still being cached. Concurrent processes won't be to use it yet + * due to @cfid->time being zero. + */ + cfid->has_lease = true; + rc = compound_send_recv(xid, ses, server, flags, 2, rqst, resp_buftype, rsp_iov); @@ -263,6 +282,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, cfid->tcon = tcon; cfid->is_open = true; + spin_lock(&cfids->cfid_list_lock); + o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base; oparms.fid->persistent_fid = o_rsp->PersistentFileId; oparms.fid->volatile_fid = o_rsp->VolatileFileId; @@ -270,18 +291,25 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, oparms.fid->mid = le64_to_cpu(o_rsp->hdr.MessageId); #endif /* CIFS_DEBUG2 */ - if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) + rc = -EINVAL; + if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) { + spin_unlock(&cfids->cfid_list_lock); goto oshr_free; + } smb2_parse_contexts(server, o_rsp, &oparms.fid->epoch, oparms.fid->lease_key, &oplock, NULL, NULL); - if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) + if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) { + spin_unlock(&cfids->cfid_list_lock); goto oshr_free; + } qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; - if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) + if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) { + spin_unlock(&cfids->cfid_list_lock); goto oshr_free; + } if (!smb2_validate_and_copy_iov( le16_to_cpu(qi_rsp->OutputBufferOffset), sizeof(struct smb2_file_all_info), @@ -289,37 +317,24 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, (char *)&cfid->file_all_info)) cfid->file_all_info_is_valid = true; - if (!npath[0]) - dentry = dget(cifs_sb->root); - else { - dentry = path_to_dentry(cifs_sb, npath); - if (IS_ERR(dentry)) { - rc = -ENOENT; - goto oshr_free; - } - } - spin_lock(&cfids->cfid_list_lock); - cfid->dentry = dentry; cfid->time = jiffies; - cfid->has_lease = true; spin_unlock(&cfids->cfid_list_lock); + /* At this point the directory handle is fully cached */ + rc = 0; oshr_free: - kfree(utf16_path); SMB2_open_free(&rqst[0]); SMB2_query_info_free(&rqst[1]); free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); - spin_lock(&cfids->cfid_list_lock); - if (!cfid->has_lease) { - if (rc) { - if (cfid->on_list) { - list_del(&cfid->entry); - cfid->on_list = false; - cfids->num_entries--; - } - rc = -ENOENT; - } else { + if (rc) { + spin_lock(&cfids->cfid_list_lock); + if (cfid->on_list) { + list_del(&cfid->entry); + cfid->on_list = false; + cfids->num_entries--; + } + if (cfid->has_lease) { /* * We are guaranteed to have two references at this * point. One for the caller and one for a potential @@ -327,25 +342,24 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, * will be closed when the caller closes the cached * handle. */ + cfid->has_lease = false; spin_unlock(&cfids->cfid_list_lock); kref_put(&cfid->refcount, smb2_close_cached_fid); goto out; } + spin_unlock(&cfids->cfid_list_lock); } - spin_unlock(&cfids->cfid_list_lock); +out: if (rc) { if (cfid->is_open) SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid, cfid->fid.volatile_fid); free_cached_dir(cfid); - cfid = NULL; - } -out: - if (rc == 0) { + } else { *ret_cfid = cfid; atomic_inc(&tcon->num_remote_opens); } - + kfree(utf16_path); return rc; } From c1a5962f1462b64fe7b69f20a4b6af8067bc2d26 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 25 Oct 2023 14:58:35 -0300 Subject: [PATCH 0418/1397] smb: client: fix potential deadlock when releasing mids commit e6322fd177c6885a21dd4609dc5e5c973d1a2eb7 upstream. All release_mid() callers seem to hold a reference of @mid so there is no need to call kref_put(&mid->refcount, __release_mid) under @server->mid_lock spinlock. If they don't, then an use-after-free bug would have occurred anyways. By getting rid of such spinlock also fixes a potential deadlock as shown below CPU 0 CPU 1 ------------------------------------------------------------------ cifs_demultiplex_thread() cifs_debug_data_proc_show() release_mid() spin_lock(&server->mid_lock); spin_lock(&cifs_tcp_ses_lock) spin_lock(&server->mid_lock) __release_mid() smb2_find_smb_tcon() spin_lock(&cifs_tcp_ses_lock) *deadlock* Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifsproto.h | 7 ++++++- fs/smb/client/smb2misc.c | 2 +- fs/smb/client/transport.c | 11 +---------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 0c37eefa18a5..890ceddae07e 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -81,7 +81,7 @@ extern char *cifs_build_path_to_root(struct smb3_fs_context *ctx, extern char *build_wildcard_path_from_dentry(struct dentry *direntry); char *cifs_build_devname(char *nodename, const char *prepath); extern void delete_mid(struct mid_q_entry *mid); -extern void release_mid(struct mid_q_entry *mid); +void __release_mid(struct kref *refcount); extern void cifs_wake_up_task(struct mid_q_entry *mid); extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); @@ -740,4 +740,9 @@ static inline bool dfs_src_pathname_equal(const char *s1, const char *s2) return true; } +static inline void release_mid(struct mid_q_entry *mid) +{ + kref_put(&mid->refcount, __release_mid); +} + #endif /* _CIFSPROTO_H */ diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 25f7cd6f23d6..32dfa0f7a78c 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -787,7 +787,7 @@ __smb2_handle_cancelled_cmd(struct cifs_tcon *tcon, __u16 cmd, __u64 mid, { struct close_cancelled_open *cancelled; - cancelled = kzalloc(sizeof(*cancelled), GFP_ATOMIC); + cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL); if (!cancelled) return -ENOMEM; diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 14710afdc2a3..d553b7a54621 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -76,7 +76,7 @@ alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) return temp; } -static void __release_mid(struct kref *refcount) +void __release_mid(struct kref *refcount) { struct mid_q_entry *midEntry = container_of(refcount, struct mid_q_entry, refcount); @@ -156,15 +156,6 @@ static void __release_mid(struct kref *refcount) mempool_free(midEntry, cifs_mid_poolp); } -void release_mid(struct mid_q_entry *mid) -{ - struct TCP_Server_Info *server = mid->server; - - spin_lock(&server->mid_lock); - kref_put(&mid->refcount, __release_mid); - spin_unlock(&server->mid_lock); -} - void delete_mid(struct mid_q_entry *mid) { From 390c08fd3eeb3ebd0e9576de8b6eec8eae2d595d Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 9 Nov 2023 12:01:48 -0300 Subject: [PATCH 0419/1397] smb: client: fix mount when dns_resolver key is not available commit 5e2fd17f434d2fed78efb123e2fc6711e4f598f1 upstream. There was a wrong assumption that with CONFIG_CIFS_DFS_UPCALL=y there would always be a dns_resolver key set up so we could unconditionally upcall to resolve UNC hostname rather than using the value provided by mount(2). Only require it when performing automount of junctions within a DFS share so users that don't have dns_resolver key still can mount their regular shares with server hostname resolved by mount.cifs(8). Fixes: 348a04a8d113 ("smb: client: get rid of dfs code dep in namespace.c") Cc: stable@vger.kernel.org Tested-by: Eduard Bachmakov Reported-by: Eduard Bachmakov Closes: https://lore.kernel.org/all/CADCRUiNvZuiUZ0VGZZO9HRyPyw6x92kiA7o7Q4tsX5FkZqUkKg@mail.gmail.com/ Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/dfs.c | 18 +++++++++++++----- fs/smb/client/fs_context.h | 1 + fs/smb/client/namespace.c | 17 +++++++++++++++-- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index 81b84151450d..a8a1d386da65 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -263,15 +263,23 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) return rc; } -/* Resolve UNC hostname in @ctx->source and set ip addr in @ctx->dstaddr */ +/* + * If @ctx->dfs_automount, then update @ctx->dstaddr earlier with the DFS root + * server from where we'll start following any referrals. Otherwise rely on the + * value provided by mount(2) as the user might not have dns_resolver key set up + * and therefore failing to upcall to resolve UNC hostname under @ctx->source. + */ static int update_fs_context_dstaddr(struct smb3_fs_context *ctx) { struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr; - int rc; + int rc = 0; - rc = dns_resolve_server_name_to_ip(ctx->source, addr, NULL); - if (!rc) - cifs_set_port(addr, ctx->port); + if (!ctx->nodfs && ctx->dfs_automount) { + rc = dns_resolve_server_name_to_ip(ctx->source, addr, NULL); + if (!rc) + cifs_set_port(addr, ctx->port); + ctx->dfs_automount = false; + } return rc; } diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 9d8d34af0211..cf46916286d0 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -268,6 +268,7 @@ struct smb3_fs_context { bool witness:1; /* use witness protocol */ char *leaf_fullpath; struct cifs_ses *dfs_root_ses; + bool dfs_automount:1; /* set for dfs automount only */ }; extern const struct fs_parameter_spec smb3_fs_parameters[]; diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c index c8f5ed8a69f1..a6968573b775 100644 --- a/fs/smb/client/namespace.c +++ b/fs/smb/client/namespace.c @@ -117,6 +117,18 @@ cifs_build_devname(char *nodename, const char *prepath) return dev; } +static bool is_dfs_mount(struct dentry *dentry) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + bool ret; + + spin_lock(&tcon->tc_lock); + ret = !!tcon->origin_fullpath; + spin_unlock(&tcon->tc_lock); + return ret; +} + /* Return full path out of a dentry set for automount */ static char *automount_fullpath(struct dentry *dentry, void *page) { @@ -212,8 +224,9 @@ static struct vfsmount *cifs_do_automount(struct path *path) ctx->source = NULL; goto out; } - cifs_dbg(FYI, "%s: ctx: source=%s UNC=%s prepath=%s\n", - __func__, ctx->source, ctx->UNC, ctx->prepath); + ctx->dfs_automount = is_dfs_mount(mntpt); + cifs_dbg(FYI, "%s: ctx: source=%s UNC=%s prepath=%s dfs_automount=%d\n", + __func__, ctx->source, ctx->UNC, ctx->prepath, ctx->dfs_automount); mnt = fc_mount(fc); out: From e42c5730c14a92ab597f24467b17cc10cf31979e Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 30 Oct 2023 11:00:09 +0000 Subject: [PATCH 0420/1397] cifs: reconnect helper should set reconnect for the right channel commit c3326a61cdbf3ce1273d9198b6cbf90965d7e029 upstream. We introduced a helper function to be used by non-cifsd threads to mark the connection for reconnect. For multichannel, when only a particular channel needs to be reconnected, this had a bug. This change fixes that by marking that particular channel for reconnect. Fixes: dca65818c80c ("cifs: use a different reconnect helper for non-cifsd threads") Cc: stable@vger.kernel.org Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/connect.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 7b923e36501b..a549cae607f6 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -156,13 +156,14 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, /* If server is a channel, select the primary channel */ pserver = SERVER_IS_CHAN(server) ? server->primary_server : server; - spin_lock(&pserver->srv_lock); + /* if we need to signal just this channel */ if (!all_channels) { - pserver->tcpStatus = CifsNeedReconnect; - spin_unlock(&pserver->srv_lock); + spin_lock(&server->srv_lock); + if (server->tcpStatus != CifsExiting) + server->tcpStatus = CifsNeedReconnect; + spin_unlock(&server->srv_lock); return; } - spin_unlock(&pserver->srv_lock); spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { From aabf4851d1605523be8f2518e6a549f3f080e198 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 30 Oct 2023 11:00:11 +0000 Subject: [PATCH 0421/1397] cifs: force interface update before a fresh session setup commit d9a6d78096056a3cb5c5f07a730ab92f2f9ac4e6 upstream. During a session reconnect, it is possible that the server moved to another physical server (happens in case of Azure files). So at this time, force a query of server interfaces again (in case of multichannel session), such that the secondary channels connect to the right IP addresses (possibly updated now). Cc: stable@vger.kernel.org Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/connect.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index a549cae607f6..dd24b0140252 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3850,8 +3850,12 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); spin_unlock(&ses->chan_lock); - if (!is_binding) + if (!is_binding) { ses->ses_status = SES_IN_SETUP; + + /* force iface_list refresh */ + ses->iface_last_update = 0; + } spin_unlock(&ses->ses_lock); /* update ses ip_addr only for primary chan */ From 328004e6df5a9a8692e4ee04b3330b6b8931dba2 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 30 Oct 2023 11:00:10 +0000 Subject: [PATCH 0422/1397] cifs: do not reset chan_max if multichannel is not supported at mount commit 6e5e64c9477d58e73cb1a0e83eacad1f8df247cf upstream. If the mount command has specified multichannel as a mount option, but multichannel is found to be unsupported by the server at the time of mount, we set chan_max to 1. Which means that the user needs to remount the share if the server starts supporting multichannel. This change removes this reset. What it means is that if the user specified multichannel or max_channels during mount, and at this time, multichannel is not supported, but the server starts supporting it at a later point, the client will be capable of scaling out the number of channels. Cc: stable@vger.kernel.org Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/sess.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 79f26c560edf..c899b05c92f7 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -186,7 +186,6 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) } if (!(server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { - ses->chan_max = 1; spin_unlock(&ses->chan_lock); cifs_server_dbg(VFS, "no multichannel support\n"); return 0; From 739bf98ce9deebbd12000cba81d2b48ee1d009de Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 6 Nov 2023 16:22:11 +0000 Subject: [PATCH 0423/1397] cifs: do not pass cifs_sb when trying to add channels commit 9599d59eb8fc0c0fd9480c4f22901533d08965ee upstream. The only reason why cifs_sb gets passed today to cifs_try_adding_channels is to pass the local_nls field for the new channels and binding session. However, the ses struct already has local_nls field that is setup during the first cifs_setup_session. So there is no need to pass cifs_sb. This change removes cifs_sb from the arg list for this and the functions that it calls and uses ses->local_nls instead. Cc: stable@vger.kernel.org Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifsproto.h | 2 +- fs/smb/client/connect.c | 2 +- fs/smb/client/sess.c | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 890ceddae07e..8e53abcfc5ec 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -610,7 +610,7 @@ void cifs_free_hash(struct shash_desc **sdesc); struct cifs_chan * cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server); -int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); +int cifs_try_adding_channels(struct cifs_ses *ses); bool is_server_using_iface(struct TCP_Server_Info *server, struct cifs_server_iface *iface); bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface); diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index dd24b0140252..a9632c060bce 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3561,7 +3561,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) ctx->prepath = NULL; out: - cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); + cifs_try_adding_channels(mnt_ctx.ses); rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); if (rc) goto error; diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index c899b05c92f7..61cc7c415491 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -24,7 +24,7 @@ #include "fs_context.h" static int -cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, +cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface); bool @@ -157,7 +157,7 @@ cifs_chan_is_iface_active(struct cifs_ses *ses, } /* returns number of channels added */ -int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) +int cifs_try_adding_channels(struct cifs_ses *ses) { struct TCP_Server_Info *server = ses->server; int old_chan_count, new_chan_count; @@ -230,7 +230,7 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) kref_get(&iface->refcount); spin_unlock(&ses->iface_lock); - rc = cifs_ses_add_channel(cifs_sb, ses, iface); + rc = cifs_ses_add_channel(ses, iface); spin_lock(&ses->iface_lock); if (rc) { @@ -354,7 +354,7 @@ cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server) } static int -cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, +cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface) { struct TCP_Server_Info *chan_server; @@ -433,7 +433,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, * This will be used for encoding/decoding user/domain/pw * during sess setup auth. */ - ctx->local_nls = cifs_sb->local_nls; + ctx->local_nls = ses->local_nls; /* Use RDMA if possible */ ctx->rdma = iface->rdma_capable; @@ -479,7 +479,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, rc = cifs_negotiate_protocol(xid, ses, chan->server); if (!rc) - rc = cifs_setup_session(xid, ses, chan->server, cifs_sb->local_nls); + rc = cifs_setup_session(xid, ses, chan->server, ses->local_nls); mutex_unlock(&ses->session_mutex); From 0c00e422bf3dea146cd02e7f4bea951a66f4646e Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 6 Nov 2023 14:40:11 +0000 Subject: [PATCH 0424/1397] cifs: Fix encryption of cleared, but unset rq_iter data buffers commit 37de5a80e932f828c34abeaae63170d73930dca3 upstream. Each smb_rqst struct contains two things: an array of kvecs (rq_iov) that contains the protocol data for an RPC op and an iterator (rq_iter) that contains the data payload of an RPC op. When an smb_rqst is allocated rq_iter is it always cleared, but we don't set it up unless we're going to use it. The functions that determines the size of the ciphertext buffer that will be needed to encrypt a request, cifs_get_num_sgs(), assumes that rq_iter is always initialised - and employs user_backed_iter() to check that the iterator isn't user-backed. This used to incidentally work, because ->user_backed was set to false because the iterator has never been initialised, but with commit f1b4cb650b9a0eeba206d8f069fcdc532bfbcd74[1] which changes user_backed_iter() to determine this based on the iterator type insted, a warning is now emitted: WARNING: CPU: 7 PID: 4584 at fs/smb/client/cifsglob.h:2165 smb2_get_aead_req+0x3fc/0x420 [cifs] ... RIP: 0010:smb2_get_aead_req+0x3fc/0x420 [cifs] ... crypt_message+0x33e/0x550 [cifs] smb3_init_transform_rq+0x27d/0x3f0 [cifs] smb_send_rqst+0xc7/0x160 [cifs] compound_send_recv+0x3ca/0x9f0 [cifs] cifs_send_recv+0x25/0x30 [cifs] SMB2_tcon+0x38a/0x820 [cifs] cifs_get_smb_ses+0x69c/0xee0 [cifs] cifs_mount_get_session+0x76/0x1d0 [cifs] dfs_mount_share+0x74/0x9d0 [cifs] cifs_mount+0x6e/0x2e0 [cifs] cifs_smb3_do_mount+0x143/0x300 [cifs] smb3_get_tree+0x15e/0x290 [cifs] vfs_get_tree+0x2d/0xe0 do_new_mount+0x124/0x340 __se_sys_mount+0x143/0x1a0 The problem is that rq_iter was never set, so the type is 0 (ie. ITER_UBUF) which causes user_backed_iter() to return true. The code doesn't malfunction because it checks the size of the iterator - which is 0. Fix cifs_get_num_sgs() to ignore rq_iter if its count is 0, thereby bypassing the warnings. It might be better to explicitly initialise rq_iter to a zero-length ITER_BVEC, say, as it can always be reinitialised later. Fixes: d08089f649a0 ("cifs: Change the I/O paths to use an iterator rather than a page list") Reported-by: Damian Tometzki Closes: https://lore.kernel.org/r/ZUfQo47uo0p2ZsYg@fedora.fritz.box/ Tested-by: Damian Tometzki Cc: stable@vger.kernel.org cc: Eric Biggers cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f1b4cb650b9a0eeba206d8f069fcdc532bfbcd74 [1] Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: David Howells Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifsglob.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 02082621d8e0..e55f49e278a2 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -2143,6 +2143,7 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, unsigned int len, skip; unsigned int nents = 0; unsigned long addr; + size_t data_size; int i, j; /* @@ -2158,17 +2159,21 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, * rqst[1+].rq_iov[0+] data to be encrypted/decrypted */ for (i = 0; i < num_rqst; i++) { + data_size = iov_iter_count(&rqst[i].rq_iter); + /* We really don't want a mixture of pinned and unpinned pages * in the sglist. It's hard to keep track of which is what. * Instead, we convert to a BVEC-type iterator higher up. */ - if (WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) + if (data_size && + WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) return -EIO; /* We also don't want to have any extra refs or pins to clean * up in the sglist. */ - if (WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) + if (data_size && + WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) return -EIO; for (j = 0; j < rqst[i].rq_nvec; j++) { @@ -2184,7 +2189,8 @@ static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, } skip = 0; } - nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); + if (data_size) + nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); } nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE); return nents; From 024fe6a4ca845bcdc7d0a71ce9d2ced97ff93b97 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 10 Nov 2023 15:33:14 +1100 Subject: [PATCH 0425/1397] xfs: recovery should not clear di_flushiter unconditionally commit 7930d9e103700cde15833638855b750715c12091 upstream. Because on v3 inodes, di_flushiter doesn't exist. It overlaps with zero padding in the inode, except when NREXT64=1 configurations are in use and the zero padding is no longer padding but holds the 64 bit extent counter. This manifests obviously on big endian platforms (e.g. s390) because the log dinode is in host order and the overlap is the LSBs of the extent count field. It is not noticed on little endian machines because the overlap is at the MSB end of the extent count field and we need to get more than 2^^48 extents in the inode before it manifests. i.e. the heat death of the universe will occur before we see the problem in little endian machines. This is a zero-day issue for NREXT64=1 configuraitons on big endian machines. Fix it by only clearing di_flushiter on v2 inodes during recovery. Fixes: 9b7d16e34bbe ("xfs: Introduce XFS_DIFLAG2_NREXT64 and associated helpers") cc: stable@kernel.org # 5.19+ Signed-off-by: Dave Chinner Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode_item_recover.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 0e5dba2343ea..e6609067ef26 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -369,24 +369,26 @@ xlog_recover_inode_commit_pass2( * superblock flag to determine whether we need to look at di_flushiter * to skip replay when the on disk inode is newer than the log one */ - if (!xfs_has_v3inodes(mp) && - ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) { - /* - * Deal with the wrap case, DI_MAX_FLUSH is less - * than smaller numbers - */ - if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && - ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) { - /* do nothing */ - } else { - trace_xfs_log_recover_inode_skip(log, in_f); - error = 0; - goto out_release; + if (!xfs_has_v3inodes(mp)) { + if (ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) { + /* + * Deal with the wrap case, DI_MAX_FLUSH is less + * than smaller numbers + */ + if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && + ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) { + /* do nothing */ + } else { + trace_xfs_log_recover_inode_skip(log, in_f); + error = 0; + goto out_release; + } } + + /* Take the opportunity to reset the flush iteration count */ + ldip->di_flushiter = 0; } - /* Take the opportunity to reset the flush iteration count */ - ldip->di_flushiter = 0; if (unlikely(S_ISREG(ldip->di_mode))) { if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) && From 23c73538752b0c30ad54e3f35495a2c7452a4f3d Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 17 Oct 2023 17:00:31 +0900 Subject: [PATCH 0426/1397] btrfs: zoned: wait for data BG to be finished on direct IO allocation commit 776a838f1fa95670c1c1cf7109a898090b473fa3 upstream. Running the fio command below on a ZNS device results in "Resource temporarily unavailable" error. $ sudo fio --name=w --directory=/mnt --filesize=1GB --bs=16MB --numjobs=16 \ --rw=write --ioengine=libaio --iodepth=128 --direct=1 fio: io_u error on file /mnt/w.2.0: Resource temporarily unavailable: write offset=117440512, buflen=16777216 fio: io_u error on file /mnt/w.2.0: Resource temporarily unavailable: write offset=134217728, buflen=16777216 ... This happens because -EAGAIN error returned from btrfs_reserve_extent() called from btrfs_new_extent_direct() is spilling over to the userland. btrfs_reserve_extent() returns -EAGAIN when there is no active zone available. Then, the caller should wait for some other on-going IO to finish a zone and retry the allocation. This logic is already implemented for buffered write in cow_file_range(), but it is missing for the direct IO counterpart. Implement the same logic for it. Reported-by: Shinichiro Kawasaki Fixes: 2ce543f47843 ("btrfs: zoned: wait until zone is finished when allocation didn't progress") CC: stable@vger.kernel.org # 6.1+ Tested-by: Shinichiro Kawasaki Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2c61f9da4ab4..c92c589b454d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6974,8 +6974,15 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, int ret; alloc_hint = get_extent_allocation_hint(inode, start, len); +again: ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize, 0, alloc_hint, &ins, 1, 1); + if (ret == -EAGAIN) { + ASSERT(btrfs_is_zoned(fs_info)); + wait_on_bit_io(&inode->root->fs_info->flags, BTRFS_FS_NEED_ZONE_FINISH, + TASK_UNINTERRUPTIBLE); + goto again; + } if (ret) return ERR_PTR(ret); From 2dac108b9dd7fa08450750dec10ba5db429c8817 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 9 Nov 2023 15:19:54 +0100 Subject: [PATCH 0427/1397] ALSA: info: Fix potential deadlock at disconnection commit c7a60651953359f98dbf24b43e1bf561e1573ed4 upstream. As reported recently, ALSA core info helper may cause a deadlock at the forced device disconnection during the procfs operation. The proc_remove() (that is called from the snd_card_disconnect() helper) has a synchronization of the pending procfs accesses via wait_for_completion(). Meanwhile, ALSA procfs helper takes the global mutex_lock(&info_mutex) at both the proc_open callback and snd_card_info_disconnect() helper. Since the proc_open can't finish due to the mutex lock, wait_for_completion() never returns, either, hence it deadlocks. TASK#1 TASK#2 proc_reg_open() takes use_pde() snd_info_text_entry_open() snd_card_disconnect() snd_info_card_disconnect() takes mutex_lock(&info_mutex) proc_remove() wait_for_completion(unused_pde) ... waiting task#1 closes mutex_lock(&info_mutex) => DEADLOCK This patch is a workaround for avoiding the deadlock scenario above. The basic strategy is to move proc_remove() call outside the mutex lock. proc_remove() can work gracefully without extra locking, and it can delete the tree recursively alone. So, we call proc_remove() at snd_info_card_disconnection() at first, then delete the rest resources recursively within the info_mutex lock. After the change, the function snd_info_disconnect() doesn't do disconnection by itself any longer, but it merely clears the procfs pointer. So rename the function to snd_info_clear_entries() for avoiding confusion. The similar change is applied to snd_info_free_entry(), too. Since the proc_remove() is called only conditionally with the non-NULL entry->p, it's skipped after the snd_info_clear_entries() call. Reported-by: Shinhyung Kang Closes: https://lore.kernel.org/r/664457955.21699345385931.JavaMail.epsvc@epcpadp4 Reviewed-by: Jaroslav Kysela Cc: Link: https://lore.kernel.org/r/20231109141954.4283-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/info.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/sound/core/info.c b/sound/core/info.c index 0b2f04dcb589..e2f302e55bbb 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -56,7 +56,7 @@ struct snd_info_private_data { }; static int snd_info_version_init(void); -static void snd_info_disconnect(struct snd_info_entry *entry); +static void snd_info_clear_entries(struct snd_info_entry *entry); /* @@ -569,11 +569,16 @@ void snd_info_card_disconnect(struct snd_card *card) { if (!card) return; - mutex_lock(&info_mutex); + proc_remove(card->proc_root_link); - card->proc_root_link = NULL; if (card->proc_root) - snd_info_disconnect(card->proc_root); + proc_remove(card->proc_root->p); + + mutex_lock(&info_mutex); + if (card->proc_root) + snd_info_clear_entries(card->proc_root); + card->proc_root_link = NULL; + card->proc_root = NULL; mutex_unlock(&info_mutex); } @@ -745,15 +750,14 @@ struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card, } EXPORT_SYMBOL(snd_info_create_card_entry); -static void snd_info_disconnect(struct snd_info_entry *entry) +static void snd_info_clear_entries(struct snd_info_entry *entry) { struct snd_info_entry *p; if (!entry->p) return; list_for_each_entry(p, &entry->children, list) - snd_info_disconnect(p); - proc_remove(entry->p); + snd_info_clear_entries(p); entry->p = NULL; } @@ -770,8 +774,9 @@ void snd_info_free_entry(struct snd_info_entry * entry) if (!entry) return; if (entry->p) { + proc_remove(entry->p); mutex_lock(&info_mutex); - snd_info_disconnect(entry); + snd_info_clear_entries(entry); mutex_unlock(&info_mutex); } From a2650aec825fa61a4532a9d621fa3da0ccc8d89c Mon Sep 17 00:00:00 2001 From: Eymen Yigit Date: Fri, 10 Nov 2023 18:07:15 +0300 Subject: [PATCH 0428/1397] ALSA: hda/realtek: Enable Mute LED on HP 255 G8 commit 8384c0baf223e1c3bc7b1c711d80a4c6106d210e upstream. This HP Notebook uses ALC236 codec with COEF 0x07 idx 1 controlling the mute LED. Enable already existing quirk for this device. Signed-off-by: Eymen Yigit Cc: Luka Guzenko Cc: Link: https://lore.kernel.org/r/20231110150715.5141-1-eymenyg01@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 76639738ae31..ed306feac611 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9746,6 +9746,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8902, "HP OMEN 16", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x890e, "HP 255 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8919, "HP Pavilion Aero Laptop 13-be0xxx", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x896d, "HP ZBook Firefly 16 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), From c2269161387bb6a98a19ac69c6e2c6935fb07815 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 10 Nov 2023 15:16:06 +0800 Subject: [PATCH 0429/1397] ALSA: hda/realtek - Add Dell ALC295 to pin fall back table commit 4b21a669ca21ed8f24ef4530b2918be5730114de upstream. Add ALC295 to pin fall back table. Remove 5 pin quirks for Dell ALC295. ALC295 was only support MIC2 for external MIC function. ALC295 assigned model "ALC269_FIXUP_DELL1_MIC_NO_PRESENCE" for pin fall back table. It was assigned wrong model. So, let's remove it. Fixes: fbc571290d9f ("ALSA: hda/realtek - Fixed Headphone Mic can't record on Dell platform") Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/7c1998e873834df98d59bd7e0d08c72e@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ed306feac611..53ecec20890f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10735,22 +10735,6 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60130}, {0x17, 0x90170110}, {0x21, 0x03211020}), - SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, - {0x14, 0x90170110}, - {0x21, 0x04211020}), - SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, - {0x14, 0x90170110}, - {0x21, 0x04211030}), - SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, - ALC295_STANDARD_PINS, - {0x17, 0x21014020}, - {0x18, 0x21a19030}), - SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, - ALC295_STANDARD_PINS, - {0x17, 0x21014040}, - {0x18, 0x21a19050}), - SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, - ALC295_STANDARD_PINS), SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, ALC298_STANDARD_PINS, {0x17, 0x90170110}), @@ -10794,6 +10778,9 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, {0x19, 0x40000000}, {0x1b, 0x40000000}), + SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, + {0x19, 0x40000000}, + {0x1b, 0x40000000}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x19, 0x40000000}, {0x1a, 0x40000000}), From d4176c66f7a955ddabf1de613fa4e52f8763d19e Mon Sep 17 00:00:00 2001 From: Chandradeep Dey Date: Sat, 11 Nov 2023 19:25:49 +0100 Subject: [PATCH 0430/1397] ALSA: hda/realtek - Enable internal speaker of ASUS K6500ZC commit 713f040cd22285fcc506f40a0d259566e6758c3c upstream. Apply the already existing quirk chain ALC294_FIXUP_ASUS_SPK to enable the internal speaker of ASUS K6500ZC. Signed-off-by: Chandradeep Dey Cc: Link: https://lore.kernel.org/r/NizcVHQ--3-9@chandradeepdey.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 53ecec20890f..ac2d8f6f17f2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9821,6 +9821,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x10a1, "ASUS UX391UA", ALC294_FIXUP_ASUS_SPK), SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x10d3, "ASUS K6500ZC", ALC294_FIXUP_ASUS_SPK), SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), From 661cd04d5922e1f7da0631c36d1cf28dc43149f3 Mon Sep 17 00:00:00 2001 From: Matus Malych Date: Tue, 14 Nov 2023 14:35:25 +0100 Subject: [PATCH 0431/1397] ALSA: hda/realtek: Enable Mute LED on HP 255 G10 commit b944aa9d86d5f782bfe5e51336434c960304839c upstream. HP 255 G10 has a mute LED that can be made to work using quirk ALC236_FIXUP_HP_MUTE_LED_COEFBIT2. Enable already existing quirk - at correct line to keep order Signed-off-by: Matus Malych Cc: Link: https://lore.kernel.org/r/20231114133524.11340-1-matus@malych.org Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ac2d8f6f17f2..0a7b751e19d0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9782,6 +9782,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), From 6f47efb241e7d06ec7d327d3b39565ca026200f6 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 15 Nov 2023 16:21:16 +0000 Subject: [PATCH 0432/1397] ALSA: hda/realtek: Add quirks for HP Laptops commit 5d639b60971f003d3a9b2b31f8ec73b0718b5d57 upstream. These HP laptops use Realtek HDA codec combined with 2 or 4 CS35L41 Amplifiers using SPI with Internal Boost. Signed-off-by: Stefan Binding Cc: Link: https://lore.kernel.org/r/20231115162116.494968-3-sbinding@opensource.cirrus.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0a7b751e19d0..14fc4191fe77 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9816,6 +9816,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 6a67593893440cf0cd7fa16001a7f23cfdedb99d Mon Sep 17 00:00:00 2001 From: Johnathan Mantey Date: Mon, 13 Nov 2023 08:30:29 -0800 Subject: [PATCH 0433/1397] Revert ncsi: Propagate carrier gain/loss events to the NCSI controller commit 9e2e7efbbbff69d8340abb56d375dd79d1f5770f upstream. This reverts commit 3780bb29311eccb7a1c9641032a112eed237f7e3. The cited commit introduced unwanted behavior. The intent for the commit was to be able to detect carrier loss/gain for just the NIC connected to the BMC. The unwanted effect is a carrier loss for auxiliary paths also causes the BMC to lose carrier. The BMC never regains carrier despite the secondary NIC regaining a link. This change, when merged, needs to be backported to stable kernels. 5.4-stable, 5.10-stable, 5.15-stable, 6.1-stable, 6.5-stable Fixes: 3780bb29311e ("ncsi: Propagate carrier gain/loss events to the NCSI controller") CC: stable@vger.kernel.org Signed-off-by: Johnathan Mantey Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ncsi/ncsi-aen.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index f8854bff286c..62fb1031763d 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -89,11 +89,6 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, if ((had_link == has_link) || chained) return 0; - if (had_link) - netif_carrier_off(ndp->ndev.dev); - else - netif_carrier_on(ndp->ndev.dev); - if (!ndp->multi_package && !nc->package->multi_channel) { if (had_link) { ndp->flags |= NCSI_DEV_RESHUFFLE; From ff4d0309780c9c40121206b446605ba18c31e675 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Fri, 10 Nov 2023 10:30:11 +0100 Subject: [PATCH 0434/1397] Revert "i2c: pxa: move to generic GPIO recovery" commit 7b211c7671212cad0b83603c674838c7e824d845 upstream. This reverts commit 0b01392c18b9993a584f36ace1d61118772ad0ca. Conversion of PXA to generic I2C recovery, makes the I2C bus completely lock up if recovery pinctrl is present in the DT and I2C recovery is enabled. So, until the generic I2C recovery can also work with PXA lets revert to have working I2C and I2C recovery again. Signed-off-by: Robert Marko Cc: stable@vger.kernel.org # 5.11+ Acked-by: Andi Shyti Acked-by: Russell King (Oracle) Acked-by: Linus Walleij Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-pxa.c | 76 ++++++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 29be05af826b..3bd406470940 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -264,6 +264,9 @@ struct pxa_i2c { u32 hs_mask; struct i2c_bus_recovery_info recovery; + struct pinctrl *pinctrl; + struct pinctrl_state *pinctrl_default; + struct pinctrl_state *pinctrl_recovery; }; #define _IBMR(i2c) ((i2c)->reg_ibmr) @@ -1300,12 +1303,13 @@ static void i2c_pxa_prepare_recovery(struct i2c_adapter *adap) */ gpiod_set_value(i2c->recovery.scl_gpiod, ibmr & IBMR_SCLS); gpiod_set_value(i2c->recovery.sda_gpiod, ibmr & IBMR_SDAS); + + WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery)); } static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap) { struct pxa_i2c *i2c = adap->algo_data; - struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; u32 isr; /* @@ -1319,7 +1323,7 @@ static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap) i2c_pxa_do_reset(i2c); } - WARN_ON(pinctrl_select_state(bri->pinctrl, bri->pins_default)); + WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default)); dev_dbg(&i2c->adap.dev, "recovery: IBMR 0x%08x ISR 0x%08x\n", readl(_IBMR(i2c)), readl(_ISR(i2c))); @@ -1341,20 +1345,76 @@ static int i2c_pxa_init_recovery(struct pxa_i2c *i2c) if (IS_ENABLED(CONFIG_I2C_PXA_SLAVE)) return 0; - bri->pinctrl = devm_pinctrl_get(dev); - if (PTR_ERR(bri->pinctrl) == -ENODEV) { - bri->pinctrl = NULL; + i2c->pinctrl = devm_pinctrl_get(dev); + if (PTR_ERR(i2c->pinctrl) == -ENODEV) + i2c->pinctrl = NULL; + if (IS_ERR(i2c->pinctrl)) + return PTR_ERR(i2c->pinctrl); + + if (!i2c->pinctrl) + return 0; + + i2c->pinctrl_default = pinctrl_lookup_state(i2c->pinctrl, + PINCTRL_STATE_DEFAULT); + i2c->pinctrl_recovery = pinctrl_lookup_state(i2c->pinctrl, "recovery"); + + if (IS_ERR(i2c->pinctrl_default) || IS_ERR(i2c->pinctrl_recovery)) { + dev_info(dev, "missing pinmux recovery information: %ld %ld\n", + PTR_ERR(i2c->pinctrl_default), + PTR_ERR(i2c->pinctrl_recovery)); + return 0; + } + + /* + * Claiming GPIOs can influence the pinmux state, and may glitch the + * I2C bus. Do this carefully. + */ + bri->scl_gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN); + if (bri->scl_gpiod == ERR_PTR(-EPROBE_DEFER)) + return -EPROBE_DEFER; + if (IS_ERR(bri->scl_gpiod)) { + dev_info(dev, "missing scl gpio recovery information: %pe\n", + bri->scl_gpiod); + return 0; + } + + /* + * We have SCL. Pull SCL low and wait a bit so that SDA glitches + * have no effect. + */ + gpiod_direction_output(bri->scl_gpiod, 0); + udelay(10); + bri->sda_gpiod = devm_gpiod_get(dev, "sda", GPIOD_OUT_HIGH_OPEN_DRAIN); + + /* Wait a bit in case of a SDA glitch, and then release SCL. */ + udelay(10); + gpiod_direction_output(bri->scl_gpiod, 1); + + if (bri->sda_gpiod == ERR_PTR(-EPROBE_DEFER)) + return -EPROBE_DEFER; + + if (IS_ERR(bri->sda_gpiod)) { + dev_info(dev, "missing sda gpio recovery information: %pe\n", + bri->sda_gpiod); return 0; } - if (IS_ERR(bri->pinctrl)) - return PTR_ERR(bri->pinctrl); bri->prepare_recovery = i2c_pxa_prepare_recovery; bri->unprepare_recovery = i2c_pxa_unprepare_recovery; + bri->recover_bus = i2c_generic_scl_recovery; i2c->adap.bus_recovery_info = bri; - return 0; + /* + * Claiming GPIOs can change the pinmux state, which confuses the + * pinctrl since pinctrl's idea of the current setting is unaffected + * by the pinmux change caused by claiming the GPIO. Work around that + * by switching pinctrl to the GPIO state here. We do it this way to + * avoid glitching the I2C bus. + */ + pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery); + + return pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default); } static int i2c_pxa_probe(struct platform_device *dev) From 64d84030ff5ebd48d87c4ea136894d42a40347d8 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 31 Oct 2023 13:32:06 +0100 Subject: [PATCH 0435/1397] lsm: fix default return value for vm_enough_memory commit 866d648059d5faf53f1cd960b43fe8365ad93ea7 upstream. 1 is the return value that implements a "no-op" hook, not 0. Cc: stable@vger.kernel.org Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ac962c4cb44b..38f18fb0ec53 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -48,7 +48,7 @@ LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) From 37dab33f754abd24b384d2b7b07349dc6611381b Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 31 Oct 2023 13:32:07 +0100 Subject: [PATCH 0436/1397] lsm: fix default return value for inode_getsecctx commit b36995b8609a5a8fe5cf259a1ee768fcaed919f8 upstream. -EOPNOTSUPP is the return value that implements a "no-op" hook, not 0. Without this fix having only the BPF LSM enabled (with no programs attached) can cause uninitialized variable reads in nfsd4_encode_fattr(), because the BPF hook returns 0 without touching the 'ctxlen' variable and the corresponding 'contextlen' variable in nfsd4_encode_fattr() remains uninitialized, yet being treated as valid based on the 0 return value. Cc: stable@vger.kernel.org Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Reported-by: Benjamin Coddington Signed-off-by: Ondrej Mosnacek Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 38f18fb0ec53..2b8d85aae083 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -273,7 +273,7 @@ LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) -LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx, u32 *ctxlen) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) From 16da1e7c64c21768bcacfd9e6db7c66b0ae29c41 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Thu, 21 Sep 2023 02:02:36 -0700 Subject: [PATCH 0437/1397] sbsa_gwdt: Calculate timeout with 64-bit math commit 5d6aa89bba5bd6af2580f872b57f438dab883738 upstream. Commit abd3ac7902fb ("watchdog: sbsa: Support architecture version 1") introduced new timer math for watchdog revision 1 with the 48 bit offset register. The gwdt->clk and timeout are u32, but the argument being calculated is u64. Without a cast, the compiler performs u32 operations, truncating intermediate steps, resulting in incorrect values. A watchdog revision 1 implementation with a gwdt->clk of 1GHz and a timeout of 600s writes 3647256576 to the one shot watchdog instead of 300000000000, resulting in the watchdog firing in 3.6s instead of 600s. Force u64 math by casting the first argument (gwdt->clk) as a u64. Make the order of operations explicit with parenthesis. Fixes: abd3ac7902fb ("watchdog: sbsa: Support architecture version 1") Reported-by: Vanshidhar Konda Signed-off-by: Darren Hart Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: linux-watchdog@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: # 5.14.x Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/7d1713c5ffab19b0f3de796d82df19e8b1f340de.1695286124.git.darren@os.amperecomputing.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/sbsa_gwdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index 421ebcda62e6..5f23913ce3b4 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -152,14 +152,14 @@ static int sbsa_gwdt_set_timeout(struct watchdog_device *wdd, timeout = clamp_t(unsigned int, timeout, 1, wdd->max_hw_heartbeat_ms / 1000); if (action) - sbsa_gwdt_reg_write(gwdt->clk * timeout, gwdt); + sbsa_gwdt_reg_write((u64)gwdt->clk * timeout, gwdt); else /* * In the single stage mode, The first signal (WS0) is ignored, * the timeout is (WOR * 2), so the WOR should be configured * to half value of timeout. */ - sbsa_gwdt_reg_write(gwdt->clk / 2 * timeout, gwdt); + sbsa_gwdt_reg_write(((u64)gwdt->clk / 2) * timeout, gwdt); return 0; } From b382f69317efb2578ef327d3eb413ea983cac9e6 Mon Sep 17 00:00:00 2001 From: Tam Nguyen Date: Thu, 2 Nov 2023 10:30:08 +0700 Subject: [PATCH 0438/1397] i2c: designware: Disable TX_EMPTY irq while waiting for block length byte commit e8183fa10c25c7b3c20670bf2b430ddcc1ee03c0 upstream. During SMBus block data read process, we have seen high interrupt rate because of TX_EMPTY irq status while waiting for block length byte (the first data byte after the address phase). The interrupt handler does not do anything because the internal state is kept as STATUS_WRITE_IN_PROGRESS. Hence, we should disable TX_EMPTY IRQ until I2C DesignWare receives first data byte from I2C device, then re-enable it to resume SMBus transaction. It takes 0.789 ms for host to receive data length from slave. Without the patch, i2c_dw_isr() is called 99 times by TX_EMPTY interrupt. And it is none after applying the patch. Cc: stable@vger.kernel.org Co-developed-by: Chuong Tran Signed-off-by: Chuong Tran Signed-off-by: Tam Nguyen Acked-by: Jarkko Nikula Reviewed-by: Serge Semin Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-designware-master.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index ca1035e010c7..85dbd0eb5392 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -519,10 +519,16 @@ i2c_dw_xfer_msg(struct dw_i2c_dev *dev) /* * Because we don't know the buffer length in the - * I2C_FUNC_SMBUS_BLOCK_DATA case, we can't stop - * the transaction here. + * I2C_FUNC_SMBUS_BLOCK_DATA case, we can't stop the + * transaction here. Also disable the TX_EMPTY IRQ + * while waiting for the data length byte to avoid the + * bogus interrupts flood. */ - if (buf_len > 0 || flags & I2C_M_RECV_LEN) { + if (flags & I2C_M_RECV_LEN) { + dev->status |= STATUS_WRITE_IN_PROGRESS; + intr_mask &= ~DW_IC_INTR_TX_EMPTY; + break; + } else if (buf_len > 0) { /* more bytes to be written */ dev->status |= STATUS_WRITE_IN_PROGRESS; break; @@ -558,6 +564,13 @@ i2c_dw_recv_len(struct dw_i2c_dev *dev, u8 len) msgs[dev->msg_read_idx].len = len; msgs[dev->msg_read_idx].flags &= ~I2C_M_RECV_LEN; + /* + * Received buffer length, re-enable TX_EMPTY interrupt + * to resume the SMBUS transaction. + */ + regmap_update_bits(dev->map, DW_IC_INTR_MASK, DW_IC_INTR_TX_EMPTY, + DW_IC_INTR_TX_EMPTY); + return len; } From 553a485d8d852d7cdc643ad70d3813e4ae9fe94c Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 23 Oct 2023 09:57:10 +0200 Subject: [PATCH 0439/1397] s390/ap: fix AP bus crash on early config change callback invocation commit e14aec23025eeb1f2159ba34dbc1458467c4c347 upstream. Fix kernel crash in AP bus code caused by very early invocation of the config change callback function via SCLP. After a fresh IML of the machine the crypto cards are still offline and will get switched online only with activation of any LPAR which has the card in it's configuration. A crypto card coming online is reported to the LPAR via SCLP and the AP bus offers a callback function to get this kind of information. However, it may happen that the callback is invoked before the AP bus init function is complete. As the callback triggers a synchronous AP bus scan, the scan may already run but some internal states are not initialized by the AP bus init function resulting in a crash like this: [ 11.635859] Unable to handle kernel pointer dereference in virtual kernel address space [ 11.635861] Failing address: 0000000000000000 TEID: 0000000000000887 [ 11.635862] Fault in home space mode while using kernel ASCE. [ 11.635864] AS:00000000894c4007 R3:00000001fece8007 S:00000001fece7800 P:000000000000013d [ 11.635879] Oops: 0004 ilc:1 [#1] SMP [ 11.635882] Modules linked in: [ 11.635884] CPU: 5 PID: 42 Comm: kworker/5:0 Not tainted 6.6.0-rc3-00003-g4dbf7cdc6b42 #12 [ 11.635886] Hardware name: IBM 3931 A01 751 (LPAR) [ 11.635887] Workqueue: events_long ap_scan_bus [ 11.635891] Krnl PSW : 0704c00180000000 0000000000000000 (0x0) [ 11.635895] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 [ 11.635897] Krnl GPRS: 0000000001000a00 0000000000000000 0000000000000006 0000000089591940 [ 11.635899] 0000000080000000 0000000000000a00 0000000000000000 0000000000000000 [ 11.635901] 0000000081870c00 0000000089591000 000000008834e4e2 0000000002625a00 [ 11.635903] 0000000081734200 0000038000913c18 000000008834c6d6 0000038000913ac8 [ 11.635906] Krnl Code:>0000000000000000: 0000 illegal [ 11.635906] 0000000000000002: 0000 illegal [ 11.635906] 0000000000000004: 0000 illegal [ 11.635906] 0000000000000006: 0000 illegal [ 11.635906] 0000000000000008: 0000 illegal [ 11.635906] 000000000000000a: 0000 illegal [ 11.635906] 000000000000000c: 0000 illegal [ 11.635906] 000000000000000e: 0000 illegal [ 11.635915] Call Trace: [ 11.635916] [<0000000000000000>] 0x0 [ 11.635918] [<000000008834e4e2>] ap_queue_init_state+0x82/0xb8 [ 11.635921] [<000000008834ba1c>] ap_scan_domains+0x6fc/0x740 [ 11.635923] [<000000008834c092>] ap_scan_adapter+0x632/0x8b0 [ 11.635925] [<000000008834c3e4>] ap_scan_bus+0xd4/0x288 [ 11.635927] [<00000000879a33ba>] process_one_work+0x19a/0x410 [ 11.635930] Discipline DIAG cannot be used without z/VM [ 11.635930] [<00000000879a3a2c>] worker_thread+0x3fc/0x560 [ 11.635933] [<00000000879aea60>] kthread+0x120/0x128 [ 11.635936] [<000000008792afa4>] __ret_from_fork+0x3c/0x58 [ 11.635938] [<00000000885ebe62>] ret_from_fork+0xa/0x30 [ 11.635942] Last Breaking-Event-Address: [ 11.635942] [<000000008834c6d4>] ap_wait+0xcc/0x148 This patch improves the ap_bus_force_rescan() function which is invoked by the config change callback by checking if a first initial AP bus scan has been done. If not, the force rescan request is simple ignored. Anyhow it does not make sense to trigger AP bus re-scans even before the very first bus scan is complete. Cc: stable@vger.kernel.org Reviewed-by: Holger Dengler Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- drivers/s390/crypto/ap_bus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index d09e08b71cfb..d6ad437883fa 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1022,6 +1022,10 @@ EXPORT_SYMBOL(ap_driver_unregister); void ap_bus_force_rescan(void) { + /* Only trigger AP bus scans after the initial scan is done */ + if (atomic64_read(&ap_scan_bus_count) <= 0) + return; + /* processing a asynchronous bus rescan */ del_timer(&ap_config_timer); queue_work(system_long_wq, &ap_scan_work); From 73e80544954402203bc2448a2b6023377392fdab Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 28 Oct 2023 21:25:11 +0200 Subject: [PATCH 0440/1397] net: ethtool: Fix documentation of ethtool_sprintf() commit f55d8e60f10909dbc5524e261041e1d28d7d20d8 upstream. This function takes a pointer to a pointer, unlike sprintf() which is passed a plain pointer. Fix up the documentation to make this clear. Fixes: 7888fe53b706 ("ethtool: Add common function for filling out strings") Cc: Alexander Duyck Cc: Justin Stitt Cc: stable@vger.kernel.org Signed-off-by: Andrew Lunn Reviewed-by: Justin Stitt Link: https://lore.kernel.org/r/20231028192511.100001-1-andrew@lunn.ch Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- include/linux/ethtool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 62b61527bcc4..1b523fd48586 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1045,10 +1045,10 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, /** * ethtool_sprintf - Write formatted string to ethtool string data - * @data: Pointer to start of string to update + * @data: Pointer to a pointer to the start of string to update * @fmt: Format of string to write * - * Write formatted string to data. Update data to point at start of + * Write formatted string to *data. Update *data to point at start of * next string. */ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); From 4517b1594fb03b9ca3a1edab9abef05ed5185cce Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 27 Oct 2023 08:57:38 +0200 Subject: [PATCH 0441/1397] net: dsa: lan9303: consequently nested-lock physical MDIO commit 5a22fbcc10f3f7d94c5d88afbbffa240a3677057 upstream. When LAN9303 is MDIO-connected two callchains exist into mdio->bus->write(): 1. switch ports 1&2 ("physical" PHYs): virtual (switch-internal) MDIO bus (lan9303_switch_ops->phy_{read|write})-> lan9303_mdio_phy_{read|write} -> mdiobus_{read|write}_nested 2. LAN9303 virtual PHY: virtual MDIO bus (lan9303_phy_{read|write}) -> lan9303_virt_phy_reg_{read|write} -> regmap -> lan9303_mdio_{read|write} If the latter functions just take mutex_lock(&sw_dev->device->bus->mdio_lock) it triggers a LOCKDEP false-positive splat. It's false-positive because the first mdio_lock in the second callchain above belongs to virtual MDIO bus, the second mdio_lock belongs to physical MDIO bus. Consequent annotation in lan9303_mdio_{read|write} as nested lock (similar to lan9303_mdio_phy_{read|write}, it's the same physical MDIO bus) prevents the following splat: WARNING: possible circular locking dependency detected 5.15.71 #1 Not tainted ------------------------------------------------------ kworker/u4:3/609 is trying to acquire lock: ffff000011531c68 (lan9303_mdio:131:(&lan9303_mdio_regmap_config)->lock){+.+.}-{3:3}, at: regmap_lock_mutex but task is already holding lock: ffff0000114c44d8 (&bus->mdio_lock){+.+.}-{3:3}, at: mdiobus_read which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&bus->mdio_lock){+.+.}-{3:3}: lock_acquire __mutex_lock mutex_lock_nested lan9303_mdio_read _regmap_read regmap_read lan9303_probe lan9303_mdio_probe mdio_probe really_probe __driver_probe_device driver_probe_device __device_attach_driver bus_for_each_drv __device_attach device_initial_probe bus_probe_device deferred_probe_work_func process_one_work worker_thread kthread ret_from_fork -> #0 (lan9303_mdio:131:(&lan9303_mdio_regmap_config)->lock){+.+.}-{3:3}: __lock_acquire lock_acquire.part.0 lock_acquire __mutex_lock mutex_lock_nested regmap_lock_mutex regmap_read lan9303_phy_read dsa_slave_phy_read __mdiobus_read mdiobus_read get_phy_device mdiobus_scan __mdiobus_register dsa_register_switch lan9303_probe lan9303_mdio_probe mdio_probe really_probe __driver_probe_device driver_probe_device __device_attach_driver bus_for_each_drv __device_attach device_initial_probe bus_probe_device deferred_probe_work_func process_one_work worker_thread kthread ret_from_fork other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&bus->mdio_lock); lock(lan9303_mdio:131:(&lan9303_mdio_regmap_config)->lock); lock(&bus->mdio_lock); lock(lan9303_mdio:131:(&lan9303_mdio_regmap_config)->lock); *** DEADLOCK *** 5 locks held by kworker/u4:3/609: #0: ffff000002842938 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work #1: ffff80000bacbd60 (deferred_probe_work){+.+.}-{0:0}, at: process_one_work #2: ffff000007645178 (&dev->mutex){....}-{3:3}, at: __device_attach #3: ffff8000096e6e78 (dsa2_mutex){+.+.}-{3:3}, at: dsa_register_switch #4: ffff0000114c44d8 (&bus->mdio_lock){+.+.}-{3:3}, at: mdiobus_read stack backtrace: CPU: 1 PID: 609 Comm: kworker/u4:3 Not tainted 5.15.71 #1 Workqueue: events_unbound deferred_probe_work_func Call trace: dump_backtrace show_stack dump_stack_lvl dump_stack print_circular_bug check_noncircular __lock_acquire lock_acquire.part.0 lock_acquire __mutex_lock mutex_lock_nested regmap_lock_mutex regmap_read lan9303_phy_read dsa_slave_phy_read __mdiobus_read mdiobus_read get_phy_device mdiobus_scan __mdiobus_register dsa_register_switch lan9303_probe lan9303_mdio_probe ... Cc: stable@vger.kernel.org Fixes: dc7005831523 ("net: dsa: LAN9303: add MDIO managed mode support") Signed-off-by: Alexander Sverdlin Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20231027065741.534971-1-alexander.sverdlin@siemens.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/lan9303_mdio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index d8ab2b77d201..167a86f39f27 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -32,7 +32,7 @@ static int lan9303_mdio_write(void *ctx, uint32_t reg, uint32_t val) struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx; reg <<= 2; /* reg num to offset */ - mutex_lock(&sw_dev->device->bus->mdio_lock); + mutex_lock_nested(&sw_dev->device->bus->mdio_lock, MDIO_MUTEX_NESTED); lan9303_mdio_real_write(sw_dev->device, reg, val & 0xffff); lan9303_mdio_real_write(sw_dev->device, reg + 2, (val >> 16) & 0xffff); mutex_unlock(&sw_dev->device->bus->mdio_lock); @@ -50,7 +50,7 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val) struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx; reg <<= 2; /* reg num to offset */ - mutex_lock(&sw_dev->device->bus->mdio_lock); + mutex_lock_nested(&sw_dev->device->bus->mdio_lock, MDIO_MUTEX_NESTED); *val = lan9303_mdio_real_read(sw_dev->device, reg); *val |= (lan9303_mdio_real_read(sw_dev->device, reg + 2) << 16); mutex_unlock(&sw_dev->device->bus->mdio_lock); From 6d0bbeafd82eac165c38242b1a928b86de7316df Mon Sep 17 00:00:00 2001 From: Klaus Kudielka Date: Tue, 7 Nov 2023 18:44:02 +0100 Subject: [PATCH 0442/1397] net: phylink: initialize carrier state at creation commit 02d5fdbf4f2b8c406f7a4c98fa52aa181a11d733 upstream. Background: Turris Omnia (Armada 385); eth2 (mvneta) connected to SFP bus; SFP module is present, but no fiber connected, so definitely no carrier. After booting, eth2 is down, but netdev LED trigger surprisingly reports link active. Then, after "ip link set eth2 up", the link indicator goes away - as I would have expected it from the beginning. It turns out, that the default carrier state after netdev creation is "carrier ok". Some ethernet drivers explicitly call netif_carrier_off during probing, others (like mvneta) don't - which explains the current behaviour: only when the device is brought up, phylink_start calls netif_carrier_off. Fix this for all drivers using phylink, by calling netif_carrier_off in phylink_create. Fixes: 089381b27abe ("leds: initial support for Turris Omnia LEDs") Cc: stable@vger.kernel.org Suggested-by: Andrew Lunn Signed-off-by: Klaus Kudielka Reviewed-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phylink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 0d7354955d62..b5f012619e42 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1631,6 +1631,7 @@ struct phylink *phylink_create(struct phylink_config *config, pl->config = config; if (config->type == PHYLINK_NETDEV) { pl->netdev = to_net_dev(config->dev); + netif_carrier_off(pl->netdev); } else if (config->type == PHYLINK_DEV) { pl->dev = config->dev; } else { From c7e9a848265c7ba991ee9bf9e0d3aef8d57fbfdf Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 2 Nov 2023 20:52:30 +0100 Subject: [PATCH 0443/1397] gfs2: don't withdraw if init_threads() got interrupted commit 0cdc6f44e9fdc2d20d720145bf99a39f611f6d61 upstream. In gfs2_fill_super(), when mounting a gfs2 filesystem is interrupted, kthread_create() can return -EINTR. When that happens, we roll back what has already been done and abort the mount. Since commit 62dd0f98a0e5 ("gfs2: Flag a withdraw if init_threads() fails), we are calling gfs2_withdraw_delayed() in gfs2_fill_super(); first via gfs2_make_fs_rw(), then directly. But gfs2_withdraw_delayed() only marks the filesystem as withdrawing and relies on a caller further up the stack to do the actual withdraw, which doesn't exist in the gfs2_fill_super() case. Because the filesystem is marked as withdrawing / withdrawn, function gfs2_lm_unmount() doesn't release the dlm lockspace, so when we try to mount that filesystem again, we get: gfs2: fsid=gohan:gohan0: Trying to join cluster "lock_dlm", "gohan:gohan0" gfs2: fsid=gohan:gohan0: dlm_new_lockspace error -17 Since commit b77b4a4815a9 ("gfs2: Rework freeze / thaw logic"), the deadlock this gfs2_withdraw_delayed() call was supposed to work around cannot occur anymore because freeze_go_callback() won't take the sb->s_umount semaphore unconditionally anymore, so we can get rid of the gfs2_withdraw_delayed() in gfs2_fill_super() entirely. Reported-by: Alexander Aring Signed-off-by: Andreas Gruenbacher Cc: stable@vger.kernel.org # v6.5+ Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/ops_fstype.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 33ca04733e93..dd64140ae6d7 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1281,10 +1281,8 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) if (!sb_rdonly(sb)) { error = init_threads(sdp); - if (error) { - gfs2_withdraw_delayed(sdp); + if (error) goto fail_per_node; - } } error = gfs2_freeze_lock_shared(sdp); From 74ee67b1a776db627a9f6766b1a09b10dfe66528 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 9 Sep 2023 22:25:06 +0200 Subject: [PATCH 0444/1397] i2c: i801: fix potential race in i801_block_transaction_byte_by_byte commit f78ca48a8ba9cdec96e8839351e49eec3233b177 upstream. Currently we set SMBHSTCNT_LAST_BYTE only after the host has started receiving the last byte. If we get e.g. preempted before setting SMBHSTCNT_LAST_BYTE, the host may be finished with receiving the byte before SMBHSTCNT_LAST_BYTE is set. Therefore change the code to set SMBHSTCNT_LAST_BYTE before writing SMBHSTSTS_BYTE_DONE for the byte before the last byte. Now the code is also consistent with what we do in i801_isr_byte_done(). Reported-by: Jean Delvare Closes: https://lore.kernel.org/linux-i2c/20230828152747.09444625@endymion.delvare/ Cc: stable@vger.kernel.org Acked-by: Andi Shyti Signed-off-by: Heiner Kallweit Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 89631fdf6e2f..a87e3c15e5fc 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -681,15 +681,11 @@ static int i801_block_transaction_byte_by_byte(struct i801_priv *priv, return result ? priv->status : -ETIMEDOUT; } - for (i = 1; i <= len; i++) { - if (i == len && read_write == I2C_SMBUS_READ) - smbcmd |= SMBHSTCNT_LAST_BYTE; - outb_p(smbcmd, SMBHSTCNT(priv)); - - if (i == 1) - outb_p(inb(SMBHSTCNT(priv)) | SMBHSTCNT_START, - SMBHSTCNT(priv)); + if (len == 1 && read_write == I2C_SMBUS_READ) + smbcmd |= SMBHSTCNT_LAST_BYTE; + outb_p(smbcmd | SMBHSTCNT_START, SMBHSTCNT(priv)); + for (i = 1; i <= len; i++) { status = i801_wait_byte_done(priv); if (status) return status; @@ -712,9 +708,12 @@ static int i801_block_transaction_byte_by_byte(struct i801_priv *priv, data->block[0] = len; } - /* Retrieve/store value in SMBBLKDAT */ - if (read_write == I2C_SMBUS_READ) + if (read_write == I2C_SMBUS_READ) { data->block[i] = inb_p(SMBBLKDAT(priv)); + if (i == len - 1) + outb_p(smbcmd | SMBHSTCNT_LAST_BYTE, SMBHSTCNT(priv)); + } + if (read_write == I2C_SMBUS_WRITE && i+1 <= len) outb_p(data->block[i+1], SMBBLKDAT(priv)); From 9de9f078bdf58e70653f9db91abf7dd9f87bf98b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 19 Oct 2023 15:51:08 -0700 Subject: [PATCH 0445/1397] f2fs: do not return EFSCORRUPTED, but try to run online repair commit 50a472bbc79ff9d5a88be8019a60e936cadf9f13 upstream. If we return the error, there's no way to recover the status as of now, since fsck does not fix the xattr boundary issue. Cc: stable@vger.kernel.org Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/node.c | 4 +++- fs/f2fs/xattr.c | 20 +++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ed963c56ac32..8b30f11f37b4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2751,7 +2751,9 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) f2fs_update_inode_page(inode); /* 3: update and set xattr node page dirty */ - memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE); + if (page) + memcpy(F2FS_NODE(xpage), F2FS_NODE(page), + VALID_XATTR_BLOCK_SIZE); set_page_dirty(xpage); f2fs_put_page(xpage, 1); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index a657284faee3..465d145360de 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -364,10 +364,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *xe = __find_xattr(cur_addr, last_txattr_addr, NULL, index, len, name); if (!*xe) { - f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + f2fs_err(F2FS_I_SB(inode), "lookup inode (%lu) has corrupted xattr", inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); - err = -EFSCORRUPTED; + err = -ENODATA; f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_XATTR); goto out; @@ -584,13 +584,12 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if ((void *)(entry) + sizeof(__u32) > last_base_addr || (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) { - f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + f2fs_err(F2FS_I_SB(inode), "list inode (%lu) has corrupted xattr", inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); - error = -EFSCORRUPTED; f2fs_handle_error(F2FS_I_SB(inode), ERROR_CORRUPTED_XATTR); - goto cleanup; + break; } if (!prefix) @@ -650,7 +649,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (size > MAX_VALUE_LEN(inode)) return -E2BIG; - +retry: error = read_all_xattrs(inode, ipage, &base_addr); if (error) return error; @@ -660,7 +659,14 @@ static int __f2fs_setxattr(struct inode *inode, int index, /* find entry with wanted name. */ here = __find_xattr(base_addr, last_base_addr, NULL, index, len, name); if (!here) { - f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + if (!F2FS_I(inode)->i_xattr_nid) { + f2fs_notice(F2FS_I_SB(inode), + "recover xattr in inode (%lu)", inode->i_ino); + f2fs_recover_xattr_data(inode, NULL); + kfree(base_addr); + goto retry; + } + f2fs_err(F2FS_I_SB(inode), "set inode (%lu) has corrupted xattr", inode->i_ino); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); error = -EFSCORRUPTED; From 00de7280a5156f356d6ec3d50f448b6df244b146 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 Sep 2023 15:41:42 -0700 Subject: [PATCH 0446/1397] f2fs: set the default compress_level on ioctl commit f5f3bd903a5d3e3b2ba89f11e0e29db25e60c048 upstream. Otherwise, we'll get a broken inode. # touch $FILE # f2fs_io setflags compression $FILE # f2fs_io set_coption 2 8 $FILE [ 112.227612] F2FS-fs (dm-51): sanity_check_compress_inode: inode (ino=8d3fe) has unsupported compress level: 0, run fsck to fix Cc: stable@vger.kernel.org Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d034703eb323..a06f03d23762 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4006,6 +4006,15 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) F2FS_I(inode)->i_compress_algorithm = option.algorithm; F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size; F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size); + /* Set default level */ + if (F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) + F2FS_I(inode)->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL; + else + F2FS_I(inode)->i_compress_level = 0; + /* Adjust mount option level */ + if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm && + F2FS_OPTION(sbi).compress_level) + F2FS_I(inode)->i_compress_level = F2FS_OPTION(sbi).compress_level; f2fs_mark_inode_dirty_sync(inode, true); if (!f2fs_is_compress_backend_ready(inode)) From 3eebe636cac53886bd5d1cdd55e082ec9e84983f Mon Sep 17 00:00:00 2001 From: Su Hui Date: Sun, 8 Oct 2023 14:39:30 +0800 Subject: [PATCH 0447/1397] f2fs: avoid format-overflow warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e0d4e8acb3789c5a8651061fbab62ca24a45c063 upstream. With gcc and W=1 option, there's a warning like this: fs/f2fs/compress.c: In function ‘f2fs_init_page_array_cache’: fs/f2fs/compress.c:1984:47: error: ‘%u’ directive writing between 1 and 7 bytes into a region of size between 5 and 8 [-Werror=format-overflow=] 1984 | sprintf(slab_name, "f2fs_page_array_entry-%u:%u", MAJOR(dev), MINOR(dev)); | ^~ String "f2fs_page_array_entry-%u:%u" can up to 35. The first "%u" can up to 4 and the second "%u" can up to 7, so total size is "24 + 4 + 7 = 35". slab_name's size should be 35 rather than 32. Cc: stable@vger.kernel.org Signed-off-by: Su Hui Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d820801f473e..7514661bbfbb 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1976,7 +1976,7 @@ void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; - char slab_name[32]; + char slab_name[35]; if (!f2fs_sb_has_compression(sbi)) return 0; From d83309e7e006cee8afca83523559017c824fbf7a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Sep 2023 11:11:00 -0700 Subject: [PATCH 0448/1397] f2fs: split initial and dynamic conditions for extent_cache commit f803982190f0265fd36cf84670aa6daefc2b0768 upstream. Let's allocate the extent_cache tree without dynamic conditions to avoid a missing condition causing a panic as below. # create a file w/ a compressed flag # disable the compression # panic while updating extent_cache F2FS-fs (dm-64): Swapfile: last extent is not aligned to section F2FS-fs (dm-64): Swapfile (3) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(2097152 * N) Adding 124996k swap on ./swap-file. Priority:0 extents:2 across:17179494468k ================================================================== BUG: KASAN: null-ptr-deref in instrument_atomic_read_write out/common/include/linux/instrumented.h:101 [inline] BUG: KASAN: null-ptr-deref in atomic_try_cmpxchg_acquire out/common/include/asm-generic/atomic-instrumented.h:705 [inline] BUG: KASAN: null-ptr-deref in queued_write_lock out/common/include/asm-generic/qrwlock.h:92 [inline] BUG: KASAN: null-ptr-deref in __raw_write_lock out/common/include/linux/rwlock_api_smp.h:211 [inline] BUG: KASAN: null-ptr-deref in _raw_write_lock+0x5a/0x110 out/common/kernel/locking/spinlock.c:295 Write of size 4 at addr 0000000000000030 by task syz-executor154/3327 CPU: 0 PID: 3327 Comm: syz-executor154 Tainted: G O 5.10.185 #1 Hardware name: emulation qemu-x86/qemu-x86, BIOS 2023.01-21885-gb3cc1cd24d 01/01/2023 Call Trace: __dump_stack out/common/lib/dump_stack.c:77 [inline] dump_stack_lvl+0x17e/0x1c4 out/common/lib/dump_stack.c:118 __kasan_report+0x16c/0x260 out/common/mm/kasan/report.c:415 kasan_report+0x51/0x70 out/common/mm/kasan/report.c:428 kasan_check_range+0x2f3/0x340 out/common/mm/kasan/generic.c:186 __kasan_check_write+0x14/0x20 out/common/mm/kasan/shadow.c:37 instrument_atomic_read_write out/common/include/linux/instrumented.h:101 [inline] atomic_try_cmpxchg_acquire out/common/include/asm-generic/atomic-instrumented.h:705 [inline] queued_write_lock out/common/include/asm-generic/qrwlock.h:92 [inline] __raw_write_lock out/common/include/linux/rwlock_api_smp.h:211 [inline] _raw_write_lock+0x5a/0x110 out/common/kernel/locking/spinlock.c:295 __drop_extent_tree+0xdf/0x2f0 out/common/fs/f2fs/extent_cache.c:1155 f2fs_drop_extent_tree+0x17/0x30 out/common/fs/f2fs/extent_cache.c:1172 f2fs_insert_range out/common/fs/f2fs/file.c:1600 [inline] f2fs_fallocate+0x19fd/0x1f40 out/common/fs/f2fs/file.c:1764 vfs_fallocate+0x514/0x9b0 out/common/fs/open.c:310 ksys_fallocate out/common/fs/open.c:333 [inline] __do_sys_fallocate out/common/fs/open.c:341 [inline] __se_sys_fallocate out/common/fs/open.c:339 [inline] __x64_sys_fallocate+0xb8/0x100 out/common/fs/open.c:339 do_syscall_64+0x35/0x50 out/common/arch/x86/entry/common.c:46 Cc: stable@vger.kernel.org Fixes: 72840cccc0a1 ("f2fs: allocate the extent_cache by default") Reported-and-tested-by: syzbot+d342e330a37b48c094b7@syzkaller.appspotmail.com Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/extent_cache.c | 53 +++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 0e2d49140c07..ad8dfac73bd4 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -74,40 +74,14 @@ static void __set_extent_info(struct extent_info *ei, } } -static bool __may_read_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, READ_EXTENT_CACHE)) - return false; - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - return false; - if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && - !f2fs_sb_has_readonly(sbi)) - return false; - return S_ISREG(inode->i_mode); -} - -static bool __may_age_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, AGE_EXTENT_CACHE)) - return false; - if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) - return false; - if (file_is_cold(inode)) - return false; - - return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); -} - static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) { if (type == EX_READ) - return __may_read_extent_tree(inode); - else if (type == EX_BLOCK_AGE) - return __may_age_extent_tree(inode); + return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) && + S_ISREG(inode->i_mode); + if (type == EX_BLOCK_AGE) + return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)); return false; } @@ -120,7 +94,22 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type) if (list_empty(&F2FS_I_SB(inode)->s_list)) return false; - return __init_may_extent_tree(inode, type); + if (!__init_may_extent_tree(inode, type)) + return false; + + if (type == EX_READ) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && + !f2fs_sb_has_readonly(F2FS_I_SB(inode))) + return false; + } else if (type == EX_BLOCK_AGE) { + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) + return false; + if (file_is_cold(inode)) + return false; + } + return true; } static void __try_update_largest_extent(struct extent_tree *et, From 574bdc51e4893855db973308d8fe4d5b7d56ad43 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Fri, 6 Oct 2023 22:31:52 +0100 Subject: [PATCH 0449/1397] media: lirc: drop trailing space from scancode transmit commit c8a489f820179fb12251e262b50303c29de991ac upstream. When transmitting, infrared drivers expect an odd number of samples; iow without a trailing space. No problems have been observed so far, so this is just belt and braces. Fixes: 9b6192589be7 ("media: lirc: implement scancode sending") Cc: stable@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/lirc_dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index 043d23aaa3cb..a537734832c5 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -276,7 +276,11 @@ static ssize_t lirc_transmit(struct file *file, const char __user *buf, if (ret < 0) goto out_kfree_raw; - count = ret; + /* drop trailing space */ + if (!(ret % 2)) + count = ret - 1; + else + count = ret; txbuf = kmalloc_array(count, sizeof(unsigned int), GFP_KERNEL); if (!txbuf) { From e981ecbac55a023ae7bc390c8512c44e41274f12 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Fri, 6 Oct 2023 12:54:25 +0100 Subject: [PATCH 0450/1397] media: sharp: fix sharp encoding commit 4f7efc71891462ab7606da7039f480d7c1584a13 upstream. The Sharp protocol[1] encoding has incorrect timings for bit space. [1] https://www.sbprojects.net/knowledge/ir/sharp.php Fixes: d35afc5fe097 ("[media] rc: ir-sharp-decoder: Add encode capability") Cc: stable@vger.kernel.org Reported-by: Joe Ferner Closes: https://sourceforge.net/p/lirc/mailman/message/38604507/ Signed-off-by: Sean Young Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/ir-sharp-decoder.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/media/rc/ir-sharp-decoder.c b/drivers/media/rc/ir-sharp-decoder.c index 3d8488c39c56..3311099cbd57 100644 --- a/drivers/media/rc/ir-sharp-decoder.c +++ b/drivers/media/rc/ir-sharp-decoder.c @@ -15,7 +15,9 @@ #define SHARP_UNIT 40 /* us */ #define SHARP_BIT_PULSE (8 * SHARP_UNIT) /* 320us */ #define SHARP_BIT_0_PERIOD (25 * SHARP_UNIT) /* 1ms (680us space) */ -#define SHARP_BIT_1_PERIOD (50 * SHARP_UNIT) /* 2ms (1680ms space) */ +#define SHARP_BIT_1_PERIOD (50 * SHARP_UNIT) /* 2ms (1680us space) */ +#define SHARP_BIT_0_SPACE (17 * SHARP_UNIT) /* 680us space */ +#define SHARP_BIT_1_SPACE (42 * SHARP_UNIT) /* 1680us space */ #define SHARP_ECHO_SPACE (1000 * SHARP_UNIT) /* 40 ms */ #define SHARP_TRAILER_SPACE (125 * SHARP_UNIT) /* 5 ms (even longer) */ @@ -168,8 +170,8 @@ static const struct ir_raw_timings_pd ir_sharp_timings = { .header_pulse = 0, .header_space = 0, .bit_pulse = SHARP_BIT_PULSE, - .bit_space[0] = SHARP_BIT_0_PERIOD, - .bit_space[1] = SHARP_BIT_1_PERIOD, + .bit_space[0] = SHARP_BIT_0_SPACE, + .bit_space[1] = SHARP_BIT_1_SPACE, .trailer_pulse = SHARP_BIT_PULSE, .trailer_space = SHARP_ECHO_SPACE, .msb_first = 1, From 47b12dc269d7c4d23818ebffdcc170ab39ddc3d4 Mon Sep 17 00:00:00 2001 From: Vikash Garodia Date: Thu, 10 Aug 2023 07:55:04 +0530 Subject: [PATCH 0451/1397] media: venus: hfi_parser: Add check to keep the number of codecs within range commit 0768a9dd809ef52440b5df7dce5a1c1c7e97abbd upstream. Supported codec bitmask is populated from the payload from venus firmware. There is a possible case when all the bits in the codec bitmask is set. In such case, core cap for decoder is filled and MAX_CODEC_NUM is utilized. Now while filling the caps for encoder, it can lead to access the caps array beyong 32 index. Hence leading to OOB write. The fix counts the supported encoder and decoder. If the count is more than max, then it skips accessing the caps. Cc: stable@vger.kernel.org Fixes: 1a73374a04e5 ("media: venus: hfi_parser: add common capability parser") Signed-off-by: Vikash Garodia Signed-off-by: Stanimir Varbanov Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/venus/hfi_parser.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/platform/qcom/venus/hfi_parser.c b/drivers/media/platform/qcom/venus/hfi_parser.c index 6cf74b2bc5ae..f28e72fecf95 100644 --- a/drivers/media/platform/qcom/venus/hfi_parser.c +++ b/drivers/media/platform/qcom/venus/hfi_parser.c @@ -19,6 +19,9 @@ static void init_codecs(struct venus_core *core) struct hfi_plat_caps *caps = core->caps, *cap; unsigned long bit; + if (hweight_long(core->dec_codecs) + hweight_long(core->enc_codecs) > MAX_CODEC_NUM) + return; + for_each_set_bit(bit, &core->dec_codecs, MAX_CODEC_NUM) { cap = &caps[core->codecs_count++]; cap->codec = BIT(bit); From db6bd4724356f4046f7b54dd4443cb272bfec6e8 Mon Sep 17 00:00:00 2001 From: Vikash Garodia Date: Thu, 10 Aug 2023 07:55:02 +0530 Subject: [PATCH 0452/1397] media: venus: hfi: fix the check to handle session buffer requirement commit b18e36dfd6c935da60a971310374f3dfec3c82e1 upstream. Buffer requirement, for different buffer type, comes from video firmware. While copying these requirements, there is an OOB possibility when the payload from firmware is more than expected size. Fix the check to avoid the OOB possibility. Cc: stable@vger.kernel.org Fixes: 09c2845e8fe4 ("[media] media: venus: hfi: add Host Firmware Interface (HFI)") Reviewed-by: Nathan Hebert Signed-off-by: Vikash Garodia Signed-off-by: Stanimir Varbanov Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/venus/hfi_msgs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/qcom/venus/hfi_msgs.c b/drivers/media/platform/qcom/venus/hfi_msgs.c index 7cab685a2ec8..0a041b4db9ef 100644 --- a/drivers/media/platform/qcom/venus/hfi_msgs.c +++ b/drivers/media/platform/qcom/venus/hfi_msgs.c @@ -398,7 +398,7 @@ session_get_prop_buf_req(struct hfi_msg_session_property_info_pkt *pkt, memcpy(&bufreq[idx], buf_req, sizeof(*bufreq)); idx++; - if (idx > HFI_BUFFER_TYPE_MAX) + if (idx >= HFI_BUFFER_TYPE_MAX) return HFI_ERR_SESSION_INVALID_PARAMETER; req_bytes -= sizeof(struct hfi_buffer_requirements); From 648f39454813fcb59ddac09748b65ca1c347f777 Mon Sep 17 00:00:00 2001 From: Vikash Garodia Date: Thu, 10 Aug 2023 07:55:03 +0530 Subject: [PATCH 0453/1397] media: venus: hfi: add checks to handle capabilities from firmware commit 8d0b89398b7ebc52103e055bf36b60b045f5258f upstream. The hfi parser, parses the capabilities received from venus firmware and copies them to core capabilities. Consider below api, for example, fill_caps - In this api, caps in core structure gets updated with the number of capabilities received in firmware data payload. If the same api is called multiple times, there is a possibility of copying beyond the max allocated size in core caps. Similar possibilities in fill_raw_fmts and fill_profile_level functions. Cc: stable@vger.kernel.org Fixes: 1a73374a04e5 ("media: venus: hfi_parser: add common capability parser") Signed-off-by: Vikash Garodia Signed-off-by: Stanimir Varbanov Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/venus/hfi_parser.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/media/platform/qcom/venus/hfi_parser.c b/drivers/media/platform/qcom/venus/hfi_parser.c index f28e72fecf95..c43839539d4d 100644 --- a/drivers/media/platform/qcom/venus/hfi_parser.c +++ b/drivers/media/platform/qcom/venus/hfi_parser.c @@ -89,6 +89,9 @@ static void fill_profile_level(struct hfi_plat_caps *cap, const void *data, { const struct hfi_profile_level *pl = data; + if (cap->num_pl + num >= HFI_MAX_PROFILE_COUNT) + return; + memcpy(&cap->pl[cap->num_pl], pl, num * sizeof(*pl)); cap->num_pl += num; } @@ -114,6 +117,9 @@ fill_caps(struct hfi_plat_caps *cap, const void *data, unsigned int num) { const struct hfi_capability *caps = data; + if (cap->num_caps + num >= MAX_CAP_ENTRIES) + return; + memcpy(&cap->caps[cap->num_caps], caps, num * sizeof(*caps)); cap->num_caps += num; } @@ -140,6 +146,9 @@ static void fill_raw_fmts(struct hfi_plat_caps *cap, const void *fmts, { const struct raw_formats *formats = fmts; + if (cap->num_fmts + num_fmts >= MAX_FMT_ENTRIES) + return; + memcpy(&cap->fmts[cap->num_fmts], formats, num_fmts * sizeof(*formats)); cap->num_fmts += num_fmts; } @@ -162,6 +171,9 @@ parse_raw_formats(struct venus_core *core, u32 codecs, u32 domain, void *data) rawfmts[i].buftype = fmt->buffer_type; i++; + if (i >= MAX_FMT_ENTRIES) + return; + if (pinfo->num_planes > MAX_PLANES) break; From 277fec855bc574c7d45e19862e8b7d07dd58fd36 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 4 Sep 2023 15:57:37 +0300 Subject: [PATCH 0454/1397] media: ccs: Correctly initialise try compose rectangle commit 724ff68e968b19d786870d333f9952bdd6b119cb upstream. Initialise the try sink compose rectangle size to the sink compose rectangle for binner and scaler sub-devices. This was missed due to the faulty condition that lead to the compose rectangles to be initialised for the pixel array sub-device where it is not relevant. Fixes: ccfc97bdb5ae ("[media] smiapp: Add driver") Cc: stable@vger.kernel.org Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/ccs/ccs-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c index 49e0d9a09530..6f8fbd82e21c 100644 --- a/drivers/media/i2c/ccs/ccs-core.c +++ b/drivers/media/i2c/ccs/ccs-core.c @@ -3097,7 +3097,7 @@ static int ccs_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh) try_fmt->code = sensor->internal_csi_format->code; try_fmt->field = V4L2_FIELD_NONE; - if (ssd != sensor->pixel_array) + if (ssd == sensor->pixel_array) continue; try_comp = v4l2_subdev_get_try_compose(sd, fh->state, i); From 658eaf65c3bd186ec1edf2f73b95647b2ce29efa Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 14 Sep 2023 18:53:17 +0300 Subject: [PATCH 0455/1397] drm/mediatek/dp: fix memory leak on ->get_edid callback audio detection commit dab12fa8d2bd3868cf2de485ed15a3feef28a13d upstream. The sads returned by drm_edid_to_sad() needs to be freed. Fixes: e71a8ebbe086 ("drm/mediatek: dp: Audio support for MT8195") Cc: Guillaume Ranquet Cc: Bo-Chen Chen Cc: AngeloGioacchino Del Regno Cc: Dmitry Osipenko Cc: Chun-Kuang Hu Cc: Philipp Zabel Cc: Matthias Brugger Cc: dri-devel@lists.freedesktop.org Cc: linux-mediatek@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: # v6.1+ Signed-off-by: Jani Nikula Reviewed-by: Chen-Yu Tsai Link: https://patchwork.kernel.org/project/dri-devel/patch/20230914155317.2511876-1-jani.nikula@intel.com/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/mediatek/mtk_dp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c index 2cb47f663756..d669e1c786af 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp.c +++ b/drivers/gpu/drm/mediatek/mtk_dp.c @@ -2034,7 +2034,6 @@ static struct edid *mtk_dp_get_edid(struct drm_bridge *bridge, bool enabled = mtk_dp->enabled; struct edid *new_edid = NULL; struct mtk_dp_audio_cfg *audio_caps = &mtk_dp->info.audio_cur_cfg; - struct cea_sad *sads; if (!enabled) { drm_atomic_bridge_chain_pre_enable(bridge, connector->state->state); @@ -2053,7 +2052,11 @@ static struct edid *mtk_dp_get_edid(struct drm_bridge *bridge, } if (new_edid) { + struct cea_sad *sads; + audio_caps->sad_count = drm_edid_to_sad(new_edid, &sads); + kfree(sads); + audio_caps->detect_monitor = drm_detect_monitor_audio(new_edid); } From 4aa9db1eab76654aedbebe49d336bf6bc8b397d6 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 14 Sep 2023 16:10:58 +0300 Subject: [PATCH 0456/1397] drm/mediatek/dp: fix memory leak on ->get_edid callback error path commit fcaf9761fd5884a64eaac48536f8c27ecfd2e6bc upstream. Setting new_edid to NULL leaks the buffer. Fixes: f70ac097a2cf ("drm/mediatek: Add MT8195 Embedded DisplayPort driver") Cc: Markus Schneider-Pargmann Cc: Guillaume Ranquet Cc: Bo-Chen Chen Cc: CK Hu Cc: AngeloGioacchino Del Regno Cc: Dmitry Osipenko Cc: Chun-Kuang Hu Cc: Philipp Zabel Cc: Matthias Brugger Cc: dri-devel@lists.freedesktop.org Cc: linux-mediatek@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: # v6.1+ Signed-off-by: Jani Nikula Reviewed-by: Guillaume Ranquet Link: https://patchwork.kernel.org/project/dri-devel/patch/20230914131058.2472260-1-jani.nikula@intel.com/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/mediatek/mtk_dp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c index d669e1c786af..0e285df6577e 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp.c +++ b/drivers/gpu/drm/mediatek/mtk_dp.c @@ -2048,6 +2048,7 @@ static struct edid *mtk_dp_get_edid(struct drm_bridge *bridge, */ if (mtk_dp_parse_capabilities(mtk_dp)) { drm_err(mtk_dp->drm_dev, "Can't parse capabilities\n"); + kfree(new_edid); new_edid = NULL; } From 5be21d6543a2ff987a022b85fbd5079653f5c982 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 17 Nov 2023 18:36:34 +0100 Subject: [PATCH 0457/1397] dm-bufio: fix no-sleep mode commit 2a695062a5a42aead8c539a344168d4806b3fda2 upstream. dm-bufio has a no-sleep mode. When activated (with the DM_BUFIO_CLIENT_NO_SLEEP flag), the bufio client is read-only and we could call dm_bufio_get from tasklets. This is used by dm-verity. Unfortunately, commit 450e8dee51aa ("dm bufio: improve concurrent IO performance") broke this and the kernel would warn that cache_get() was calling down_read() from no-sleeping context. The bug can be reproduced by using "veritysetup open" with the "--use-tasklets" flag. This commit fixes dm-bufio, so that the tasklet mode works again, by expanding use of the 'no_sleep_enabled' static_key to conditionally use either a rw_semaphore or rwlock_t (which are colocated in the buffer_tree structure using a union). Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v6.4 Fixes: 450e8dee51aa ("dm bufio: improve concurrent IO performance") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-bufio.c | 87 ++++++++++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 25 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index bc309e41d074..486e1180cc3a 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -254,7 +254,7 @@ enum evict_result { typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context); -static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context) +static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep) { unsigned long tested = 0; struct list_head *h = lru->cursor; @@ -295,7 +295,8 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con h = h->next; - cond_resched(); + if (!no_sleep) + cond_resched(); } return NULL; @@ -382,7 +383,10 @@ struct dm_buffer { */ struct buffer_tree { - struct rw_semaphore lock; + union { + struct rw_semaphore lock; + rwlock_t spinlock; + } u; struct rb_root root; } ____cacheline_aligned_in_smp; @@ -393,9 +397,12 @@ struct dm_buffer_cache { * on the locks. */ unsigned int num_locks; + bool no_sleep; struct buffer_tree trees[]; }; +static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled); + static inline unsigned int cache_index(sector_t block, unsigned int num_locks) { return dm_hash_locks_index(block, num_locks); @@ -403,22 +410,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks) static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block) { - down_read(&bc->trees[cache_index(block, bc->num_locks)].lock); + if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) + read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + else + down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock); } static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block) { - up_read(&bc->trees[cache_index(block, bc->num_locks)].lock); + if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) + read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + else + up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock); } static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block) { - down_write(&bc->trees[cache_index(block, bc->num_locks)].lock); + if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) + write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + else + down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock); } static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block) { - up_write(&bc->trees[cache_index(block, bc->num_locks)].lock); + if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep) + write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock); + else + up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock); } /* @@ -442,18 +461,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool static void __lh_lock(struct lock_history *lh, unsigned int index) { - if (lh->write) - down_write(&lh->cache->trees[index].lock); - else - down_read(&lh->cache->trees[index].lock); + if (lh->write) { + if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep) + write_lock_bh(&lh->cache->trees[index].u.spinlock); + else + down_write(&lh->cache->trees[index].u.lock); + } else { + if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep) + read_lock_bh(&lh->cache->trees[index].u.spinlock); + else + down_read(&lh->cache->trees[index].u.lock); + } } static void __lh_unlock(struct lock_history *lh, unsigned int index) { - if (lh->write) - up_write(&lh->cache->trees[index].lock); - else - up_read(&lh->cache->trees[index].lock); + if (lh->write) { + if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep) + write_unlock_bh(&lh->cache->trees[index].u.spinlock); + else + up_write(&lh->cache->trees[index].u.lock); + } else { + if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep) + read_unlock_bh(&lh->cache->trees[index].u.spinlock); + else + up_read(&lh->cache->trees[index].u.lock); + } } /* @@ -502,14 +535,18 @@ static struct dm_buffer *list_to_buffer(struct list_head *l) return le_to_buffer(le); } -static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks) +static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep) { unsigned int i; bc->num_locks = num_locks; + bc->no_sleep = no_sleep; for (i = 0; i < bc->num_locks; i++) { - init_rwsem(&bc->trees[i].lock); + if (no_sleep) + rwlock_init(&bc->trees[i].u.spinlock); + else + init_rwsem(&bc->trees[i].u.lock); bc->trees[i].root = RB_ROOT; } @@ -648,7 +685,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode struct lru_entry *le; struct dm_buffer *b; - le = lru_evict(&bc->lru[list_mode], __evict_pred, &w); + le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep); if (!le) return NULL; @@ -702,7 +739,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_ struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context}; while (true) { - le = lru_evict(&bc->lru[old_mode], __evict_pred, &w); + le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep); if (!le) break; @@ -915,10 +952,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc, { unsigned int i; + BUG_ON(bc->no_sleep); for (i = 0; i < bc->num_locks; i++) { - down_write(&bc->trees[i].lock); + down_write(&bc->trees[i].u.lock); __remove_range(bc, &bc->trees[i].root, begin, end, pred, release); - up_write(&bc->trees[i].lock); + up_write(&bc->trees[i].u.lock); } } @@ -979,8 +1017,6 @@ struct dm_bufio_client { struct dm_buffer_cache cache; /* must be last member */ }; -static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled); - /*----------------------------------------------------------------*/ #define dm_bufio_in_request() (!!current->bio_list) @@ -1871,7 +1907,8 @@ static void *new_read(struct dm_bufio_client *c, sector_t block, if (need_submit) submit_io(b, REQ_OP_READ, read_endio); - wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); + if (nf != NF_GET) /* we already tested this condition above */ + wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); if (b->read_error) { int error = blk_status_to_errno(b->read_error); @@ -2421,7 +2458,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign r = -ENOMEM; goto bad_client; } - cache_init(&c->cache, num_locks); + cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0); c->bdev = bdev; c->block_size = block_size; From 5798525216e39d2d7c01b85d4394c5f368ce3dfd Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 17 Nov 2023 18:37:25 +0100 Subject: [PATCH 0458/1397] dm-verity: don't use blocking calls from tasklets commit 28f07f2ab4b3a2714f1fefcc58ada4bcc195f806 upstream. The commit 5721d4e5a9cd enhanced dm-verity, so that it can verify blocks from tasklets rather than from workqueues. This reportedly improves performance significantly. However, dm-verity was using the flag CRYPTO_TFM_REQ_MAY_SLEEP from tasklets which resulted in warnings about sleeping function being called from non-sleeping context. BUG: sleeping function called from invalid context at crypto/internal.h:206 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 14, name: ksoftirqd/0 preempt_count: 100, expected: 0 RCU nest depth: 0, expected: 0 CPU: 0 PID: 14 Comm: ksoftirqd/0 Tainted: G W 6.7.0-rc1 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 Call Trace: dump_stack_lvl+0x32/0x50 __might_resched+0x110/0x160 crypto_hash_walk_done+0x54/0xb0 shash_ahash_update+0x51/0x60 verity_hash_update.isra.0+0x4a/0x130 [dm_verity] verity_verify_io+0x165/0x550 [dm_verity] ? free_unref_page+0xdf/0x170 ? psi_group_change+0x113/0x390 verity_tasklet+0xd/0x70 [dm_verity] tasklet_action_common.isra.0+0xb3/0xc0 __do_softirq+0xaf/0x1ec ? smpboot_thread_fn+0x1d/0x200 ? sort_range+0x20/0x20 run_ksoftirqd+0x15/0x30 smpboot_thread_fn+0xed/0x200 kthread+0xdc/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x28/0x40 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork_asm+0x11/0x20 This commit fixes dm-verity so that it doesn't use the flags CRYPTO_TFM_REQ_MAY_SLEEP and CRYPTO_TFM_REQ_MAY_BACKLOG from tasklets. The crypto API would do GFP_ATOMIC allocation instead, it could return -ENOMEM and we catch -ENOMEM in verity_tasklet and requeue the request to the workqueue. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v6.0+ Fixes: 5721d4e5a9cd ("dm verity: Add optional "try_verify_in_tasklet" feature") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-fec.c | 4 ++-- drivers/md/dm-verity-target.c | 23 ++++++++++++----------- drivers/md/dm-verity.h | 2 +- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 3ef9f018da60..2099c755119e 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -185,7 +185,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io, { if (unlikely(verity_hash(v, verity_io_hash_req(v, io), data, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io)))) + verity_io_real_digest(v, io), true))) return 0; return memcmp(verity_io_real_digest(v, io), want_digest, @@ -386,7 +386,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, /* Always re-validate the corrected block against the expected hash */ r = verity_hash(v, verity_io_hash_req(v, io), fio->output, 1 << v->data_dev_block_bits, - verity_io_real_digest(v, io)); + verity_io_real_digest(v, io), true); if (unlikely(r < 0)) return r; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 26adcfea0302..e115fcfe723c 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -135,20 +135,21 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req, * Wrapper for crypto_ahash_init, which handles verity salting. */ static int verity_hash_init(struct dm_verity *v, struct ahash_request *req, - struct crypto_wait *wait) + struct crypto_wait *wait, bool may_sleep) { int r; ahash_request_set_tfm(req, v->tfm); - ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | - CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, (void *)wait); + ahash_request_set_callback(req, + may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0, + crypto_req_done, (void *)wait); crypto_init_wait(wait); r = crypto_wait_req(crypto_ahash_init(req), wait); if (unlikely(r < 0)) { - DMERR("crypto_ahash_init failed: %d", r); + if (r != -ENOMEM) + DMERR("crypto_ahash_init failed: %d", r); return r; } @@ -179,12 +180,12 @@ static int verity_hash_final(struct dm_verity *v, struct ahash_request *req, } int verity_hash(struct dm_verity *v, struct ahash_request *req, - const u8 *data, size_t len, u8 *digest) + const u8 *data, size_t len, u8 *digest, bool may_sleep) { int r; struct crypto_wait wait; - r = verity_hash_init(v, req, &wait); + r = verity_hash_init(v, req, &wait, may_sleep); if (unlikely(r < 0)) goto out; @@ -322,7 +323,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, r = verity_hash(v, verity_io_hash_req(v, io), data, 1 << v->hash_dev_block_bits, - verity_io_real_digest(v, io)); + verity_io_real_digest(v, io), !io->in_tasklet); if (unlikely(r < 0)) goto release_ret_r; @@ -556,7 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io) continue; } - r = verity_hash_init(v, req, &wait); + r = verity_hash_init(v, req, &wait, !io->in_tasklet); if (unlikely(r < 0)) return r; @@ -652,7 +653,7 @@ static void verity_tasklet(unsigned long data) io->in_tasklet = true; err = verity_verify_io(io); - if (err == -EAGAIN) { + if (err == -EAGAIN || err == -ENOMEM) { /* fallback to retrying with work-queue */ INIT_WORK(&io->work, verity_work); queue_work(io->v->verify_wq, &io->work); @@ -1033,7 +1034,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v) goto out; r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits, - v->zero_digest); + v->zero_digest, true); out: kfree(req); diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 2f555b420367..f96f4e281ee4 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -128,7 +128,7 @@ extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, u8 *data, size_t len)); extern int verity_hash(struct dm_verity *v, struct ahash_request *req, - const u8 *data, size_t len, u8 *digest); + const u8 *data, size_t len, u8 *digest, bool may_sleep); extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io, sector_t block, u8 *digest, bool *is_zero); From 47a9c946d850671c2492bb0cc20f151a3648a6de Mon Sep 17 00:00:00 2001 From: Mahmoud Adam Date: Fri, 10 Nov 2023 19:21:04 +0100 Subject: [PATCH 0459/1397] nfsd: fix file memleak on client_opens_release commit bc1b5acb40201a0746d68a7d7cfc141899937f4f upstream. seq_release should be called to free the allocated seq_file Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: Mahmoud Adam Reviewed-by: Jeff Layton Fixes: 78599c42ae3c ("nfsd4: add file to display list of client's opens") Reviewed-by: NeilBrown Tested-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8534693eb6a4..529b3ed3b317 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2797,7 +2797,7 @@ static int client_opens_release(struct inode *inode, struct file *file) /* XXX: alternatively, we could get/drop in seq start/stop */ drop_client(clp); - return 0; + return seq_release(inode, file); } static const struct file_operations client_states_fops = { From 68eb0889fd03fb8e7518aeabb5d2064a01a3cfd3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 10 Nov 2023 11:28:39 -0500 Subject: [PATCH 0460/1397] NFSD: Update nfsd_cache_append() to use xdr_stream commit 49cecd8628a9855cd993792a0377559ea32d5e7c upstream. When inserting a DRC-cached response into the reply buffer, ensure that the reply buffer's xdr_stream is updated properly. Otherwise the server will send a garbage response. Cc: stable@vger.kernel.org # v6.3+ Reviewed-by: Jeff Layton Tested-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfscache.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 80621a709510..abb453be71ca 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -640,24 +640,17 @@ void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp, return; } -/* - * Copy cached reply to current reply buffer. Should always fit. - * FIXME as reply is in a page, we should just attach the page, and - * keep a refcount.... - */ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) { - struct kvec *vec = &rqstp->rq_res.head[0]; - - if (vec->iov_len + data->iov_len > PAGE_SIZE) { - printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n", - data->iov_len); - return 0; - } - memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len); - vec->iov_len += data->iov_len; - return 1; + __be32 *p; + + p = xdr_reserve_space(&rqstp->rq_res_stream, data->iov_len); + if (unlikely(!p)) + return false; + memcpy(p, data->iov_base, data->iov_len); + xdr_commit_encode(&rqstp->rq_res_stream); + return true; } /* From cff8bf67f126b1a330ada1a0fd8c98fd69f3cb1c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 8 Nov 2023 14:12:15 +0800 Subject: [PATCH 0461/1397] LoongArch: Mark __percpu functions as always inline commit 71945968d8b128c955204baa33ec03bdd91bdc26 upstream. A recent change to the optimization pipeline in LLVM reveals some fragility around the inlining of LoongArch's __percpu functions, which manifests as a BUILD_BUG() failure: In file included from kernel/sched/build_policy.c:17: In file included from include/linux/sched/cputime.h:5: In file included from include/linux/sched/signal.h:5: In file included from include/linux/rculist.h:11: In file included from include/linux/rcupdate.h:26: In file included from include/linux/irqflags.h:18: arch/loongarch/include/asm/percpu.h:97:3: error: call to '__compiletime_assert_51' declared with 'error' attribute: BUILD_BUG failed 97 | BUILD_BUG(); | ^ include/linux/build_bug.h:59:21: note: expanded from macro 'BUILD_BUG' 59 | #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") | ^ include/linux/build_bug.h:39:37: note: expanded from macro 'BUILD_BUG_ON_MSG' 39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) | ^ include/linux/compiler_types.h:425:2: note: expanded from macro 'compiletime_assert' 425 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^ include/linux/compiler_types.h:413:2: note: expanded from macro '_compiletime_assert' 413 | __compiletime_assert(condition, msg, prefix, suffix) | ^ include/linux/compiler_types.h:406:4: note: expanded from macro '__compiletime_assert' 406 | prefix ## suffix(); \ | ^ :86:1: note: expanded from here 86 | __compiletime_assert_51 | ^ 1 error generated. If these functions are not inlined (which the compiler is free to do even with functions marked with the standard 'inline' keyword), the BUILD_BUG() in the default case cannot be eliminated since the compiler cannot prove it is never used, resulting in a build failure due to the error attribute. Mark these functions as __always_inline to guarantee inlining so that the BUILD_BUG() only triggers when the default case genuinely cannot be eliminated due to an unexpected size. Cc: Closes: https://github.com/ClangBuiltLinux/linux/issues/1955 Fixes: 46859ac8af52 ("LoongArch: Add multi-processor (SMP) support") Link: https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/include/asm/percpu.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index b9f567e66016..ed5da02b1cf6 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -32,7 +32,7 @@ static inline void set_my_cpu_offset(unsigned long off) #define __my_cpu_offset __my_cpu_offset #define PERCPU_OP(op, asm_op, c_op) \ -static inline unsigned long __percpu_##op(void *ptr, \ +static __always_inline unsigned long __percpu_##op(void *ptr, \ unsigned long val, int size) \ { \ unsigned long ret; \ @@ -63,7 +63,7 @@ PERCPU_OP(and, and, &) PERCPU_OP(or, or, |) #undef PERCPU_OP -static inline unsigned long __percpu_read(void *ptr, int size) +static __always_inline unsigned long __percpu_read(void *ptr, int size) { unsigned long ret; @@ -100,7 +100,7 @@ static inline unsigned long __percpu_read(void *ptr, int size) return ret; } -static inline void __percpu_write(void *ptr, unsigned long val, int size) +static __always_inline void __percpu_write(void *ptr, unsigned long val, int size) { switch (size) { case 1: @@ -132,8 +132,8 @@ static inline void __percpu_write(void *ptr, unsigned long val, int size) } } -static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, - int size) +static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, + int size) { switch (size) { case 1: From 0e9a6b8a7d88f28c401b2a9413ffd958faf10570 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 8 Nov 2023 21:12:39 +0900 Subject: [PATCH 0462/1397] tracing: fprobe-event: Fix to check tracepoint event and return commit ce51e6153f7781bcde0f8bb4c81d6fd85ee422e6 upstream. Fix to check the tracepoint event is not valid with $retval. The commit 08c9306fc2e3 ("tracing/fprobe-event: Assume fprobe is a return event by $retval") introduced automatic return probe conversion with $retval. But since tracepoint event does not support return probe, $retval is not acceptable. Without this fix, ftracetest, tprobe_syntax_errors.tc fails; [22] Tracepoint probe event parser error log check [FAIL] ---- # tail 22-tprobe_syntax_errors.tc-log.mRKroL + ftrace_errlog_check trace_fprobe t kfree ^$retval dynamic_events + printf %s t kfree + wc -c + pos=8 + printf %s t kfree ^$retval + tr -d ^ + command=t kfree $retval + echo Test command: t kfree $retval Test command: t kfree $retval + echo ---- So 't kfree $retval' should fail (tracepoint doesn't support return probe) but passed it. Link: https://lore.kernel.org/all/169944555933.45057.12831706585287704173.stgit@devnote2/ Fixes: 08c9306fc2e3 ("tracing/fprobe-event: Assume fprobe is a return event by $retval") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_fprobe.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 8bfe23af9c73..7d2ddbcfa377 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -927,11 +927,12 @@ static int parse_symbol_and_return(int argc, const char *argv[], for (i = 2; i < argc; i++) { tmp = strstr(argv[i], "$retval"); if (tmp && !isalnum(tmp[7]) && tmp[7] != '_') { + if (is_tracepoint) { + trace_probe_log_set_index(i); + trace_probe_log_err(tmp - argv[i], RETVAL_ON_PROBE); + return -EINVAL; + } *is_return = true; - /* - * NOTE: Don't check is_tracepoint here, because it will - * be checked when the argument is parsed. - */ break; } } From 6d6ab317502cd0bf30893806fae180f7b735b1fc Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 2 Nov 2023 10:36:49 +0100 Subject: [PATCH 0463/1397] swiotlb: do not free decrypted pages if dynamic commit a5e3b127455d073f146a2a4ea3e7117635d34c5c upstream. Fix these two error paths: 1. When set_memory_decrypted() fails, pages may be left fully or partially decrypted. 2. Decrypted pages may be freed if swiotlb_alloc_tlb() determines that the physical address is too high. To fix the first issue, call set_memory_encrypted() on the allocated region after a failed decryption attempt. If that also fails, leak the pages. To fix the second issue, check that the TLB physical address is below the requested limit before decrypting. Let the caller differentiate between unsuitable physical address (=> retry from a lower zone) and allocation failures (=> no point in retrying). Cc: stable@vger.kernel.org Fixes: 79636caad361 ("swiotlb: if swiotlb is full, fall back to a transient memory pool") Signed-off-by: Petr Tesarik Reviewed-by: Rick Edgecombe Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- kernel/dma/swiotlb.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index dff067bd56b1..0e1632f75421 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -558,29 +558,40 @@ void __init swiotlb_exit(void) * alloc_dma_pages() - allocate pages to be used for DMA * @gfp: GFP flags for the allocation. * @bytes: Size of the buffer. + * @phys_limit: Maximum allowed physical address of the buffer. * * Allocate pages from the buddy allocator. If successful, make the allocated * pages decrypted that they can be used for DMA. * - * Return: Decrypted pages, or %NULL on failure. + * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN) + * if the allocated physical address was above @phys_limit. */ -static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes) +static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit) { unsigned int order = get_order(bytes); struct page *page; + phys_addr_t paddr; void *vaddr; page = alloc_pages(gfp, order); if (!page) return NULL; - vaddr = page_address(page); + paddr = page_to_phys(page); + if (paddr + bytes - 1 > phys_limit) { + __free_pages(page, order); + return ERR_PTR(-EAGAIN); + } + + vaddr = phys_to_virt(paddr); if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) goto error; return page; error: - __free_pages(page, order); + /* Intentional leak if pages cannot be encrypted again. */ + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) + __free_pages(page, order); return NULL; } @@ -618,11 +629,7 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, else if (phys_limit <= DMA_BIT_MASK(32)) gfp |= __GFP_DMA32; - while ((page = alloc_dma_pages(gfp, bytes)) && - page_to_phys(page) + bytes - 1 > phys_limit) { - /* allocated, but too high */ - __free_pages(page, get_order(bytes)); - + while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) { if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit < DMA_BIT_MASK(64) && !(gfp & (__GFP_DMA32 | __GFP_DMA))) From ce7612496a4ba6068bc68aa1fa9d947dadb4ad9b Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Wed, 8 Nov 2023 12:12:49 +0100 Subject: [PATCH 0464/1397] swiotlb: fix out-of-bounds TLB allocations with CONFIG_SWIOTLB_DYNAMIC commit 53c87e846e335e3c18044c397cc35178163d7827 upstream. Limit the free list length to the size of the IO TLB. Transient pool can be smaller than IO_TLB_SEGSIZE, but the free list is initialized with the assumption that the total number of slots is a multiple of IO_TLB_SEGSIZE. As a result, swiotlb_area_find_slots() may allocate slots past the end of a transient IO TLB buffer. Reported-by: Niklas Schnelle Closes: https://lore.kernel.org/linux-iommu/104a8c8fedffd1ff8a2890983e2ec1c26bff6810.camel@linux.ibm.com/ Fixes: 79636caad361 ("swiotlb: if swiotlb is full, fall back to a transient memory pool") Cc: stable@vger.kernel.org Signed-off-by: Petr Tesarik Reviewed-by: Halil Pasic Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- kernel/dma/swiotlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 0e1632f75421..2048194a03be 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -283,7 +283,8 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, } for (i = 0; i < mem->nslabs; i++) { - mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i); + mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i), + mem->nslabs - i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; } From 7aa18f77e1c2075c06829efa9903c0962b2f954a Mon Sep 17 00:00:00 2001 From: Minda Chen Date: Wed, 2 Aug 2023 14:42:15 +0800 Subject: [PATCH 0465/1397] riscv: Using TOOLCHAIN_HAS_ZIHINTPAUSE marco replace zihintpause commit dd16ac404a685cce07e67261a94c6225d90ea7ba upstream. Actually it is a part of Conor's commit aae538cd03bc ("riscv: fix detection of toolchain Zihintpause support"). It is looks like a merge issue. Samuel's commit 0b1d60d6dd9e ("riscv: Fix build with CONFIG_CC_OPTIMIZE_FOR_SIZE=y") do not base on Conor's commit and revert to __riscv_zihintpause. So this patch can fix it. Signed-off-by: Minda Chen Fixes: 3c349eacc559 ("Merge patch "riscv: Fix build with CONFIG_CC_OPTIMIZE_FOR_SIZE=y"") Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230802064215.31111-1-minda.chen@starfivetech.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/vdso/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h index 14f5d27783b8..96b65a5396df 100644 --- a/arch/riscv/include/asm/vdso/processor.h +++ b/arch/riscv/include/asm/vdso/processor.h @@ -14,7 +14,7 @@ static inline void cpu_relax(void) __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); #endif -#ifdef __riscv_zihintpause +#ifdef CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE /* * Reduce instruction retirement. * This assumes the PC changes. From a08b414d1ecec1ddc22b2d87f5c189aab1b3097b Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 21 Aug 2023 16:57:09 +0200 Subject: [PATCH 0466/1397] riscv: put interrupt entries into .irqentry.text commit 87615e95f6f9ccd36d4a3905a2d87f91967ea9d2 upstream. The interrupt entries are expected to be in the .irqentry.text section. For example, for kprobes to work properly, exception code cannot be probed; this is ensured by blacklisting addresses in the .irqentry.text section. Fixes: 7db91e57a0ac ("RISC-V: Task implementation") Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/20230821145708.21270-1-namcaov@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 3d11aa3af105..278d01d2911f 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -16,6 +16,8 @@ #include #include + .section .irqentry.text, "ax" + SYM_CODE_START(handle_exception) /* * If coming from userspace, preserve the user thread pointer and load From bf564cf9cac7d9e72c1455fc015f0b81408c3703 Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Wed, 9 Aug 2023 11:10:23 +0800 Subject: [PATCH 0467/1397] riscv: mm: Update the comment of CONFIG_PAGE_OFFSET commit 559fe94a449cba5b50a7cffea60474b385598c00 upstream. Since the commit 011f09d12052 set sv57 as default for CONFIG_64BIT, the comment of CONFIG_PAGE_OFFSET should be updated too. Fixes: 011f09d12052 ("riscv: mm: Set sv57 on defaultly") Signed-off-by: Song Shuai Link: https://lore.kernel.org/r/20230809031023.3575407-1-songshuaishuai@tinylab.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/page.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 5488ecc337b6..57e887bfa34c 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -33,8 +33,8 @@ #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) #endif /* - * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so - * define the PAGE_OFFSET value for SV39. + * By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so + * define the PAGE_OFFSET value for SV48 and SV39. */ #define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) #define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) From 2c8b096a575000605da454d498369f5e15e9739b Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Tue, 29 Aug 2023 21:39:20 -0700 Subject: [PATCH 0468/1397] riscv: correct pt_level name via pgtable_l5/4_enabled commit e59e5e2754bf983fc58ad18f99b5eec01f1a0745 upstream. The pt_level uses CONFIG_PGTABLE_LEVELS to display page table names. But if page mode is downgraded from kernel cmdline or restricted by the hardware in 64BIT, it will give a wrong name. Like, using no4lvl for sv39, ptdump named the 1G-mapping as "PUD" that should be "PGD": 0xffffffd840000000-0xffffffd900000000 0x00000000c0000000 3G PUD D A G . . W R V So select "P4D/PUD" or "PGD" via pgtable_l5/4_enabled to correct it. Fixes: e8a62cc26ddf ("riscv: Implement sv48 support") Reviewed-by: Alexandre Ghiti Signed-off-by: Song Shuai Link: https://lore.kernel.org/r/20230712115740.943324-1-suagrfillet@gmail.com Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230830044129.11481-3-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/mm/ptdump.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 20a9f991a6d7..e9090b38f811 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -384,6 +384,9 @@ static int __init ptdump_init(void) kernel_ptd_info.base_addr = KERN_VIRT_START; + pg_level[1].name = pgtable_l5_enabled ? "P4D" : "PGD"; + pg_level[2].name = pgtable_l4_enabled ? "PUD" : "PGD"; + for (i = 0; i < ARRAY_SIZE(pg_level); i++) for (j = 0; j < ARRAY_SIZE(pte_bits); j++) pg_level[i].mask |= pte_bits[j].mask; From b59f21292e1858477d4007d9645678b88ae17abf Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 29 Aug 2023 20:25:00 +0200 Subject: [PATCH 0469/1397] riscv: kprobes: allow writing to x0 commit 8cb22bec142624d21bc85ff96b7bad10b6220e6a upstream. Instructions can write to x0, so we should simulate these instructions normally. Currently, the kernel hangs if an instruction who writes to x0 is simulated. Fixes: c22b0bcb1dd0 ("riscv: Add kprobes supported") Cc: stable@vger.kernel.org Signed-off-by: Nam Cao Reviewed-by: Charlie Jenkins Acked-by: Guo Ren Link: https://lore.kernel.org/r/20230829182500.61875-1-namcaov@gmail.com Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/probes/simulate-insn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c index d3099d67816d..6c166029079c 100644 --- a/arch/riscv/kernel/probes/simulate-insn.c +++ b/arch/riscv/kernel/probes/simulate-insn.c @@ -24,7 +24,7 @@ static inline bool rv_insn_reg_set_val(struct pt_regs *regs, u32 index, unsigned long val) { if (index == 0) - return false; + return true; else if (index <= 31) *((unsigned long *)regs + index) = val; else From cd93b7952c631ae0d122343ce7cb6c35edfa1ced Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Tue, 12 Sep 2023 17:17:10 +0800 Subject: [PATCH 0470/1397] mmc: sdhci-pci-gli: A workaround to allow GL9750 to enter ASPM L1.2 commit d7133797e9e1b72fd89237f68cb36d745599ed86 upstream. When GL9750 enters ASPM L1 sub-states, it will stay at L1.1 and will not enter L1.2. The workaround is to toggle PM state to allow GL9750 to enter ASPM L1.2. Signed-off-by: Victor Shih Link: https://lore.kernel.org/r/20230912091710.7797-1-victorshihgli@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-gli.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 6f4dea4efa39..044b4704d5bb 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -25,6 +25,9 @@ #define GLI_9750_WT_EN_ON 0x1 #define GLI_9750_WT_EN_OFF 0x0 +#define PCI_GLI_9750_PM_CTRL 0xFC +#define PCI_GLI_9750_PM_STATE GENMASK(1, 0) + #define SDHCI_GLI_9750_CFG2 0x848 #define SDHCI_GLI_9750_CFG2_L1DLY GENMASK(28, 24) #define GLI_9750_CFG2_L1DLY_VALUE 0x1F @@ -539,8 +542,12 @@ static void sdhci_gl9750_set_clock(struct sdhci_host *host, unsigned int clock) static void gl9750_hw_setting(struct sdhci_host *host) { + struct sdhci_pci_slot *slot = sdhci_priv(host); + struct pci_dev *pdev; u32 value; + pdev = slot->chip->pdev; + gl9750_wt_on(host); value = sdhci_readl(host, SDHCI_GLI_9750_CFG2); @@ -550,6 +557,13 @@ static void gl9750_hw_setting(struct sdhci_host *host) GLI_9750_CFG2_L1DLY_VALUE); sdhci_writel(host, value, SDHCI_GLI_9750_CFG2); + /* toggle PM state to allow GL9750 to enter ASPM L1.2 */ + pci_read_config_dword(pdev, PCI_GLI_9750_PM_CTRL, &value); + value |= PCI_GLI_9750_PM_STATE; + pci_write_config_dword(pdev, PCI_GLI_9750_PM_CTRL, value); + value &= ~PCI_GLI_9750_PM_STATE; + pci_write_config_dword(pdev, PCI_GLI_9750_PM_CTRL, value); + gl9750_wt_off(host); } From 9feb80bea42025ec47f4f70ce5cf7ac30016f754 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Mon, 6 Nov 2023 10:19:18 -0800 Subject: [PATCH 0471/1397] mm: fix for negative counter: nr_file_hugepages commit a48d5bdc877b85201e42cef9c2fdf5378164c23a upstream. While qualifiying the 6.4 release, the following warning was detected in messages: vmstat_refresh: nr_file_hugepages -15664 The warning is caused by the incorrect updating of the NR_FILE_THPS counter in the function split_huge_page_to_list. The if case is checking for folio_test_swapbacked, but the else case is missing the check for folio_test_pmd_mappable. The other functions that manipulate the counter like __filemap_add_folio and filemap_unaccount_folio have the corresponding check. I have a test case, which reproduces the problem. It can be found here: https://github.com/sroeschus/testcase/blob/main/vmstat_refresh/madv.c The test case reproduces on an XFS filesystem. Running the same test case on a BTRFS filesystem does not reproduce the problem. AFAIK version 6.1 until 6.6 are affected by this problem. [akpm@linux-foundation.org: whitespace fix] [shr@devkernel.io: test for folio_test_pmd_mappable()] Link: https://lkml.kernel.org/r/20231108171517.2436103-1-shr@devkernel.io Link: https://lkml.kernel.org/r/20231106181918.1091043-1-shr@devkernel.io Signed-off-by: Stefan Roesch Co-debugged-by: Johannes Weiner Acked-by: Johannes Weiner Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Reviewed-by: Yang Shi Cc: Rik van Riel Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 064fbd90822b..874000f97bfc 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2737,13 +2737,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) int nr = folio_nr_pages(folio); xas_split(&xas, folio, folio_order(folio)); - if (folio_test_swapbacked(folio)) { - __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, - -nr); - } else { - __lruvec_stat_mod_folio(folio, NR_FILE_THPS, - -nr); - filemap_nr_thps_dec(mapping); + if (folio_test_pmd_mappable(folio)) { + if (folio_test_swapbacked(folio)) { + __lruvec_stat_mod_folio(folio, + NR_SHMEM_THPS, -nr); + } else { + __lruvec_stat_mod_folio(folio, + NR_FILE_THPS, -nr); + filemap_nr_thps_dec(mapping); + } } } From 572d3e8b57f17a3b6bc738dd64730ef7ce85676b Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 7 Nov 2023 09:18:02 -0800 Subject: [PATCH 0472/1397] mm: kmem: drop __GFP_NOFAIL when allocating objcg vectors commit 24948e3b7b12e0031a6edb4f49bbb9fb2ad1e4e9 upstream. Objcg vectors attached to slab pages to store slab object ownership information are allocated using gfp flags for the original slab allocation. Depending on slab page order and the size of slab objects, objcg vector can take several pages. If the original allocation was done with the __GFP_NOFAIL flag, it triggered a warning in the page allocation code. Indeed, order > 1 pages should not been allocated with the __GFP_NOFAIL flag. Fix this by simply dropping the __GFP_NOFAIL flag when allocating the objcg vector. It effectively allows to skip the accounting of a single slab object under a heavy memory pressure. An alternative would be to implement the mechanism to fallback to order-0 allocations for accounting metadata, which is also not perfect because it will increase performance penalty and memory footprint of the kernel memory accounting under memory pressure. Link: https://lkml.kernel.org/r/ZUp8ZFGxwmCx4ZFr@P9FQF9L96D.corp.robot.car Signed-off-by: Roman Gushchin Reported-by: Christoph Lameter Closes: https://lkml.kernel.org/r/6b42243e-f197-600a-5d22-56bd728a5ad8@gentwo.org Acked-by: Shakeel Butt Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5b009b233ab8..8a881ab21f6c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2864,7 +2864,8 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg) * Moreover, it should not come from DMA buffer and is not readily * reclaimable. So those GFP bits should be masked off. */ -#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT) +#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ + __GFP_ACCOUNT | __GFP_NOFAIL) /* * mod_objcg_mlstate() may be called with irq enabled, so From 57ced2eb77343a91d28f4a73675b05fe7b555def Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 14 Nov 2023 00:16:13 +0100 Subject: [PATCH 0473/1397] mptcp: deal with large GSO size commit 9fce92f050f448a0d1ddd9083ef967d9930f1e52 upstream. After the blamed commit below, the TCP sockets (and the MPTCP subflows) can build egress packets larger than 64K. That exceeds the maximum DSS data size, the length being misrepresent on the wire and the stream being corrupted, as later observed on the receiver: WARNING: CPU: 0 PID: 9696 at net/mptcp/protocol.c:705 __mptcp_move_skbs_from_subflow+0x2604/0x26e0 CPU: 0 PID: 9696 Comm: syz-executor.7 Not tainted 6.6.0-rc5-gcd8bdf563d46 #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 netlink: 8 bytes leftover after parsing attributes in process `syz-executor.4'. RIP: 0010:__mptcp_move_skbs_from_subflow+0x2604/0x26e0 net/mptcp/protocol.c:705 RSP: 0018:ffffc90000006e80 EFLAGS: 00010246 RAX: ffffffff83e9f674 RBX: ffff88802f45d870 RCX: ffff888102ad0000 netlink: 8 bytes leftover after parsing attributes in process `syz-executor.4'. RDX: 0000000080000303 RSI: 0000000000013908 RDI: 0000000000003908 RBP: ffffc90000007110 R08: ffffffff83e9e078 R09: 1ffff1100e548c8a R10: dffffc0000000000 R11: ffffed100e548c8b R12: 0000000000013908 R13: dffffc0000000000 R14: 0000000000003908 R15: 000000000031cf29 FS: 00007f239c47e700(0000) GS:ffff88811b200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f239c45cd78 CR3: 000000006a66c006 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600 PKRU: 55555554 Call Trace: mptcp_data_ready+0x263/0xac0 net/mptcp/protocol.c:819 subflow_data_ready+0x268/0x6d0 net/mptcp/subflow.c:1409 tcp_data_queue+0x21a1/0x7a60 net/ipv4/tcp_input.c:5151 tcp_rcv_established+0x950/0x1d90 net/ipv4/tcp_input.c:6098 tcp_v6_do_rcv+0x554/0x12f0 net/ipv6/tcp_ipv6.c:1483 tcp_v6_rcv+0x2e26/0x3810 net/ipv6/tcp_ipv6.c:1749 ip6_protocol_deliver_rcu+0xd6b/0x1ae0 net/ipv6/ip6_input.c:438 ip6_input+0x1c5/0x470 net/ipv6/ip6_input.c:483 ipv6_rcv+0xef/0x2c0 include/linux/netfilter.h:304 __netif_receive_skb+0x1ea/0x6a0 net/core/dev.c:5532 process_backlog+0x353/0x660 net/core/dev.c:5974 __napi_poll+0xc6/0x5a0 net/core/dev.c:6536 net_rx_action+0x6a0/0xfd0 net/core/dev.c:6603 __do_softirq+0x184/0x524 kernel/softirq.c:553 do_softirq+0xdd/0x130 kernel/softirq.c:454 Address the issue explicitly bounding the maximum GSO size to what MPTCP actually allows. Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/450 Fixes: 7c4e983c4f3c ("net: allow gso_max_size to exceed 65536") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-1-7b9cd6a7b7f4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 886ab689a8ae..c1527f520dce 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1231,6 +1231,8 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk, mptcp_do_fallback(ssk); } +#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1)) + static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_data_frag *dfrag, struct mptcp_sendmsg_info *info) @@ -1257,6 +1259,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, return -EAGAIN; /* compute send limit */ + if (unlikely(ssk->sk_gso_max_size > MPTCP_MAX_GSO_SIZE)) + ssk->sk_gso_max_size = MPTCP_MAX_GSO_SIZE; info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags); copy = info->size_goal; From 43fa3b0d7bc2a7f0679c153e485110e5e35cfcf2 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 14 Nov 2023 00:16:15 +0100 Subject: [PATCH 0474/1397] mptcp: add validity check for sending RM_ADDR commit 8df220b29282e8b450ea57be62e1eccd4996837c upstream. This patch adds the validity check for sending RM_ADDRs for userspace PM in mptcp_pm_remove_addrs(), only send a RM_ADDR when the address is in the anno_list or conn_list. Fixes: 8b1c94da1e48 ("mptcp: only send RM_ADDR in nl_cmd_remove") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-3-7b9cd6a7b7f4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 9661f3812682..3011bc378462 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1538,8 +1538,9 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, rm_list, list) { - remove_anno_list_by_saddr(msk, &entry->addr); - if (alist.nr < MPTCP_RM_IDS_MAX) + if ((remove_anno_list_by_saddr(msk, &entry->addr) || + lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) && + alist.nr < MPTCP_RM_IDS_MAX) alist.ids[alist.nr++] = entry->addr.id; } From dfac7f073b2b86267dc1e2f975de43edf84f21ef Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 14 Nov 2023 00:16:16 +0100 Subject: [PATCH 0475/1397] mptcp: fix setsockopt(IP_TOS) subflow locking commit 7679d34f97b7a09fd565f5729f79fd61b7c55329 upstream. The MPTCP implementation of the IP_TOS socket option uses the lockless variant of the TOS manipulation helper and does not hold such lock at the helper invocation time. Add the required locking. Fixes: ffcacff87cd6 ("mptcp: Support for IP_TOS for MPTCP setsockopt()") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/457 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-4-7b9cd6a7b7f4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/sockopt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 8260202c0066..7539b9c8c2fb 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -737,8 +737,11 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, val = inet_sk(sk)->tos; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; + slow = lock_sock_fast(ssk); __ip_sock_set_tos(ssk, val); + unlock_sock_fast(ssk, slow); } release_sock(sk); From 55dd38166c5b875abe290bb9c6445e0c3b945b2c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 14 Nov 2023 00:16:17 +0100 Subject: [PATCH 0476/1397] selftests: mptcp: fix fastclose with csum failure commit 7cefbe5e1dacc7236caa77e9d072423f21422fe2 upstream. Running the mp_join selftest manually with the following command line: ./mptcp_join.sh -z -C leads to some failures: 002 fastclose server test # ... rtx [fail] got 1 MP_RST[s] TX expected 0 # ... rstrx [fail] got 1 MP_RST[s] RX expected 0 The problem is really in the wrong expectations for the RST checks implied by the csum validation. Note that the same check is repeated explicitly in the same test-case, with the correct expectation and pass successfully. Address the issue explicitly setting the correct expectation for the failing checks. Reported-by: Xiumei Mu Fixes: 6bf41020b72b ("selftests: mptcp: update and extend fastclose test-cases") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-5-7b9cd6a7b7f4@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 5917a74b749d..8eec7d2c1fc6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3237,7 +3237,7 @@ fastclose_tests() if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then test_linkfail=1024 fastclose=server \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 + chk_join_nr 0 0 0 0 0 0 1 chk_fclose_nr 1 1 invert chk_rst_nr 1 1 fi From a51260968b80af88713d14d9cc5df1c1e81f6077 Mon Sep 17 00:00:00 2001 From: ChunHao Lin Date: Fri, 10 Nov 2023 01:34:00 +0800 Subject: [PATCH 0477/1397] r8169: fix network lost after resume on DASH systems commit 868c3b95afef4883bfb66c9397482da6840b5baf upstream. Device that support DASH may be reseted or powered off during suspend. So driver needs to handle DASH during system suspend and resume. Or DASH firmware will influence device behavior and causes network lost. Fixes: b646d90053f8 ("r8169: magic.") Cc: stable@vger.kernel.org Reviewed-by: Heiner Kallweit Signed-off-by: ChunHao Lin Link: https://lore.kernel.org/r/20231109173400.4573-3-hau@realtek.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 0c76c162b8a9..ad9c54fef33f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4648,10 +4648,16 @@ static void rtl8169_down(struct rtl8169_private *tp) rtl8169_cleanup(tp); rtl_disable_exit_l1(tp); rtl_prepare_power_down(tp); + + if (tp->dash_type != RTL_DASH_NONE) + rtl8168_driver_stop(tp); } static void rtl8169_up(struct rtl8169_private *tp) { + if (tp->dash_type != RTL_DASH_NONE) + rtl8168_driver_start(tp); + pci_set_master(tp->pci_dev); phy_init_hw(tp->phydev); phy_resume(tp->phydev); From c61d525f3d471bd9970b2775bba32b05f15fb3c9 Mon Sep 17 00:00:00 2001 From: ChunHao Lin Date: Fri, 10 Nov 2023 01:33:59 +0800 Subject: [PATCH 0478/1397] r8169: add handling DASH when DASH is disabled commit 0ab0c45d8aaea5192328bfa6989673aceafc767c upstream. For devices that support DASH, even DASH is disabled, there may still exist a default firmware that will influence device behavior. So driver needs to handle DASH for devices that support DASH, no matter the DASH status is. This patch also prepares for "fix network lost after resume on DASH systems". Fixes: ee7a1beb9759 ("r8169:call "rtl8168_driver_start" "rtl8168_driver_stop" only when hardware dash function is enabled") Cc: stable@vger.kernel.org Signed-off-by: ChunHao Lin Reviewed-by: Heiner Kallweit Link: https://lore.kernel.org/r/20231109173400.4573-2-hau@realtek.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 36 ++++++++++++++++------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index ad9c54fef33f..b9bb1d2f0237 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -624,6 +624,7 @@ struct rtl8169_private { unsigned supports_gmii:1; unsigned aspm_manageable:1; + unsigned dash_enabled:1; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; struct rtl8169_tc_offsets tc_offset; @@ -1253,14 +1254,26 @@ static bool r8168ep_check_dash(struct rtl8169_private *tp) return r8168ep_ocp_read(tp, 0x128) & BIT(0); } -static enum rtl_dash_type rtl_check_dash(struct rtl8169_private *tp) +static bool rtl_dash_is_enabled(struct rtl8169_private *tp) +{ + switch (tp->dash_type) { + case RTL_DASH_DP: + return r8168dp_check_dash(tp); + case RTL_DASH_EP: + return r8168ep_check_dash(tp); + default: + return false; + } +} + +static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp) { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: - return r8168dp_check_dash(tp) ? RTL_DASH_DP : RTL_DASH_NONE; + return RTL_DASH_DP; case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53: - return r8168ep_check_dash(tp) ? RTL_DASH_EP : RTL_DASH_NONE; + return RTL_DASH_EP; default: return RTL_DASH_NONE; } @@ -1453,7 +1466,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) device_set_wakeup_enable(tp_to_dev(tp), wolopts); - if (tp->dash_type == RTL_DASH_NONE) { + if (!tp->dash_enabled) { rtl_set_d3_pll_down(tp, !wolopts); tp->dev->wol_enabled = wolopts ? 1 : 0; } @@ -2512,7 +2525,7 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp) static void rtl_prepare_power_down(struct rtl8169_private *tp) { - if (tp->dash_type != RTL_DASH_NONE) + if (tp->dash_enabled) return; if (tp->mac_version == RTL_GIGA_MAC_VER_32 || @@ -4875,7 +4888,7 @@ static int rtl8169_runtime_idle(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); - if (tp->dash_type != RTL_DASH_NONE) + if (tp->dash_enabled) return -EBUSY; if (!netif_running(tp->dev) || !netif_carrier_ok(tp->dev)) @@ -4901,8 +4914,7 @@ static void rtl_shutdown(struct pci_dev *pdev) /* Restore original MAC address */ rtl_rar_set(tp, tp->dev->perm_addr); - if (system_state == SYSTEM_POWER_OFF && - tp->dash_type == RTL_DASH_NONE) { + if (system_state == SYSTEM_POWER_OFF && !tp->dash_enabled) { pci_wake_from_d3(pdev, tp->saved_wolopts); pci_set_power_state(pdev, PCI_D3hot); } @@ -5260,7 +5272,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1); tp->aspm_manageable = !rc; - tp->dash_type = rtl_check_dash(tp); + tp->dash_type = rtl_get_dash_type(tp); + tp->dash_enabled = rtl_dash_is_enabled(tp); tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK; @@ -5331,7 +5344,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* configure chip for default features */ rtl8169_set_features(dev, dev->features); - if (tp->dash_type == RTL_DASH_NONE) { + if (!tp->dash_enabled) { rtl_set_d3_pll_down(tp, true); } else { rtl_set_d3_pll_down(tp, false); @@ -5371,7 +5384,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) "ok" : "ko"); if (tp->dash_type != RTL_DASH_NONE) { - netdev_info(dev, "DASH enabled\n"); + netdev_info(dev, "DASH %s\n", + tp->dash_enabled ? "enabled" : "disabled"); rtl8168_driver_start(tp); } From ea67257276720eaabb6109f0bd149a743cf3dcd1 Mon Sep 17 00:00:00 2001 From: Victor Shih Date: Tue, 7 Nov 2023 17:57:40 +0800 Subject: [PATCH 0479/1397] mmc: sdhci-pci-gli: GL9750: Mask the replay timer timeout of AER commit 015c9cbcf0ad709079117d27c2094a46e0eadcdb upstream. Due to a flaw in the hardware design, the GL9750 replay timer frequently times out when ASPM is enabled. As a result, the warning messages will often appear in the system log when the system accesses the GL9750 PCI config. Therefore, the replay timer timeout must be masked. Fixes: d7133797e9e1 ("mmc: sdhci-pci-gli: A workaround to allow GL9750 to enter ASPM L1.2") Signed-off-by: Victor Shih Acked-by: Adrian Hunter Acked-by: Kai-Heng Feng Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231107095741.8832-2-victorshihgli@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-gli.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 044b4704d5bb..d8a991b349a8 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -28,6 +28,9 @@ #define PCI_GLI_9750_PM_CTRL 0xFC #define PCI_GLI_9750_PM_STATE GENMASK(1, 0) +#define PCI_GLI_9750_CORRERR_MASK 0x214 +#define PCI_GLI_9750_CORRERR_MASK_REPLAY_TIMER_TIMEOUT BIT(12) + #define SDHCI_GLI_9750_CFG2 0x848 #define SDHCI_GLI_9750_CFG2_L1DLY GENMASK(28, 24) #define GLI_9750_CFG2_L1DLY_VALUE 0x1F @@ -564,6 +567,11 @@ static void gl9750_hw_setting(struct sdhci_host *host) value &= ~PCI_GLI_9750_PM_STATE; pci_write_config_dword(pdev, PCI_GLI_9750_PM_CTRL, value); + /* mask the replay timer timeout of AER */ + pci_read_config_dword(pdev, PCI_GLI_9750_CORRERR_MASK, &value); + value |= PCI_GLI_9750_CORRERR_MASK_REPLAY_TIMER_TIMEOUT; + pci_write_config_dword(pdev, PCI_GLI_9750_CORRERR_MASK, value); + gl9750_wt_off(host); } From 35c9b41dc83ffadb25ea6cc1cc68e25df7bef614 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Wed, 30 Aug 2023 16:16:06 +0100 Subject: [PATCH 0480/1397] media: qcom: camss: Fix pm_domain_on sequence in probe commit 7405116519ad70b8c7340359bfac8db8279e7ce4 upstream. We need to make sure camss_configure_pd() happens before camss_register_entities() as the vfe_get() path relies on the pointer provided by camss_configure_pd(). Fix the ordering sequence in probe to ensure the pointers vfe_get() demands are present by the time camss_register_entities() runs. In order to facilitate backporting to stable kernels I've moved the configure_pd() call pretty early on the probe() function so that irrespective of the existence of the old error handling jump labels this patch should still apply to -next circa Aug 2023 to v5.13 inclusive. Fixes: 2f6f8af67203 ("media: camss: Refactor VFE power domain toggling") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/camss/camss.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/media/platform/qcom/camss/camss.c b/drivers/media/platform/qcom/camss/camss.c index f11dc59135a5..75991d849b57 100644 --- a/drivers/media/platform/qcom/camss/camss.c +++ b/drivers/media/platform/qcom/camss/camss.c @@ -1619,6 +1619,12 @@ static int camss_probe(struct platform_device *pdev) if (ret < 0) goto err_cleanup; + ret = camss_configure_pd(camss); + if (ret < 0) { + dev_err(dev, "Failed to configure power domains: %d\n", ret); + goto err_cleanup; + } + ret = camss_init_subdevices(camss); if (ret < 0) goto err_cleanup; @@ -1678,12 +1684,6 @@ static int camss_probe(struct platform_device *pdev) } } - ret = camss_configure_pd(camss); - if (ret < 0) { - dev_err(dev, "Failed to configure power domains: %d\n", ret); - return ret; - } - pm_runtime_enable(dev); return 0; From 816a4070bc6468e93867995e9a12176dbd91e430 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Wed, 30 Aug 2023 16:16:09 +0100 Subject: [PATCH 0481/1397] media: qcom: camss: Fix vfe_get() error jump commit 26bda3da00c3edef727a6acb00ed2eb4b22f8723 upstream. Right now it is possible to do a vfe_get() with the internal reference count at 1. If vfe_check_clock_rates() returns non-zero then we will leave the reference count as-is and run: - pm_runtime_put_sync() - vfe->ops->pm_domain_off() skip: - camss_disable_clocks() Subsequent vfe_put() calls will when the ref-count is non-zero unconditionally run: - pm_runtime_put_sync() - vfe->ops->pm_domain_off() - camss_disable_clocks() vfe_get() should not attempt to roll-back on error when the ref-count is non-zero as the upper layers will still do their own vfe_put() operations. vfe_put() will drop the reference count and do the necessary power domain release, the cleanup jumps in vfe_get() should only be run when the ref-count is zero. [ 50.095796] CPU: 7 PID: 3075 Comm: cam Not tainted 6.3.2+ #80 [ 50.095798] Hardware name: LENOVO 21BXCTO1WW/21BXCTO1WW, BIOS N3HET82W (1.54 ) 05/26/2023 [ 50.095799] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 50.095802] pc : refcount_warn_saturate+0xf4/0x148 [ 50.095804] lr : refcount_warn_saturate+0xf4/0x148 [ 50.095805] sp : ffff80000c7cb8b0 [ 50.095806] x29: ffff80000c7cb8b0 x28: ffff16ecc0e3fc10 x27: 0000000000000000 [ 50.095810] x26: 0000000000000000 x25: 0000000000020802 x24: 0000000000000000 [ 50.095813] x23: ffff16ecc7360640 x22: 00000000ffffffff x21: 0000000000000005 [ 50.095815] x20: ffff16ed175f4400 x19: ffffb4d9852942a8 x18: ffffffffffffffff [ 50.095818] x17: ffffb4d9852d4a48 x16: ffffb4d983da5db8 x15: ffff80000c7cb320 [ 50.095821] x14: 0000000000000001 x13: 2e656572662d7265 x12: 7466612d65737520 [ 50.095823] x11: 00000000ffffefff x10: ffffb4d9850cebf0 x9 : ffffb4d9835cf954 [ 50.095826] x8 : 0000000000017fe8 x7 : c0000000ffffefff x6 : 0000000000057fa8 [ 50.095829] x5 : ffff16f813fe3d08 x4 : 0000000000000000 x3 : ffff621e8f4d2000 [ 50.095832] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff16ed32119040 [ 50.095835] Call trace: [ 50.095836] refcount_warn_saturate+0xf4/0x148 [ 50.095838] device_link_put_kref+0x84/0xc8 [ 50.095843] device_link_del+0x38/0x58 [ 50.095846] vfe_pm_domain_off+0x3c/0x50 [qcom_camss] [ 50.095860] vfe_put+0x114/0x140 [qcom_camss] [ 50.095869] csid_set_power+0x2c8/0x408 [qcom_camss] [ 50.095878] pipeline_pm_power_one+0x164/0x170 [videodev] [ 50.095896] pipeline_pm_power+0xc4/0x110 [videodev] [ 50.095909] v4l2_pipeline_pm_use+0x5c/0xa0 [videodev] [ 50.095923] v4l2_pipeline_pm_get+0x1c/0x30 [videodev] [ 50.095937] video_open+0x7c/0x100 [qcom_camss] [ 50.095945] v4l2_open+0x84/0x130 [videodev] [ 50.095960] chrdev_open+0xc8/0x250 [ 50.095964] do_dentry_open+0x1bc/0x498 [ 50.095966] vfs_open+0x34/0x40 [ 50.095968] path_openat+0xb44/0xf20 [ 50.095971] do_filp_open+0xa4/0x160 [ 50.095974] do_sys_openat2+0xc8/0x188 [ 50.095975] __arm64_sys_openat+0x6c/0xb8 [ 50.095977] invoke_syscall+0x50/0x128 [ 50.095982] el0_svc_common.constprop.0+0x4c/0x100 [ 50.095985] do_el0_svc+0x40/0xa8 [ 50.095988] el0_svc+0x2c/0x88 [ 50.095991] el0t_64_sync_handler+0xf4/0x120 [ 50.095994] el0t_64_sync+0x190/0x198 [ 50.095996] ---[ end trace 0000000000000000 ]--- Fixes: 779096916dae ("media: camss: vfe: Fix runtime PM imbalance on error") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/camss/camss-vfe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/qcom/camss/camss-vfe.c b/drivers/media/platform/qcom/camss/camss-vfe.c index 06c95568e5af..5d8462ec0af2 100644 --- a/drivers/media/platform/qcom/camss/camss-vfe.c +++ b/drivers/media/platform/qcom/camss/camss-vfe.c @@ -611,7 +611,7 @@ int vfe_get(struct vfe_device *vfe) } else { ret = vfe_check_clock_rates(vfe); if (ret < 0) - goto error_pm_runtime_get; + goto error_pm_domain; } vfe->power_count++; From b1eaec007b6038e729402dba9276d2413c701dba Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Wed, 30 Aug 2023 16:16:10 +0100 Subject: [PATCH 0482/1397] media: qcom: camss: Fix VFE-17x vfe_disable_output() commit 3143ad282fc08bf995ee73e32a9e40c527bf265d upstream. There are two problems with the current vfe_disable_output() routine. Firstly we rightly use a spinlock to protect output->gen2.active_num everywhere except for in the IDLE timeout path of vfe_disable_output(). Even if that is not racy "in practice" somehow it is by happenstance not by design. Secondly we do not get consistent behaviour from this routine. On sc8280xp 50% of the time I get "VFE idle timeout - resetting". In this case the subsequent capture will succeed. The other 50% of the time, we don't hit the idle timeout, never do the VFE reset and subsequent captures stall indefinitely. Rewrite the vfe_disable_output() routine to - Quiesce write masters with vfe_wm_stop() - Set active_num = 0 remembering to hold the spinlock when we do so followed by - Reset the VFE Testing on sc8280xp and sdm845 shows this to be a valid fix. Fixes: 7319cdf189bb ("media: camss: Add support for VFE hardware version Titan 170") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- .../media/platform/qcom/camss/camss-vfe-170.c | 22 +++---------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/media/platform/qcom/camss/camss-vfe-170.c b/drivers/media/platform/qcom/camss/camss-vfe-170.c index 02494c89da91..168baaa80d4e 100644 --- a/drivers/media/platform/qcom/camss/camss-vfe-170.c +++ b/drivers/media/platform/qcom/camss/camss-vfe-170.c @@ -7,7 +7,6 @@ * Copyright (C) 2020-2021 Linaro Ltd. */ -#include #include #include #include @@ -494,35 +493,20 @@ static int vfe_enable_output(struct vfe_line *line) return 0; } -static int vfe_disable_output(struct vfe_line *line) +static void vfe_disable_output(struct vfe_line *line) { struct vfe_device *vfe = to_vfe(line); struct vfe_output *output = &line->output; unsigned long flags; unsigned int i; - bool done; - int timeout = 0; - - do { - spin_lock_irqsave(&vfe->output_lock, flags); - done = !output->gen2.active_num; - spin_unlock_irqrestore(&vfe->output_lock, flags); - usleep_range(10000, 20000); - - if (timeout++ == 100) { - dev_err(vfe->camss->dev, "VFE idle timeout - resetting\n"); - vfe_reset(vfe); - output->gen2.active_num = 0; - return 0; - } - } while (!done); spin_lock_irqsave(&vfe->output_lock, flags); for (i = 0; i < output->wm_num; i++) vfe_wm_stop(vfe, output->wm_idx[i]); + output->gen2.active_num = 0; spin_unlock_irqrestore(&vfe->output_lock, flags); - return 0; + vfe_reset(vfe); } /* From 4dac469b07d4ef631379cd329cd6c0d70bdc09f3 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Wed, 30 Aug 2023 16:16:11 +0100 Subject: [PATCH 0483/1397] media: qcom: camss: Fix VFE-480 vfe_disable_output() commit 7f24d291350426d40b36dfbe6b3090617cdfd37a upstream. vfe-480 is copied from vfe-17x and has the same racy idle timeout bug as in 17x. Fix the vfe_disable_output() logic to no longer be racy and to conform to the 17x way of quiescing and then resetting the VFE. Fixes: 4edc8eae715c ("media: camss: Add initial support for VFE hardware version Titan 480") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- .../media/platform/qcom/camss/camss-vfe-480.c | 22 +++---------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/media/platform/qcom/camss/camss-vfe-480.c b/drivers/media/platform/qcom/camss/camss-vfe-480.c index f70aad2e8c23..8ddb8016434a 100644 --- a/drivers/media/platform/qcom/camss/camss-vfe-480.c +++ b/drivers/media/platform/qcom/camss/camss-vfe-480.c @@ -8,7 +8,6 @@ * Copyright (C) 2021 Jonathan Marek */ -#include #include #include #include @@ -328,35 +327,20 @@ static int vfe_enable_output(struct vfe_line *line) return 0; } -static int vfe_disable_output(struct vfe_line *line) +static void vfe_disable_output(struct vfe_line *line) { struct vfe_device *vfe = to_vfe(line); struct vfe_output *output = &line->output; unsigned long flags; unsigned int i; - bool done; - int timeout = 0; - - do { - spin_lock_irqsave(&vfe->output_lock, flags); - done = !output->gen2.active_num; - spin_unlock_irqrestore(&vfe->output_lock, flags); - usleep_range(10000, 20000); - - if (timeout++ == 100) { - dev_err(vfe->camss->dev, "VFE idle timeout - resetting\n"); - vfe_reset(vfe); - output->gen2.active_num = 0; - return 0; - } - } while (!done); spin_lock_irqsave(&vfe->output_lock, flags); for (i = 0; i < output->wm_num; i++) vfe_wm_stop(vfe, output->wm_idx[i]); + output->gen2.active_num = 0; spin_unlock_irqrestore(&vfe->output_lock, flags); - return 0; + vfe_reset(vfe); } /* From f6cc8265ebf10822d61fe8424b8b0d271fb833f8 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Wed, 30 Aug 2023 16:16:12 +0100 Subject: [PATCH 0484/1397] media: qcom: camss: Fix missing vfe_lite clocks check commit b6e1bdca463a932c1ac02caa7d3e14bf39288e0c upstream. check_clock doesn't account for vfe_lite which means that vfe_lite will never get validated by this routine. Add the clock name to the expected set to remediate. Fixes: 7319cdf189bb ("media: camss: Add support for VFE hardware version Titan 170") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/camss/camss-vfe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/qcom/camss/camss-vfe.c b/drivers/media/platform/qcom/camss/camss-vfe.c index 5d8462ec0af2..965500b83d07 100644 --- a/drivers/media/platform/qcom/camss/camss-vfe.c +++ b/drivers/media/platform/qcom/camss/camss-vfe.c @@ -535,7 +535,8 @@ static int vfe_check_clock_rates(struct vfe_device *vfe) struct camss_clock *clock = &vfe->clock[i]; if (!strcmp(clock->name, "vfe0") || - !strcmp(clock->name, "vfe1")) { + !strcmp(clock->name, "vfe1") || + !strcmp(clock->name, "vfe_lite")) { u64 min_rate = 0; unsigned long rate; From 5fbdccc9f8f3828071845ed3de6d69033d2097f5 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Wed, 30 Aug 2023 16:16:14 +0100 Subject: [PATCH 0485/1397] media: qcom: camss: Fix set CSI2_RX_CFG1_VC_MODE when VC is greater than 3 commit e655d1ae9703286cef7fda8675cad62f649dc183 upstream. VC_MODE = 0 implies a two bit VC address. VC_MODE = 1 is required for VCs with a larger address than two bits. Fixes: eebe6d00e9bf ("media: camss: Add support for CSID hardware version Titan 170") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/camss/camss-csid-gen2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/qcom/camss/camss-csid-gen2.c b/drivers/media/platform/qcom/camss/camss-csid-gen2.c index 0f8ac29d038d..efc68f8b4de9 100644 --- a/drivers/media/platform/qcom/camss/camss-csid-gen2.c +++ b/drivers/media/platform/qcom/camss/camss-csid-gen2.c @@ -449,6 +449,8 @@ static void __csid_configure_stream(struct csid_device *csid, u8 enable, u8 vc) writel_relaxed(val, csid->base + CSID_CSI2_RX_CFG0); val = 1 << CSI2_RX_CFG1_PACKET_ECC_CORRECTION_EN; + if (vc > 3) + val |= 1 << CSI2_RX_CFG1_VC_MODE; val |= 1 << CSI2_RX_CFG1_MISR_EN; writel_relaxed(val, csid->base + CSID_CSI2_RX_CFG1); From e09e1ddc693509f2b2d151017fd8666e13470eda Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Wed, 30 Aug 2023 16:16:13 +0100 Subject: [PATCH 0486/1397] media: qcom: camss: Fix invalid clock enable bit disjunction commit d8f7e1a60d01739a1d78db2b08603089c6cf7c8e upstream. define CSIPHY_3PH_CMN_CSI_COMMON_CTRL5_CLK_ENABLE BIT(7) disjunction for gen2 ? BIT(7) : is a nop we are setting the same bit either way. Fixes: 4abb21309fda ("media: camss: csiphy: Move to hardcode CSI Clock Lane number") Cc: stable@vger.kernel.org Signed-off-by: Bryan O'Donoghue Reviewed-by: Konrad Dybcio Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c b/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c index 04baa80494c6..4dba61b8d3f2 100644 --- a/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c +++ b/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c @@ -476,7 +476,7 @@ static void csiphy_lanes_enable(struct csiphy_device *csiphy, settle_cnt = csiphy_settle_cnt_calc(link_freq, csiphy->timer_clk_rate); - val = is_gen2 ? BIT(7) : CSIPHY_3PH_CMN_CSI_COMMON_CTRL5_CLK_ENABLE; + val = CSIPHY_3PH_CMN_CSI_COMMON_CTRL5_CLK_ENABLE; for (i = 0; i < c->num_data; i++) val |= BIT(c->data[i].pos * 2); From ed9f1ea9188017ba31727ba3a1ed58cdff2daf6f Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 30 Aug 2023 16:16:15 +0100 Subject: [PATCH 0487/1397] media: qcom: camss: Fix csid-gen2 for test pattern generator commit 87889f1b7ea40d2544b49c62092e6ef2792dced7 upstream. In the current driver csid Test Pattern Generator (TPG) doesn't work. This change: - fixes writing frame width and height values into CSID_TPG_DT_n_CFG_0 - fixes the shift by one between test_pattern control value and the actual pattern. - drops fixed VC of 0x0a which testing showed prohibited some test patterns in the CSID to produce output. So that TPG starts working, but with the below limitations: - only test_pattern=9 works as it should - test_pattern=8 and test_pattern=7 produce black frame (all zeroes) - the rest of test_pattern's don't work (yavta doesn't get the data) - regardless of the CFA pattern set by 'media-ctl -V' the actual pixel order is always the same (RGGB for any RAW8 or RAW10P format in 4608x2592 resolution). Tested with: RAW10P format, VC0: media-ctl -V '"msm_csid0":0[fmt:SRGGB10/4608x2592 field:none]' media-ctl -V '"msm_vfe0_rdi0":0[fmt:SRGGB10/4608x2592 field:none]' media-ctl -l '"msm_csid0":1->"msm_vfe0_rdi0":0[1]' v4l2-ctl -d /dev/v4l-subdev6 -c test_pattern=9 yavta -B capture-mplane --capture=3 -n 3 -f SRGGB10P -s 4608x2592 /dev/video0 RAW10P format, VC1: media-ctl -V '"msm_csid0":2[fmt:SRGGB10/4608x2592 field:none]' media-ctl -V '"msm_vfe0_rdi1":0[fmt:SRGGB10/4608x2592 field:none]' media-ctl -l '"msm_csid0":2->"msm_vfe0_rdi1":0[1]' v4l2-ctl -d /dev/v4l-subdev6 -c test_pattern=9 yavta -B capture-mplane --capture=3 -n 3 -f SRGGB10P -s 4608x2592 /dev/video1 RAW8 format, VC0: media-ctl --reset media-ctl -V '"msm_csid0":0[fmt:SRGGB8/4608x2592 field:none]' media-ctl -V '"msm_vfe0_rdi0":0[fmt:SRGGB8/4608x2592 field:none]' media-ctl -l '"msm_csid0":1->"msm_vfe0_rdi0":0[1]' yavta -B capture-mplane --capture=3 -n 3 -f SRGGB8 -s 4608x2592 /dev/video0 Fixes: eebe6d00e9bf ("media: camss: Add support for CSID hardware version Titan 170") Cc: stable@vger.kernel.org Signed-off-by: Andrey Konovalov Signed-off-by: Bryan O'Donoghue Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman --- drivers/media/platform/qcom/camss/camss-csid-gen2.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/media/platform/qcom/camss/camss-csid-gen2.c b/drivers/media/platform/qcom/camss/camss-csid-gen2.c index efc68f8b4de9..23acc387be5f 100644 --- a/drivers/media/platform/qcom/camss/camss-csid-gen2.c +++ b/drivers/media/platform/qcom/camss/camss-csid-gen2.c @@ -355,9 +355,6 @@ static void __csid_configure_stream(struct csid_device *csid, u8 enable, u8 vc) u8 dt_id = vc; if (tg->enabled) { - /* Config Test Generator */ - vc = 0xa; - /* configure one DT, infinite frames */ val = vc << TPG_VC_CFG0_VC_NUM; val |= INTELEAVING_MODE_ONE_SHOT << TPG_VC_CFG0_LINE_INTERLEAVING_MODE; @@ -370,14 +367,14 @@ static void __csid_configure_stream(struct csid_device *csid, u8 enable, u8 vc) writel_relaxed(0x12345678, csid->base + CSID_TPG_LFSR_SEED); - val = input_format->height & 0x1fff << TPG_DT_n_CFG_0_FRAME_HEIGHT; - val |= input_format->width & 0x1fff << TPG_DT_n_CFG_0_FRAME_WIDTH; + val = (input_format->height & 0x1fff) << TPG_DT_n_CFG_0_FRAME_HEIGHT; + val |= (input_format->width & 0x1fff) << TPG_DT_n_CFG_0_FRAME_WIDTH; writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_0(0)); val = format->data_type << TPG_DT_n_CFG_1_DATA_TYPE; writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_1(0)); - val = tg->mode << TPG_DT_n_CFG_2_PAYLOAD_MODE; + val = (tg->mode - 1) << TPG_DT_n_CFG_2_PAYLOAD_MODE; val |= 0xBE << TPG_DT_n_CFG_2_USER_SPECIFIED_PAYLOAD; val |= format->decode_format << TPG_DT_n_CFG_2_ENCODE_FORMAT; writel_relaxed(val, csid->base + CSID_TPG_DT_n_CFG_2(0)); From a302879747f8f5bf6ff578acf877f310e0cd3f2f Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 21 Nov 2023 14:38:11 +0100 Subject: [PATCH 0488/1397] Revert "HID: logitech-dj: Add support for a new lightspeed receiver iteration" commit 5b4ffb176d7979ac66b349addf3f7de433335e00 upstream. This reverts commit 9d1bd9346241cd6963b58da7ffb7ed303285f684. Multiple people reported misbehaving devices and reverting this commit fixes the problem for them. As soon as the original commit author starts reacting again, we can try to figure out why he hasn't seen the issues (mismatching report descriptors?), but for the time being, fix for 6.7 by reverting. Link: https://bugzilla.kernel.org/show_bug.cgi?id=218172 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218094 Cc: Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 1 - drivers/hid/hid-logitech-dj.c | 11 +++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index f7973ccd84a2..d10ccfa17e16 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -869,7 +869,6 @@ #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_2 0xc534 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1 0xc539 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1 0xc53f -#define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2 0xc547 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_POWERPLAY 0xc53a #define USB_DEVICE_ID_SPACETRAVELLER 0xc623 #define USB_DEVICE_ID_SPACENAVIGATOR 0xc626 diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 8afe3be683ba..e6a8b6d8eab7 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1695,12 +1695,11 @@ static int logi_dj_raw_event(struct hid_device *hdev, } /* * Mouse-only receivers send unnumbered mouse data. The 27 MHz - * receiver uses 6 byte packets, the nano receiver 8 bytes, - * the lightspeed receiver (Pro X Superlight) 13 bytes. + * receiver uses 6 byte packets, the nano receiver 8 bytes. */ if (djrcv_dev->unnumbered_application == HID_GD_MOUSE && - size <= 13){ - u8 mouse_report[14]; + size <= 8) { + u8 mouse_report[9]; /* Prepend report id */ mouse_report[0] = REPORT_TYPE_MOUSE; @@ -1984,10 +1983,6 @@ static const struct hid_device_id logi_dj_receivers[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1), .driver_data = recvr_type_gaming_hidpp}, - { /* Logitech lightspeed receiver (0xc547) */ - HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, - USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2), - .driver_data = recvr_type_gaming_hidpp}, { /* Logitech 27 MHz HID++ 1.0 receiver (0xc513) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER), From 0c7a3af88e8af85f5cdaf45a005d029f94db41f3 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 21 Nov 2023 09:09:33 +0100 Subject: [PATCH 0489/1397] Revert "net: r8169: Disable multicast filter for RTL8168H and RTL8107E" commit 6a26310273c323380da21eb23fcfd50e31140913 upstream. This reverts commit efa5f1311c4998e9e6317c52bc5ee93b3a0f36df. I couldn't reproduce the reported issue. What I did, based on a pcap packet log provided by the reporter: - Used same chip version (RTL8168h) - Set MAC address to the one used on the reporters system - Replayed the EAPOL unicast packet that, according to the reporter, was filtered out by the mc filter. The packet was properly received. Therefore the root cause of the reported issue seems to be somewhere else. Disabling mc filtering completely for the most common chip version is a quite big hammer. Therefore revert the change and wait for further analysis results from the reporter. Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index b9bb1d2f0237..295366a85c63 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2599,9 +2599,7 @@ static void rtl_set_rx_mode(struct net_device *dev) rx_mode &= ~AcceptMulticast; } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT || dev->flags & IFF_ALLMULTI || - tp->mac_version == RTL_GIGA_MAC_VER_35 || - tp->mac_version == RTL_GIGA_MAC_VER_46 || - tp->mac_version == RTL_GIGA_MAC_VER_48) { + tp->mac_version == RTL_GIGA_MAC_VER_35) { /* accept all multicasts */ } else if (netdev_mc_empty(dev)) { rx_mode &= ~AcceptMulticast; From 904fa65c26ab9f98b6a9ff25a8996081dfc1792d Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 24 May 2023 15:25:38 +0800 Subject: [PATCH 0490/1397] ext4: fix race between writepages and remount commit 745f17a4166e79315e4b7f33ce89d03e75a76983 upstream. We got a WARNING in ext4_add_complete_io: ================================================================== WARNING: at fs/ext4/page-io.c:231 ext4_put_io_end_defer+0x182/0x250 CPU: 10 PID: 77 Comm: ksoftirqd/10 Tainted: 6.3.0-rc2 #85 RIP: 0010:ext4_put_io_end_defer+0x182/0x250 [ext4] [...] Call Trace: ext4_end_bio+0xa8/0x240 [ext4] bio_endio+0x195/0x310 blk_update_request+0x184/0x770 scsi_end_request+0x2f/0x240 scsi_io_completion+0x75/0x450 scsi_finish_command+0xef/0x160 scsi_complete+0xa3/0x180 blk_complete_reqs+0x60/0x80 blk_done_softirq+0x25/0x40 __do_softirq+0x119/0x4c8 run_ksoftirqd+0x42/0x70 smpboot_thread_fn+0x136/0x3c0 kthread+0x140/0x1a0 ret_from_fork+0x2c/0x50 ================================================================== Above issue may happen as follows: cpu1 cpu2 ----------------------------|---------------------------- mount -o dioread_lock ext4_writepages ext4_do_writepages *if (ext4_should_dioread_nolock(inode))* // rsv_blocks is not assigned here mount -o remount,dioread_nolock ext4_journal_start_with_reserve __ext4_journal_start __ext4_journal_start_sb jbd2__journal_start *if (rsv_blocks)* // h_rsv_handle is not initialized here mpage_map_and_submit_extent mpage_map_one_extent dioread_nolock = ext4_should_dioread_nolock(inode) if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) mpd->io_submit.io_end->handle = handle->h_rsv_handle ext4_set_io_unwritten_flag io_end->flag |= EXT4_IO_END_UNWRITTEN // now io_end->handle is NULL but has EXT4_IO_END_UNWRITTEN flag scsi_finish_command scsi_io_completion scsi_io_completion_action scsi_end_request blk_update_request req_bio_endio bio_endio bio->bi_end_io > ext4_end_bio ext4_put_io_end_defer ext4_add_complete_io // trigger WARN_ON(!io_end->handle && sbi->s_journal); The immediate cause of this problem is that ext4_should_dioread_nolock() function returns inconsistent values in the ext4_do_writepages() and mpage_map_one_extent(). There are four conditions in this function that can be changed at mount time to cause this problem. These four conditions can be divided into two categories: (1) journal_data and EXT4_EXTENTS_FL, which can be changed by ioctl (2) DELALLOC and DIOREAD_NOLOCK, which can be changed by remount The two in the first category have been fixed by commit c8585c6fcaf2 ("ext4: fix races between changing inode journal mode and ext4_writepages") and commit cb85f4d23f79 ("ext4: fix race between writepages and enabling EXT4_EXTENTS_FL") respectively. Two cases in the other category have not yet been fixed, and the above issue is caused by this situation. We refer to the fix for the first category, when applying options during remount, we grab s_writepages_rwsem to avoid racing with writepages ops to trigger this problem. Fixes: 6b523df4fb5a ("ext4: use transaction reservation for extent conversion in ext4_end_io") Cc: stable@vger.kernel.org Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230524072538.2883391-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 3 ++- fs/ext4/super.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9418359b1d9d..cd4ccae1e28a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1676,7 +1676,8 @@ struct ext4_sb_info { /* * Barrier between writepages ops and changing any inode's JOURNAL_DATA - * or EXTENTS flag. + * or EXTENTS flag or between writepages ops and changing DELALLOC or + * DIOREAD_NOLOCK mount options on remount. */ struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6f48dec19f4a..d062383ea50e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6443,6 +6443,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) struct ext4_mount_options old_opts; ext4_group_t g; int err = 0; + int alloc_ctx; #ifdef CONFIG_QUOTA int enable_quota = 0; int i, j; @@ -6483,7 +6484,16 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) } + /* + * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause + * two calls to ext4_should_dioread_nolock() to return inconsistent + * values, triggering WARN_ON in ext4_add_complete_io(). we grab + * here s_writepages_rwsem to avoid race between writepages ops and + * remount. + */ + alloc_ctx = ext4_writepages_down_write(sb); ext4_apply_options(fc, sb); + ext4_writepages_up_write(sb, alloc_ctx); if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ test_opt(sb, JOURNAL_CHECKSUM)) { @@ -6701,6 +6711,8 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) && sb_any_quota_suspended(sb)) dquot_resume(sb, -1); + + alloc_ctx = ext4_writepages_down_write(sb); sb->s_flags = old_sb_flags; sbi->s_mount_opt = old_opts.s_mount_opt; sbi->s_mount_opt2 = old_opts.s_mount_opt2; @@ -6709,6 +6721,8 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) sbi->s_commit_interval = old_opts.s_commit_interval; sbi->s_min_batch_time = old_opts.s_min_batch_time; sbi->s_max_batch_time = old_opts.s_max_batch_time; + ext4_writepages_up_write(sb, alloc_ctx); + if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks) ext4_release_system_zone(sb); #ifdef CONFIG_QUOTA From 825ba5adcbbb70d6419acccf1a0f403797fcec83 Mon Sep 17 00:00:00 2001 From: Wang Jianjian Date: Thu, 24 Aug 2023 23:56:31 +0800 Subject: [PATCH 0491/1397] ext4: no need to generate from free list in mballoc commit ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88 upstream. Commit 7a2fcbf7f85 ("ext4: don't use blocks freed but not yet committed in buddy cache init") added a code to mark as used blocks in the list of not yet committed freed blocks during initialization of a buddy page. However ext4_mb_free_metadata() makes sure buddy page is already loaded and takes a reference to it so it cannot happen that ext4_mb_init_cache() is called when efd list is non-empty. Just remove the ext4_mb_generate_from_freelist() call. Fixes: 7a2fcbf7f85('ext4: don't use blocks freed but not yet committed in buddy cache init') Signed-off-by: Wang Jianjian Link: https://lore.kernel.org/r/tencent_53CBCB1668358AE862684E453DF37B722008@qq.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 39 ++++++--------------------------------- 1 file changed, 6 insertions(+), 33 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1e599305d85f..a7b8558c0d09 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -417,8 +417,6 @@ static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = { static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group); -static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, - ext4_group_t group); static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac); static bool ext4_mb_good_group(struct ext4_allocation_context *ac, @@ -1361,17 +1359,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) * We place the buddy block and bitmap block * close together */ + grinfo = ext4_get_group_info(sb, group); + if (!grinfo) { + err = -EFSCORRUPTED; + goto out; + } if ((first_block + i) & 1) { /* this is block of buddy */ BUG_ON(incore == NULL); mb_debug(sb, "put buddy for group %u in page %lu/%x\n", group, page->index, i * blocksize); trace_ext4_mb_buddy_bitmap_load(sb, group); - grinfo = ext4_get_group_info(sb, group); - if (!grinfo) { - err = -EFSCORRUPTED; - goto out; - } grinfo->bb_fragments = 0; memset(grinfo->bb_counters, 0, sizeof(*grinfo->bb_counters) * @@ -1398,7 +1396,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) /* mark all preallocated blks used in in-core bitmap */ ext4_mb_generate_from_pa(sb, data, group); - ext4_mb_generate_from_freelist(sb, data, group); + WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root)); ext4_unlock_group(sb, group); /* set incore so that the buddy information can be @@ -4958,31 +4956,6 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) return false; } -/* - * the function goes through all block freed in the group - * but not yet committed and marks them used in in-core bitmap. - * buddy must be generated from this bitmap - * Need to be called with the ext4 group lock held - */ -static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, - ext4_group_t group) -{ - struct rb_node *n; - struct ext4_group_info *grp; - struct ext4_free_data *entry; - - grp = ext4_get_group_info(sb, group); - if (!grp) - return; - n = rb_first(&(grp->bb_free_root)); - - while (n) { - entry = rb_entry(n, struct ext4_free_data, efd_node); - mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); - n = rb_next(n); - } -} - /* * the function goes through all preallocation in this group and marks them * used in in-core bitmap. buddy must be generated from this bitmap From 3bc2023850b83b23bdfebecf059752bc6b9f58e2 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Thu, 24 Aug 2023 17:26:05 +0800 Subject: [PATCH 0492/1397] ext4: make sure allocate pending entry not fail commit 8e387c89e96b9543a339f84043cf9df15fed2632 upstream. __insert_pending() allocate memory in atomic context, so the allocation could fail, but we are not handling that failure now. It could lead ext4_es_remove_extent() to get wrong reserved clusters, and the global data blocks reservation count will be incorrect. The same to extents_status entry preallocation, preallocate pending entry out of the i_es_lock with __GFP_NOFAIL, make sure __insert_pending() and __revise_pending() always succeeds. Signed-off-by: Zhang Yi Cc: stable@kernel.org Link: https://lore.kernel.org/r/20230824092619.1327976-3-yi.zhang@huaweicloud.com Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents_status.c | 123 ++++++++++++++++++++++++++++----------- 1 file changed, 89 insertions(+), 34 deletions(-) diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 6f7de14c0fa8..b9c8270e1cf0 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -152,8 +152,9 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan); static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, struct ext4_inode_info *locked_ei); -static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t len); +static int __revise_pending(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, + struct pending_reservation **prealloc); int __init ext4_init_es(void) { @@ -448,6 +449,19 @@ static void ext4_es_list_del(struct inode *inode) spin_unlock(&sbi->s_es_lock); } +static inline struct pending_reservation *__alloc_pending(bool nofail) +{ + if (!nofail) + return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC); + + return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL); +} + +static inline void __free_pending(struct pending_reservation *pr) +{ + kmem_cache_free(ext4_pending_cachep, pr); +} + /* * Returns true if we cannot fail to allocate memory for this extent_status * entry and cannot reclaim it until its status changes. @@ -836,11 +850,12 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, { struct extent_status newes; ext4_lblk_t end = lblk + len - 1; - int err1 = 0; - int err2 = 0; + int err1 = 0, err2 = 0, err3 = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct extent_status *es1 = NULL; struct extent_status *es2 = NULL; + struct pending_reservation *pr = NULL; + bool revise_pending = false; if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) return; @@ -868,11 +883,17 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, ext4_es_insert_extent_check(inode, &newes); + revise_pending = sbi->s_cluster_ratio > 1 && + test_opt(inode->i_sb, DELALLOC) && + (status & (EXTENT_STATUS_WRITTEN | + EXTENT_STATUS_UNWRITTEN)); retry: if (err1 && !es1) es1 = __es_alloc_extent(true); if ((err1 || err2) && !es2) es2 = __es_alloc_extent(true); + if ((err1 || err2 || err3) && revise_pending && !pr) + pr = __alloc_pending(true); write_lock(&EXT4_I(inode)->i_es_lock); err1 = __es_remove_extent(inode, lblk, end, NULL, es1); @@ -897,13 +918,18 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, es2 = NULL; } - if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) && - (status & EXTENT_STATUS_WRITTEN || - status & EXTENT_STATUS_UNWRITTEN)) - __revise_pending(inode, lblk, len); + if (revise_pending) { + err3 = __revise_pending(inode, lblk, len, &pr); + if (err3 != 0) + goto error; + if (pr) { + __free_pending(pr); + pr = NULL; + } + } error: write_unlock(&EXT4_I(inode)->i_es_lock); - if (err1 || err2) + if (err1 || err2 || err3) goto retry; ext4_es_print_tree(inode); @@ -1311,7 +1337,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end, rc->ndelonly--; node = rb_next(&pr->rb_node); rb_erase(&pr->rb_node, &tree->root); - kmem_cache_free(ext4_pending_cachep, pr); + __free_pending(pr); if (!node) break; pr = rb_entry(node, struct pending_reservation, @@ -1907,11 +1933,13 @@ static struct pending_reservation *__get_pending(struct inode *inode, * * @inode - file containing the cluster * @lblk - logical block in the cluster to be added + * @prealloc - preallocated pending entry * * Returns 0 on successful insertion and -ENOMEM on failure. If the * pending reservation is already in the set, returns successfully. */ -static int __insert_pending(struct inode *inode, ext4_lblk_t lblk) +static int __insert_pending(struct inode *inode, ext4_lblk_t lblk, + struct pending_reservation **prealloc) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree; @@ -1937,10 +1965,15 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk) } } - pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC); - if (pr == NULL) { - ret = -ENOMEM; - goto out; + if (likely(*prealloc == NULL)) { + pr = __alloc_pending(false); + if (!pr) { + ret = -ENOMEM; + goto out; + } + } else { + pr = *prealloc; + *prealloc = NULL; } pr->lclu = lclu; @@ -1970,7 +2003,7 @@ static void __remove_pending(struct inode *inode, ext4_lblk_t lblk) if (pr != NULL) { tree = &EXT4_I(inode)->i_pending_tree; rb_erase(&pr->rb_node, &tree->root); - kmem_cache_free(ext4_pending_cachep, pr); + __free_pending(pr); } } @@ -2029,10 +2062,10 @@ void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, bool allocated) { struct extent_status newes; - int err1 = 0; - int err2 = 0; + int err1 = 0, err2 = 0, err3 = 0; struct extent_status *es1 = NULL; struct extent_status *es2 = NULL; + struct pending_reservation *pr = NULL; if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) return; @@ -2052,6 +2085,8 @@ void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, es1 = __es_alloc_extent(true); if ((err1 || err2) && !es2) es2 = __es_alloc_extent(true); + if ((err1 || err2 || err3) && allocated && !pr) + pr = __alloc_pending(true); write_lock(&EXT4_I(inode)->i_es_lock); err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1); @@ -2074,11 +2109,18 @@ void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, es2 = NULL; } - if (allocated) - __insert_pending(inode, lblk); + if (allocated) { + err3 = __insert_pending(inode, lblk, &pr); + if (err3 != 0) + goto error; + if (pr) { + __free_pending(pr); + pr = NULL; + } + } error: write_unlock(&EXT4_I(inode)->i_es_lock); - if (err1 || err2) + if (err1 || err2 || err3) goto retry; ext4_es_print_tree(inode); @@ -2184,21 +2226,24 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk, * @inode - file containing the range * @lblk - logical block defining the start of range * @len - length of range in blocks + * @prealloc - preallocated pending entry * * Used after a newly allocated extent is added to the extents status tree. * Requires that the extents in the range have either written or unwritten * status. Must be called while holding i_es_lock. */ -static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t len) +static int __revise_pending(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, + struct pending_reservation **prealloc) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_lblk_t end = lblk + len - 1; ext4_lblk_t first, last; bool f_del = false, l_del = false; + int ret = 0; if (len == 0) - return; + return 0; /* * Two cases - block range within single cluster and block range @@ -2219,7 +2264,9 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, f_del = __es_scan_range(inode, &ext4_es_is_delonly, first, lblk - 1); if (f_del) { - __insert_pending(inode, first); + ret = __insert_pending(inode, first, prealloc); + if (ret < 0) + goto out; } else { last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1; @@ -2227,9 +2274,11 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, l_del = __es_scan_range(inode, &ext4_es_is_delonly, end + 1, last); - if (l_del) - __insert_pending(inode, last); - else + if (l_del) { + ret = __insert_pending(inode, last, prealloc); + if (ret < 0) + goto out; + } else __remove_pending(inode, last); } } else { @@ -2237,18 +2286,24 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, if (first != lblk) f_del = __es_scan_range(inode, &ext4_es_is_delonly, first, lblk - 1); - if (f_del) - __insert_pending(inode, first); - else + if (f_del) { + ret = __insert_pending(inode, first, prealloc); + if (ret < 0) + goto out; + } else __remove_pending(inode, first); last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1; if (last != end) l_del = __es_scan_range(inode, &ext4_es_is_delonly, end + 1, last); - if (l_del) - __insert_pending(inode, last); - else + if (l_del) { + ret = __insert_pending(inode, last, prealloc); + if (ret < 0) + goto out; + } else __remove_pending(inode, last); } +out: + return ret; } From df562e04a1c8a84dfd10c89c00db086fec85a3b5 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Tue, 19 Sep 2023 10:18:23 +0200 Subject: [PATCH 0493/1397] ext4: apply umask if ACL support is disabled commit 484fd6c1de13b336806a967908a927cc0356e312 upstream. The function ext4_init_acl() calls posix_acl_create() which is responsible for applying the umask. But without CONFIG_EXT4_FS_POSIX_ACL, ext4_init_acl() is an empty inline function, and nobody applies the umask. This fixes a bug which causes the umask to be ignored with O_TMPFILE on ext4: https://github.com/MusicPlayerDaemon/MPD/issues/558 https://bugs.gentoo.org/show_bug.cgi?id=686142#c3 https://bugzilla.kernel.org/show_bug.cgi?id=203625 Reviewed-by: "J. Bruce Fields" Cc: stable@vger.kernel.org Signed-off-by: Max Kellermann Link: https://lore.kernel.org/r/20230919081824.1096619-1-max.kellermann@ionos.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/acl.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 0c5a79c3b5d4..ef4c19e5f570 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -68,6 +68,11 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); static inline int ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) { + /* usually, the umask is applied by posix_acl_create(), but if + ext4 ACL support is disabled at compile time, we need to do + it here, because posix_acl_create() will never be called */ + inode->i_mode &= ~current_umask(); + return 0; } #endif /* CONFIG_EXT4_FS_POSIX_ACL */ From a5ab5da03870a832211e3c9c9d088e3347337026 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 27 Aug 2023 01:47:00 +0800 Subject: [PATCH 0494/1397] ext4: correct offset of gdb backup in non meta_bg group to update_backups commit 31f13421c004a420c0e9d288859c9ea9259ea0cc upstream. Commit 0aeaa2559d6d5 ("ext4: fix corruption when online resizing a 1K bigalloc fs") found that primary superblock's offset in its group is not equal to offset of backup superblock in its group when block size is 1K and bigalloc is enabled. As group descriptor blocks are right after superblock, we can't pass block number of gdb to update_backups for the same reason. The root casue of the issue above is that leading 1K padding block is count as data block offset for primary block while backup block has no padding block offset in its group. Remove padding data block count to fix the issue for gdb backups. For meta_bg case, update_backups treat blk_off as block number, do no conversion in this case. Signed-off-by: Kemeng Shi Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20230826174712.4059355-2-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0361c20910de..87cd5b07a970 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1601,6 +1601,8 @@ static int ext4_flex_group_add(struct super_block *sb, int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); int meta_bg = ext4_has_feature_meta_bg(sb); + sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr - + ext4_group_first_block_no(sb, 0); sector_t old_gdb = 0; update_backups(sb, ext4_group_first_block_no(sb, 0), @@ -1612,8 +1614,8 @@ static int ext4_flex_group_add(struct super_block *sb, gdb_num); if (old_gdb == gdb_bh->b_blocknr) continue; - update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, - gdb_bh->b_size, meta_bg); + update_backups(sb, gdb_bh->b_blocknr - padding_blocks, + gdb_bh->b_data, gdb_bh->b_size, meta_bg); old_gdb = gdb_bh->b_blocknr; } } From c26e3adc65babf681c10c109a68bc437e2eec51f Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Mon, 18 Sep 2023 16:15:50 +0530 Subject: [PATCH 0495/1397] ext4: mark buffer new if it is unwritten to avoid stale data exposure commit 2cd8bdb5efc1e0d5b11a4b7ba6b922fd2736a87f upstream. ** Short Version ** In ext4 with dioread_nolock, we could have a scenario where the bh returned by get_blocks (ext4_get_block_unwritten()) in __block_write_begin_int() has UNWRITTEN and MAPPED flag set. Since such a bh does not have NEW flag set we never zero out the range of bh that is not under write, causing whatever stale data is present in the folio at that time to be written out to disk. To fix this mark the buffer as new, in case it is unwritten, in ext4_get_block_unwritten(). ** Long Version ** The issue mentioned above was resulting in two different bugs: 1. On block size < page size case in ext4, generic/269 was reliably failing with dioread_nolock. The state of the write was as follows: * The write was extending i_size. * The last block of the file was fallocated and had an unwritten extent * We were near ENOSPC and hence we were switching to non-delayed alloc allocation. In this case, the back trace that triggers the bug is as follows: ext4_da_write_begin() /* switch to nodelalloc due to low space */ ext4_write_begin() ext4_should_dioread_nolock() // true since mount flags still have delalloc __block_write_begin(..., ext4_get_block_unwritten) __block_write_begin_int() for(each buffer head in page) { /* first iteration, this is bh1 which contains i_size */ if (!buffer_mapped) get_block() /* returns bh with only UNWRITTEN and MAPPED */ /* second iteration, bh2 */ if (!buffer_mapped) get_block() /* we fail here, could be ENOSPC */ } if (err) /* * this would zero out all new buffers and mark them uptodate. * Since bh1 was never marked new, we skip it here which causes * the bug later. */ folio_zero_new_buffers(); /* ext4_wrte_begin() error handling */ ext4_truncate_failed_write() ext4_truncate() ext4_block_truncate_page() __ext4_block_zero_page_range() if(!buffer_uptodate()) ext4_read_bh_lock() ext4_read_bh() -> ... ext4_submit_bh_wbc() BUG_ON(buffer_unwritten(bh)); /* !!! */ 2. The second issue is stale data exposure with page size >= blocksize with dioread_nolock. The conditions needed for it to happen are same as the previous issue ie dioread_nolock around ENOSPC condition. The issue is also similar where in __block_write_begin_int() when we call ext4_get_block_unwritten() on the buffer_head and the underlying extent is unwritten, we get an unwritten and mapped buffer head. Since it is not new, we never zero out the partial range which is not under write, thus writing stale data to disk. This can be easily observed with the following reproducer: fallocate -l 4k testfile xfs_io -c "pwrite 2k 2k" testfile # hexdump output will have stale data in from byte 0 to 2k in testfile hexdump -C testfile NOTE: To trigger this, we need dioread_nolock enabled and write happening via ext4_write_begin(), which is usually used when we have -o nodealloc. Since dioread_nolock is disabled with nodelalloc, the only alternate way to call ext4_write_begin() is to ensure that delayed alloc switches to nodelalloc ie ext4_da_write_begin() calls ext4_write_begin(). This will usually happen when ext4 is almost full like the way generic/269 was triggering it in Issue 1 above. This might make the issue harder to hit. Hence, for reliable replication, I used the below patch to temporarily allow dioread_nolock with nodelalloc and then mount the disk with -o nodealloc,dioread_nolock. With this you can hit the stale data issue 100% of times: @@ -508,8 +508,8 @@ static inline int ext4_should_dioread_nolock(struct inode *inode) if (ext4_should_journal_data(inode)) return 0; /* temporary fix to prevent generic/422 test failures */ - if (!test_opt(inode->i_sb, DELALLOC)) - return 0; + // if (!test_opt(inode->i_sb, DELALLOC)) + // return 0; return 1; } After applying this patch to mark buffer as NEW, both the above issues are fixed. Signed-off-by: Ojaswin Mujoo Cc: stable@kernel.org Reviewed-by: Jan Kara Reviewed-by: "Ritesh Harjani (IBM)" Link: https://lore.kernel.org/r/d0ed09d70a9733fbb5349c5c7b125caac186ecdf.1695033645.git.ojaswin@linux.ibm.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4ce35f1c8b0a..d7732320431a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -789,10 +789,22 @@ int ext4_get_block(struct inode *inode, sector_t iblock, int ext4_get_block_unwritten(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { + int ret = 0; + ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n", inode->i_ino, create); - return _ext4_get_block(inode, iblock, bh_result, + ret = _ext4_get_block(inode, iblock, bh_result, EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT); + + /* + * If the buffer is marked unwritten, mark it as new to make sure it is + * zeroed out correctly in case of partial writes. Otherwise, there is + * a chance of stale data getting exposed. + */ + if (ret == 0 && buffer_unwritten(bh_result)) + set_buffer_new(bh_result); + + return ret; } /* Maximum number of blocks we map for direct IO at once. */ From 409e2d00c21fb73f2312cf64df6158400ec963dc Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 27 Aug 2023 01:47:02 +0800 Subject: [PATCH 0496/1397] ext4: correct return value of ext4_convert_meta_bg commit 48f1551592c54f7d8e2befc72a99ff4e47f7dca0 upstream. Avoid to ignore error in "err". Signed-off-by: Kemeng Shi Link: https://lore.kernel.org/r/20230826174712.4059355-4-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 87cd5b07a970..8cf9a07c2c53 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1982,9 +1982,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) errout: ret = ext4_journal_stop(handle); - if (!err) - err = ret; - return ret; + return err ? err : ret; invalid_resize_inode: ext4_error(sb, "corrupted/inconsistent resize inode"); From afe944f1f176e7059ce1a5ffe3f4d46ead6b3b87 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Thu, 24 Aug 2023 17:26:04 +0800 Subject: [PATCH 0497/1397] ext4: correct the start block of counting reserved clusters commit 40ea98396a3659062267d1fe5f99af4f7e4f05e3 upstream. When big allocate feature is enabled, we need to count and update reserved clusters before removing a delayed only extent_status entry. {init|count|get}_rsvd() have already done this, but the start block number of this counting isn't correct in the following case. lblk end | | v v ------------------------- | | orig_es ------------------------- ^ ^ len1 is 0 | len2 | If the start block of the orig_es entry founded is bigger than lblk, we passed lblk as start block to count_rsvd(), but the length is correct, finally, the range to be counted is offset. This patch fix this by passing the start blocks to 'orig_es->lblk + len1'. Signed-off-by: Zhang Yi Cc: stable@kernel.org Link: https://lore.kernel.org/r/20230824092619.1327976-2-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents_status.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index b9c8270e1cf0..f4b50652f0cc 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -1431,8 +1431,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, } } if (count_reserved) - count_rsvd(inode, lblk, orig_es.es_len - len1 - len2, - &orig_es, &rc); + count_rsvd(inode, orig_es.es_lblk + len1, + orig_es.es_len - len1 - len2, &orig_es, &rc); goto out_get_reserved; } From b88c91b28706d6881048d3bd52acf2251bedb49a Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 27 Aug 2023 01:47:03 +0800 Subject: [PATCH 0498/1397] ext4: remove gdb backup copy for meta bg in setup_new_flex_group_blocks commit 40dd7953f4d606c280074f10d23046b6812708ce upstream. Wrong check of gdb backup in meta bg as following: first_group is the first group of meta_bg which contains target group, so target group is always >= first_group. We check if target group has gdb backup by comparing first_group with [group + 1] and [group + EXT4_DESC_PER_BLOCK(sb) - 1]. As group >= first_group, then [group + N] is > first_group. So no copy of gdb backup in meta bg is done in setup_new_flex_group_blocks. No need to do gdb backup copy in meta bg from setup_new_flex_group_blocks as we always copy updated gdb block to backups at end of ext4_flex_group_add as following: ext4_flex_group_add /* no gdb backup copy for meta bg any more */ setup_new_flex_group_blocks /* update current group number */ ext4_update_super sbi->s_groups_count += flex_gd->count; /* * if group in meta bg contains backup is added, the primary gdb block * of the meta bg will be copy to backup in new added group here. */ for (; gdb_num <= gdb_num_end; gdb_num++) update_backups(...) In summary, we can remove wrong gdb backup copy code in setup_new_flex_group_blocks. Signed-off-by: Kemeng Shi Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20230826174712.4059355-5-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 8cf9a07c2c53..54e77a36b86f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -560,13 +560,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb, if (meta_bg == 0 && !ext4_bg_has_super(sb, group)) goto handle_itb; - if (meta_bg == 1) { - ext4_group_t first_group; - first_group = ext4_meta_bg_first_group(sb, group); - if (first_group != group + 1 && - first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1) - goto handle_itb; - } + if (meta_bg == 1) + goto handle_itb; block = start + ext4_bg_has_super(sb, group); /* Copy all of the GDT blocks into the backup in this group */ From 1a657b36c1a0ca05996c77cbdcd0e44e2f0dceb1 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sun, 27 Aug 2023 01:47:01 +0800 Subject: [PATCH 0499/1397] ext4: add missed brelse in update_backups commit 9adac8b01f4be28acd5838aade42b8daa4f0b642 upstream. add missed brelse in update_backups Signed-off-by: Kemeng Shi Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20230826174712.4059355-3-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 54e77a36b86f..667381180b26 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1186,8 +1186,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, ext4_group_first_block_no(sb, group)); BUFFER_TRACE(bh, "get_write_access"); if ((err = ext4_journal_get_write_access(handle, sb, bh, - EXT4_JTR_NONE))) + EXT4_JTR_NONE))) { + brelse(bh); break; + } lock_buffer(bh); memcpy(bh->b_data, data, size); if (rest) From 3eb32071b57853067fbf3294ecc47062492703b3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 13 Oct 2023 14:13:50 +0200 Subject: [PATCH 0500/1397] ext4: properly sync file size update after O_SYNC direct IO commit 91562895f8030cb9a0470b1db49de79346a69f91 upstream. Gao Xiang has reported that on ext4 O_SYNC direct IO does not properly sync file size update and thus if we crash at unfortunate moment, the file can have smaller size although O_SYNC IO has reported successful completion. The problem happens because update of on-disk inode size is handled in ext4_dio_write_iter() *after* iomap_dio_rw() (and thus dio_complete() in particular) has returned and generic_file_sync() gets called by dio_complete(). Fix the problem by handling on-disk inode size update directly in our ->end_io completion handler. References: https://lore.kernel.org/all/02d18236-26ef-09b0-90ad-030c4fe3ee20@linux.alibaba.com Reported-by: Gao Xiang CC: stable@vger.kernel.org Fixes: 378f32bab371 ("ext4: introduce direct I/O write using iomap infrastructure") Signed-off-by: Jan Kara Tested-by: Joseph Qi Reviewed-by: "Ritesh Harjani (IBM)" Link: https://lore.kernel.org/r/20231013121350.26872-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/file.c | 153 +++++++++++++++++++++---------------------------- 1 file changed, 65 insertions(+), 88 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 6830ea3a6c59..19d9db4799c4 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -306,80 +306,38 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb, } static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, - ssize_t written, size_t count) + ssize_t count) { handle_t *handle; - bool truncate = false; - u8 blkbits = inode->i_blkbits; - ext4_lblk_t written_blk, end_blk; - int ret; - - /* - * Note that EXT4_I(inode)->i_disksize can get extended up to - * inode->i_size while the I/O was running due to writeback of delalloc - * blocks. But, the code in ext4_iomap_alloc() is careful to use - * zeroed/unwritten extents if this is possible; thus we won't leave - * uninitialized blocks in a file even if we didn't succeed in writing - * as much as we intended. - */ - WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize); - if (offset + count <= EXT4_I(inode)->i_disksize) { - /* - * We need to ensure that the inode is removed from the orphan - * list if it has been added prematurely, due to writeback of - * delalloc blocks. - */ - if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - - if (IS_ERR(handle)) { - ext4_orphan_del(NULL, inode); - return PTR_ERR(handle); - } - - ext4_orphan_del(handle, inode); - ext4_journal_stop(handle); - } - - return written; - } - - if (written < 0) - goto truncate; + lockdep_assert_held_write(&inode->i_rwsem); handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - if (IS_ERR(handle)) { - written = PTR_ERR(handle); - goto truncate; - } + if (IS_ERR(handle)) + return PTR_ERR(handle); - if (ext4_update_inode_size(inode, offset + written)) { - ret = ext4_mark_inode_dirty(handle, inode); + if (ext4_update_inode_size(inode, offset + count)) { + int ret = ext4_mark_inode_dirty(handle, inode); if (unlikely(ret)) { - written = ret; ext4_journal_stop(handle); - goto truncate; + return ret; } } - /* - * We may need to truncate allocated but not written blocks beyond EOF. - */ - written_blk = ALIGN(offset + written, 1 << blkbits); - end_blk = ALIGN(offset + count, 1 << blkbits); - if (written_blk < end_blk && ext4_can_truncate(inode)) - truncate = true; - - /* - * Remove the inode from the orphan list if it has been extended and - * everything went OK. - */ - if (!truncate && inode->i_nlink) + if (inode->i_nlink) ext4_orphan_del(handle, inode); ext4_journal_stop(handle); - if (truncate) { -truncate: + return count; +} + +/* + * Clean up the inode after DIO or DAX extending write has completed and the + * inode size has been updated using ext4_handle_inode_extension(). + */ +static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count) +{ + lockdep_assert_held_write(&inode->i_rwsem); + if (count < 0) { ext4_truncate_failed_write(inode); /* * If the truncate operation failed early, then the inode may @@ -388,9 +346,28 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, */ if (inode->i_nlink) ext4_orphan_del(NULL, inode); + return; } + /* + * If i_disksize got extended due to writeback of delalloc blocks while + * the DIO was running we could fail to cleanup the orphan list in + * ext4_handle_inode_extension(). Do it now. + */ + if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { + handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - return written; + if (IS_ERR(handle)) { + /* + * The write has successfully completed. Not much to + * do with the error here so just cleanup the orphan + * list and hope for the best. + */ + ext4_orphan_del(NULL, inode); + return; + } + ext4_orphan_del(handle, inode); + ext4_journal_stop(handle); + } } static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size, @@ -399,31 +376,22 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size, loff_t pos = iocb->ki_pos; struct inode *inode = file_inode(iocb->ki_filp); + if (!error && size && flags & IOMAP_DIO_UNWRITTEN) + error = ext4_convert_unwritten_extents(NULL, inode, pos, size); if (error) return error; - - if (size && flags & IOMAP_DIO_UNWRITTEN) { - error = ext4_convert_unwritten_extents(NULL, inode, pos, size); - if (error < 0) - return error; - } /* - * If we are extending the file, we have to update i_size here before - * page cache gets invalidated in iomap_dio_rw(). Otherwise racing - * buffered reads could zero out too much from page cache pages. Update - * of on-disk size will happen later in ext4_dio_write_iter() where - * we have enough information to also perform orphan list handling etc. - * Note that we perform all extending writes synchronously under - * i_rwsem held exclusively so i_size update is safe here in that case. - * If the write was not extending, we cannot see pos > i_size here - * because operations reducing i_size like truncate wait for all - * outstanding DIO before updating i_size. + * Note that EXT4_I(inode)->i_disksize can get extended up to + * inode->i_size while the I/O was running due to writeback of delalloc + * blocks. But the code in ext4_iomap_alloc() is careful to use + * zeroed/unwritten extents if this is possible; thus we won't leave + * uninitialized blocks in a file even if we didn't succeed in writing + * as much as we intended. */ - pos += size; - if (pos > i_size_read(inode)) - i_size_write(inode, pos); - - return 0; + WARN_ON_ONCE(i_size_read(inode) < READ_ONCE(EXT4_I(inode)->i_disksize)); + if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize)) + return size; + return ext4_handle_inode_extension(inode, pos, size); } static const struct iomap_dio_ops ext4_dio_write_ops = { @@ -606,9 +574,16 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) dio_flags, NULL, 0); if (ret == -ENOTBLK) ret = 0; - - if (extend) - ret = ext4_handle_inode_extension(inode, offset, ret, count); + if (extend) { + /* + * We always perform extending DIO write synchronously so by + * now the IO is completed and ext4_handle_inode_extension() + * was called. Cleanup the inode in case of error or race with + * writeback of delalloc blocks. + */ + WARN_ON_ONCE(ret == -EIOCBQUEUED); + ext4_inode_extension_cleanup(inode, ret); + } out: if (ilock_shared) @@ -689,8 +664,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops); - if (extend) - ret = ext4_handle_inode_extension(inode, offset, ret, count); + if (extend) { + ret = ext4_handle_inode_extension(inode, offset, ret); + ext4_inode_extension_cleanup(inode, ret); + } out: inode_unlock(inode); if (ret > 0) From 7343c23ebcadbedc23a7063d1e24d976eccb0d0d Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 2 Oct 2023 14:50:20 -0400 Subject: [PATCH 0501/1397] ext4: fix racy may inline data check in dio write commit ce56d21355cd6f6937aca32f1f44ca749d1e4808 upstream. syzbot reports that the following warning from ext4_iomap_begin() triggers as of the commit referenced below: if (WARN_ON_ONCE(ext4_has_inline_data(inode))) return -ERANGE; This occurs during a dio write, which is never expected to encounter an inode with inline data. To enforce this behavior, ext4_dio_write_iter() checks the current inline state of the inode and clears the MAY_INLINE_DATA state flag to either fall back to buffered writes, or enforce that any other writers in progress on the inode are not allowed to create inline data. The problem is that the check for existing inline data and the state flag can span a lock cycle. For example, if the ilock is originally locked shared and subsequently upgraded to exclusive, another writer may have reacquired the lock and created inline data before the dio write task acquires the lock and proceeds. The commit referenced below loosens the lock requirements to allow some forms of unaligned dio writes to occur under shared lock, but AFAICT the inline data check was technically already racy for any dio write that would have involved a lock cycle. Regardless, lift clearing of the state bit to the same lock critical section that checks for preexisting inline data on the inode to close the race. Cc: stable@kernel.org Reported-by: syzbot+307da6ca5cb0d01d581a@syzkaller.appspotmail.com Fixes: 310ee0902b8d ("ext4: allow concurrent unaligned dio overwrites") Signed-off-by: Brian Foster Link: https://lore.kernel.org/r/20231002185020.531537-1-bfoster@redhat.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/file.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 19d9db4799c4..0166bb9ca160 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -537,18 +537,20 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) return ext4_buffered_write_iter(iocb, from); } + /* + * Prevent inline data from being created since we are going to allocate + * blocks for DIO. We know the inode does not currently have inline data + * because ext4_should_use_dio() checked for it, but we have to clear + * the state flag before the write checks because a lock cycle could + * introduce races with other writers. + */ + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend, &unwritten, &dio_flags); if (ret <= 0) return ret; - /* - * Make sure inline data cannot be created anymore since we are going - * to allocate blocks for DIO. We know the inode does not have any - * inline data now because ext4_dio_supported() checked for that. - */ - ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); - offset = iocb->ki_pos; count = ret; From f3f1c94dfdadcbeab4cb70636ad288153425ad76 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Tue, 17 Oct 2023 16:01:35 +0200 Subject: [PATCH 0502/1397] drm/amd/pm: Handle non-terminated overdrive commands. commit 08e9ebc75b5bcfec9d226f9e16bab2ab7b25a39a upstream. The incoming strings might not be terminated by a newline or a 0. (found while testing a program that just wrote the string itself, causing a crash) Cc: stable@vger.kernel.org Fixes: e3933f26b657 ("drm/amd/pp: Add edit/commit/show OD clock/voltage support in sysfs") Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 8bb2da13826f..b4c9fedaa51d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -734,7 +734,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, if (adev->in_suspend && !adev->in_runpm) return -EPERM; - if (count > 127) + if (count > 127 || count == 0) return -EINVAL; if (*buf == 's') @@ -754,7 +754,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, else return -EINVAL; - memcpy(buf_cpy, buf, count+1); + memcpy(buf_cpy, buf, count); + buf_cpy[count] = 0; tmp_str = buf_cpy; @@ -771,6 +772,9 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, return -EINVAL; parameter_size++; + if (!tmp_str) + break; + while (isspace(*tmp_str)) tmp_str++; } From 154438369cbd93b00a852c94103c7a342ffd3256 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 14 Sep 2023 16:11:59 +0300 Subject: [PATCH 0503/1397] drm: bridge: it66121: ->get_edid callback must not return err pointers commit 81995ee1620318b4c7bbeb02bcc372da2c078c76 upstream. The drm stack does not expect error valued pointers for EDID anywhere. Fixes: e66856508746 ("drm: bridge: it66121: Set DDC preamble only once before reading EDID") Cc: Paul Cercueil Cc: Robert Foss Cc: Phong LE Cc: Neil Armstrong Cc: Andrzej Hajda Cc: Robert Foss Cc: Laurent Pinchart Cc: Jonas Karlman Cc: Jernej Skrabec Cc: # v6.3+ Signed-off-by: Jani Nikula Reviewed-by: Neil Armstrong Signed-off-by: Paul Cercueil Link: https://patchwork.freedesktop.org/patch/msgid/20230914131159.2472513-1-jani.nikula@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/bridge/ite-it66121.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index fc7f5ec5fb38..8f5846b76d59 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -884,14 +884,14 @@ static struct edid *it66121_bridge_get_edid(struct drm_bridge *bridge, mutex_lock(&ctx->lock); ret = it66121_preamble_ddc(ctx); if (ret) { - edid = ERR_PTR(ret); + edid = NULL; goto out_unlock; } ret = regmap_write(ctx->regmap, IT66121_DDC_HEADER_REG, IT66121_DDC_HEADER_EDID); if (ret) { - edid = ERR_PTR(ret); + edid = NULL; goto out_unlock; } From b4166941c03dc0b8b958ff3f295ad6e67f967a3d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 4 Sep 2023 22:04:59 -0700 Subject: [PATCH 0504/1397] x86/srso: Move retbleed IBPB check into existing 'has_microcode' code block commit 351236947a45a512c517153bbe109fe868d05e6d upstream. Simplify the code flow a bit by moving the retbleed IBPB check into the existing 'has_microcode' block. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Signed-off-by: Borislav Petkov (AMD) Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/0a22b86b1f6b07f9046a9ab763fc0e0d1b7a91d4.1693889988.git.jpoimboe@kernel.org Cc: Caleb Jorden Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a55a3864df1c..0bc55472f303 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -2422,10 +2422,8 @@ static void __init srso_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_SRSO_NO); return; } - } - if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { - if (has_microcode) { + if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { srso_mitigation = SRSO_MITIGATION_IBPB; goto out; } From 17117871bf0d6792f7aacec0f57cdc63fc01042a Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Fri, 15 Sep 2023 18:18:48 -0400 Subject: [PATCH 0505/1397] drm/amd/display: Add Null check for DPP resource commit e186400685d8a9287388a8535e2399bc673bfe95 upstream. [what and why] Check whether dpp resource pointer is null in advance and return early if so. Reviewed-by: Charlene Liu Reviewed-by: Martin Leung Signed-off-by: Gabe Teeger Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index f7b51aca6020..8873acfe309c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -996,7 +996,7 @@ static void adjust_recout_for_visual_confirm(struct rect *recout, struct dc *dc = pipe_ctx->stream->ctx->dc; int dpp_offset, base_offset; - if (dc->debug.visual_confirm == VISUAL_CONFIRM_DISABLE) + if (dc->debug.visual_confirm == VISUAL_CONFIRM_DISABLE || !pipe_ctx->plane_res.dpp) return; dpp_offset = pipe_ctx->stream->timing.v_addressable / VISUAL_CONFIRM_DPP_OFFSET_DENO; From 76c32b807cc5d9f098d1e943128642a320b23c98 Mon Sep 17 00:00:00 2001 From: Chaitanya Kumar Borah Date: Wed, 18 Oct 2023 17:06:22 +0530 Subject: [PATCH 0506/1397] drm/i915/mtl: Support HBR3 rate with C10 phy and eDP in MTL commit ce4941c2d6459664761c9854701015d8e99414fb upstream. eDP specification supports HBR3 link rate since v1.4a. Moreover, C10 phy can support HBR3 link rate for both DP and eDP. Therefore, do not clamp the supported rates for eDP at 6.75Gbps. Cc: BSpec: 70073 74224 Signed-off-by: Chaitanya Kumar Borah Reviewed-by: Mika Kahola Signed-off-by: Mika Kahola Link: https://patchwork.freedesktop.org/patch/msgid/20231018113622.2761997-1-chaitanya.kumar.borah@intel.com (cherry picked from commit a3431650f30a94b179d419ef87c21213655c28cd) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index e0e4cb529284..b4583d1f9666 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -430,7 +430,7 @@ static int mtl_max_source_rate(struct intel_dp *intel_dp) enum phy phy = intel_port_to_phy(i915, dig_port->base.port); if (intel_is_c10phy(i915, phy)) - return intel_dp_is_edp(intel_dp) ? 675000 : 810000; + return 810000; return 2000000; } From 8722d668845303969d45d81f1fd61e9e2f277401 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 31 Oct 2023 18:08:00 +0200 Subject: [PATCH 0507/1397] drm/i915: Bump GLK CDCLK frequency when driving multiple pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0cb89cd42fd22bbdec0b046c48f35775f5b88bdb upstream. On GLK CDCLK frequency needs to be at least 2*96 MHz when accessing the audio hardware. Currently we bump the CDCLK frequency up temporarily (if not high enough already) whenever audio hardware is being accessed, and drop it back down afterwards. With a single active pipe this works just fine as we can switch between all the valid CDCLK frequencies by changing the cd2x divider, which doesn't require a full modeset. However with multiple active pipes the cd2x divider trick no longer works, and thus we end up blinking all displays off and back on. To avoid this let's just bump the CDCLK frequency to >=2*96MHz whenever multiple pipes are active. The downside is slightly higher power consumption, but that seems like an acceptable tradeoff. With a single active pipe we can stick to the current more optiomal (from power comsumption POV) behaviour. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9599 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231031160800.18371-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 451eaa1a614c911f5a51078dcb68022874e4cb12) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_cdclk.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 2fb030b1ff1d..f99cf8037bd6 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -2688,6 +2688,18 @@ static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state) for_each_pipe(dev_priv, pipe) min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk); + /* + * Avoid glk_force_audio_cdclk() causing excessive screen + * blinking when multiple pipes are active by making sure + * CDCLK frequency is always high enough for audio. With a + * single active pipe we can always change CDCLK frequency + * by changing the cd2x divider (see glk_cdclk_table[]) and + * thus a full modeset won't be needed then. + */ + if (IS_GEMINILAKE(dev_priv) && cdclk_state->active_pipes && + !is_power_of_2(cdclk_state->active_pipes)) + min_cdclk = max(2 * 96000, min_cdclk); + if (min_cdclk > dev_priv->display.cdclk.max_cdclk_freq) { drm_dbg_kms(&dev_priv->drm, "required cdclk (%d kHz) exceeds max (%d kHz)\n", From 269303d2d5e5ea24135d9da61e60efd01709e806 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 3 Nov 2023 11:09:22 +0000 Subject: [PATCH 0508/1397] drm/i915: Fix potential spectre vulnerability commit 1a8e9bad6ef563c28ab0f8619628d5511be55431 upstream. Fix smatch warning: drivers/gpu/drm/i915/gem/i915_gem_context.c:847 set_proto_ctx_sseu() warn: potential spectre issue 'pc->user_engines' [r] (local cap) Fixes: d4433c7600f7 ("drm/i915/gem: Use the proto-context to handle create parameters (v5)") Cc: # v5.15+ Signed-off-by: Kunwu Chan Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20231103110922.430122-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 27b086382c22efb7e0a16442f7bdc2e120108ef3) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9a9ff84c90d7..e38f06a6e56e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -844,6 +844,7 @@ static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, if (idx >= pc->num_user_engines) return -EINVAL; + idx = array_index_nospec(idx, pc->num_user_engines); pe = &pc->user_engines[idx]; /* Only render engine supports RPCS configuration. */ From 4a6bb69835864a8c9003feb2a4ef48acdd321436 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 18 Oct 2023 11:38:15 +0200 Subject: [PATCH 0509/1397] drm/i915: Flush WC GGTT only on required platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7d7a328d0e8d6edefb7b0d665185d468667588d0 upstream. gen8_ggtt_invalidate() is only needed for limited set of platforms where GGTT is mapped as WC. This was added as way to fix WC based GGTT in commit 0f9b91c754b7 ("drm/i915: flush system agent TLBs on SNB") and there are no reference in HW docs that forces us to use this on non-WC backed GGTT. This can also cause unwanted side-effects on XE_HP platforms where GFX_FLSH_CNTL_GEN6 is not valid anymore. v2: Add a func to detect wc ggtt detection (Ville) v3: Improve commit log and add reference commit (Daniel) Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement") Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Jonathan Cavitt Cc: John Harrison Cc: Andi Shyti Cc: Ville Syrjälä Cc: Daniel Vetter Cc: # v6.2+ Suggested-by: Matt Roper Signed-off-by: Nirmoy Das Reviewed-by: Matt Roper Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231018093815.1349-1-nirmoy.das@intel.com (cherry picked from commit 81de3e296b10a13e5c9f13172825b0d8d9495c68) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 35 +++++++++++++++++++--------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index da21f2786b5d..b20d8fe8aa95 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -190,6 +190,21 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) spin_unlock_irq(&uncore->lock); } +static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915) +{ + /* + * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range + * will be dropped. For WC mappings in general we have 64 byte burst + * writes when the WC buffer is flushed, so we can't use it, but have to + * resort to an uncached mapping. The WC issue is easily caught by the + * readback check when writing GTT PTE entries. + */ + if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11) + return true; + + return false; +} + static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) { struct intel_uncore *uncore = ggtt->vm.gt->uncore; @@ -197,8 +212,12 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) /* * Note that as an uncached mmio write, this will flush the * WCB of the writes into the GGTT before it triggers the invalidate. + * + * Only perform this when GGTT is mapped as WC, see ggtt_probe_common(). */ - intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + if (needs_wc_ggtt_mapping(ggtt->vm.i915)) + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, + GFX_FLSH_CNTL_EN); } static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) @@ -902,17 +921,11 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915); - /* - * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range - * will be dropped. For WC mappings in general we have 64 byte burst - * writes when the WC buffer is flushed, so we can't use it, but have to - * resort to an uncached mapping. The WC issue is easily caught by the - * readback check when writing GTT PTE entries. - */ - if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) - ggtt->gsm = ioremap(phys_addr, size); - else + if (needs_wc_ggtt_mapping(i915)) ggtt->gsm = ioremap_wc(phys_addr, size); + else + ggtt->gsm = ioremap(phys_addr, size); + if (!ggtt->gsm) { drm_err(&i915->drm, "Failed to map the ggtt page table\n"); return -ENOMEM; From e00915dbfe245f993686be8367fb3c0bfc0ae455 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Tue, 31 Oct 2023 11:11:04 +0800 Subject: [PATCH 0510/1397] drm/amd/pm: Fix error of MACO flag setting code commit 7f3e6b840fa8b0889d776639310a5dc672c1e9e1 upstream. MACO only works if BACO is supported Signed-off-by: Ma Jun Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 8 ++++---- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 9 +++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 3903a47669e4..c8d14dc28266 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -352,12 +352,12 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu) if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC) smu->dc_controlled_by_gpio = true; - if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_BACO || - powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO) + if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_BACO) { smu_baco->platform_support = true; - if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO) - smu_baco->maco_support = true; + if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO) + smu_baco->maco_support = true; + } /* * We are in the transition to a new OD mechanism. diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 94ef5b4d116d..51ae41cb43ea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -341,12 +341,13 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu) if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC) smu->dc_controlled_by_gpio = true; - if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_BACO || - powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_MACO) + if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_BACO) { smu_baco->platform_support = true; - if (smu_baco->platform_support && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled)) - smu_baco->maco_support = true; + if ((powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_MACO) + && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled)) + smu_baco->maco_support = true; + } #if 0 if (!overdrive_lowerlimits->FeatureCtrlMask || From 0b7582054e906a7b060a0086c9e4bd1491a27d43 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 1 Nov 2023 15:48:14 -0400 Subject: [PATCH 0511/1397] drm/amdgpu/smu13: drop compute workload workaround commit 23170863ea0a0965d224342c0eb2ad8303b1f267 upstream. This was fixed in PMFW before launch and is no longer required. Reviewed-by: Yang Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 32 ++----------------- 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index c8d14dc28266..4022dd44ebb2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2163,38 +2163,10 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, } } - if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && - (((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xC8)) || - ((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xCC)))) { - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_COMPUTE_BIT, - (void *)(&activity_monitor_external), - false); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); - return ret; - } - - ret = smu_cmn_update_table(smu, - SMU_TABLE_ACTIVITY_MONITOR_COEFF, - WORKLOAD_PPLIB_CUSTOM_BIT, - (void *)(&activity_monitor_external), - true); - if (ret) { - dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); - return ret; - } - - workload_type = smu_cmn_to_asic_specific_index(smu, - CMN2ASIC_MAPPING_WORKLOAD, - PP_SMC_POWER_PROFILE_CUSTOM); - } else { - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, smu->power_profile_mode); - } if (workload_type < 0) return -EINVAL; From 32121a75b4d3f53445ad22a6eabc408be71c62b3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 Oct 2023 16:30:00 -0400 Subject: [PATCH 0512/1397] drm/amdgpu: don't use pci_is_thunderbolt_attached() commit 7b1c6263eaf4fd64ffe1cafdc504a42ee4bfbb33 upstream. It's only valid on Intel systems with the Intel VSEC. Use dev_is_removable() instead. This should do the right thing regardless of the platform. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b9fd755419fb..a164857bdb9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -2018,7 +2019,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { - struct drm_device *dev = adev_to_drm(adev); struct pci_dev *parent; int i, r; bool total; @@ -2089,7 +2089,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && ((adev->flags & AMD_IS_APU) == 0) && - !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) + !dev_is_removable(&adev->pdev->dev)) adev->flags |= AMD_IS_PX; if (!(adev->flags & AMD_IS_APU)) { @@ -3903,7 +3903,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, px = amdgpu_device_supports_px(ddev); - if (px || (!pci_is_thunderbolt_attached(adev->pdev) && + if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px); @@ -4048,7 +4048,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) px = amdgpu_device_supports_px(adev_to_drm(adev)); - if (px || (!pci_is_thunderbolt_attached(adev->pdev) && + if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) vga_switcheroo_unregister_client(adev->pdev); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 4038455d7998..ef368ca79a66 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -28,6 +28,7 @@ #include "nbio/nbio_2_3_offset.h" #include "nbio/nbio_2_3_sh_mask.h" #include +#include #include #define smnPCIE_CONFIG_CNTL 0x11180044 @@ -361,7 +362,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; - if (pci_is_thunderbolt_attached(adev->pdev)) + if (dev_is_removable(&adev->pdev->dev)) data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; else data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; @@ -480,7 +481,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) def = data = RREG32_PCIE(smnPCIE_LC_CNTL); data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; - if (pci_is_thunderbolt_attached(adev->pdev)) + if (dev_is_removable(&adev->pdev->dev)) data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; else data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; From 553d26837568814821a78295a9d7ff3247d0df92 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 1 Nov 2023 14:22:04 +0800 Subject: [PATCH 0513/1397] drm/amdgpu: fix GRBM read timeout when do mes_self_test commit 36e7ff5c13cb15cb7b06c76d42bb76cbf6b7ea75 upstream. Use a proper MEID to make sure the CP_HQD_* and CP_GFX_HQD_* registers can be touched when initialize the compute and gfx mqd in mes_self_test. Otherwise, we expect no response from CP and an GRBM eventual timeout. Signed-off-by: Tim Huang Acked-by: Alex Deucher Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index b6015157763a..6aa75052309f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -556,8 +556,20 @@ static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, mqd_prop.hqd_queue_priority = p->hqd_queue_priority; mqd_prop.hqd_active = false; + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + mutex_lock(&adev->srbm_mutex); + amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); + } + mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + amdgpu_bo_unreserve(q->mqd_obj); } @@ -993,9 +1005,13 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, switch (queue_type) { case AMDGPU_RING_TYPE_GFX: ring->funcs = adev->gfx.gfx_ring[0].funcs; + ring->me = adev->gfx.gfx_ring[0].me; + ring->pipe = adev->gfx.gfx_ring[0].pipe; break; case AMDGPU_RING_TYPE_COMPUTE: ring->funcs = adev->gfx.compute_ring[0].funcs; + ring->me = adev->gfx.compute_ring[0].me; + ring->pipe = adev->gfx.compute_ring[0].pipe; break; case AMDGPU_RING_TYPE_SDMA: ring->funcs = adev->sdma.instance[0].ring.funcs; From e9b8533d571746d6f4e171b13c1b46e5de2cd12b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 19 Oct 2023 13:56:56 -0400 Subject: [PATCH 0514/1397] drm/amdgpu: add a retry for IP discovery init commit 3938eb956e383ef88b8fc7d556492336ebee52df upstream. AMD dGPUs have integrated FW that runs as soon as the device gets power and initializes the board (determines the amount of memory, provides configuration details to the driver, etc.). For direct PCIe attached cards this happens as soon as power is applied and normally completes well before the OS has even started loading. However, with hotpluggable ports like USB4, the driver needs to wait for this to complete before initializing the device. This normally takes 60-100ms, but could take longer on some older boards periodically due to memory training. Retry for up to a second. In the non-hotplug case, there should be no change in behavior and this should complete on the first try. v2: adjust test criteria v3: adjust checks for the masks, only enable on removable devices v4: skip bif_fb_en check Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 7d5e7ad28ba8..68a901287264 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -93,6 +93,7 @@ MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); #define mmRCC_CONFIG_MEMSIZE 0xde3 +#define mmMP0_SMN_C2PMSG_33 0x16061 #define mmMM_INDEX 0x0 #define mmMM_INDEX_HI 0x6 #define mmMM_DATA 0x1 @@ -231,8 +232,26 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, uint8_t *binary) { - uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - int ret = 0; + uint64_t vram_size; + u32 msg; + int i, ret = 0; + + /* It can take up to a second for IFWI init to complete on some dGPUs, + * but generally it should be in the 60-100ms range. Normally this starts + * as soon as the device gets power so by the time the OS loads this has long + * completed. However, when a card is hotplugged via e.g., USB4, we need to + * wait for this to complete. Once the C2PMSG is updated, we can + * continue. + */ + if (dev_is_removable(&adev->pdev->dev)) { + for (i = 0; i < 1000; i++) { + msg = RREG32(mmMP0_SMN_C2PMSG_33); + if (msg & 0x80000000) + break; + msleep(1); + } + } + vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; if (vram_size) { uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; From b4d34b72bf5381e6e3dc42431c0a508168f67b0b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 Oct 2023 15:40:01 -0400 Subject: [PATCH 0515/1397] drm/amdgpu: don't use ATRM for external devices commit 432e664e7c98c243fab4c3c95bd463bea3aeed28 upstream. The ATRM ACPI method is for fetching the dGPU vbios rom image on laptops and all-in-one systems. It should not be used for external add in cards. If the dGPU is thunderbolt connected, don't try ATRM. v2: pci_is_thunderbolt_attached only works for Intel. Use pdev->external_facing instead. v3: dev_is_removable() seems to be what we want Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2925 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 38ccec913f00..f3a09ecb7699 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "atom.h" +#include #include #include #include @@ -287,6 +288,10 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return false; + /* ATRM is for on-platform devices only */ + if (dev_is_removable(&adev->pdev->dev)) + return false; + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { dhandle = ACPI_HANDLE(&pdev->dev); if (!dhandle) From 5e94f18d5e93bd7c352e6bb7c04fbe3c4f44a6d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 31 Oct 2023 15:35:27 +0100 Subject: [PATCH 0516/1397] drm/amdgpu: fix error handling in amdgpu_vm_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8473bfdcb5b1a32fd05629c4535ccacd73bc5567 upstream. When clearing the root PD fails we need to properly release it again. Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 +++++++++++++------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 82f25996ff5e..9ab87a55c8da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2125,7 +2125,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int32_t xcp_id) { struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; @@ -2144,6 +2145,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp INIT_LIST_HEAD(&vm->done); INIT_LIST_HEAD(&vm->pt_freed); INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); + INIT_KFIFO(vm->faults); r = amdgpu_vm_init_entities(adev, vm); if (r) @@ -2178,34 +2180,33 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp false, &root, xcp_id); if (r) goto error_free_delayed; - root_bo = &root->bo; + + root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); - if (r) - goto error_free_root; + if (r) { + amdgpu_bo_unref(&root->shadow); + amdgpu_bo_unref(&root_bo); + goto error_free_delayed; + } + amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); if (r) - goto error_unreserve; - - amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); + goto error_free_root; r = amdgpu_vm_pt_clear(adev, vm, root, false); if (r) - goto error_unreserve; + goto error_free_root; amdgpu_bo_unreserve(vm->root.bo); - - INIT_KFIFO(vm->faults); + amdgpu_bo_unref(&root_bo); return 0; -error_unreserve: - amdgpu_bo_unreserve(vm->root.bo); - error_free_root: - amdgpu_bo_unref(&root->shadow); + amdgpu_vm_pt_free_root(adev, vm); + amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); - vm->root.bo = NULL; error_free_delayed: dma_fence_put(vm->last_tlb_flush); From df9142a336767a570002f81d89599aafd2afcb77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 9 Nov 2023 10:12:39 +0100 Subject: [PATCH 0517/1397] drm/amdgpu: fix error handling in amdgpu_bo_list_get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 12f76050d8d4d10dab96333656b821bd4620d103 upstream. We should not leak the pointer where we couldn't grab the reference on to the caller because it can be that the error handling still tries to put the reference then. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index b6298e901cbd..9a53ca555e70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -183,6 +183,7 @@ int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, } rcu_read_unlock(); + *result = NULL; return -ENOENT; } From c0707623d46021262e68e6aa9e2111ba84593412 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 9 Nov 2023 10:14:14 +0100 Subject: [PATCH 0518/1397] drm/amdgpu: lower CS errors to debug severity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 17daf01ab4e3e5a5929747aa05cc15eb2bad5438 upstream. Otherwise userspace can spam the logs by using incorrect input values. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d93a8961274c..f4fd0d5bd9b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1411,7 +1411,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); else if (r != -ERESTARTSYS && r != -EAGAIN) - DRM_ERROR("Failed to process the buffer list %d!\n", r); + DRM_DEBUG("Failed to process the buffer list %d!\n", r); goto error_fini; } From 5e4d9fda7e65cd267900f7d2c7c1579a98bf413c Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 31 Oct 2023 13:30:00 -0400 Subject: [PATCH 0519/1397] drm/amdgpu: Fix possible null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 256503071c2de2b5b5c20e06654aa9a44f13aa62 upstream. mem = bo->tbo.resource may be NULL in amdgpu_vm_bo_update. Fixes: 180253782038 ("drm/ttm: stop allocating dummy resources during BO creation") Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9ab87a55c8da..89c8e51cd332 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1095,8 +1095,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bo = gem_to_amdgpu_bo(gobj); } mem = bo->tbo.resource; - if (mem->mem_type == TTM_PL_TT || - mem->mem_type == AMDGPU_PL_PREEMPT) + if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_PREEMPT)) pages_addr = bo->tbo.ttm->dma_address; } From 5a4d4bf6e3549a2b537c6cfe4a4e90468ad97367 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 13 Sep 2023 16:18:44 -0400 Subject: [PATCH 0520/1397] drm/amd/display: Guard against invalid RPTR/WPTR being set commit 1ffa8602e39b89469dc703ebab7a7e44c33da0f7 upstream. [WHY] HW can return invalid values on register read, guard against these being set and causing us to access memory out of range and page fault. [HOW] Guard at sync_inbox1 and guard at pushing commands. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Hansen Dsouza Acked-by: Alex Hung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/dmub/src/dmub_srv.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 93624ffe4eb8..b4c1044e4ae4 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -658,9 +658,16 @@ enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) return DMUB_STATUS_INVALID; if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { - dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - dmub->inbox1_rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub); - dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; + uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); + + if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) { + return DMUB_STATUS_HW_FAILURE; + } else { + dmub->inbox1_rb.rptr = rptr; + dmub->inbox1_rb.wrpt = wptr; + dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; + } } return DMUB_STATUS_OK; @@ -694,6 +701,11 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, if (!dmub->hw_init) return DMUB_STATUS_INVALID; + if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity || + dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) { + return DMUB_STATUS_HW_FAILURE; + } + if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) return DMUB_STATUS_OK; From 6c888a71d8d4358298305e3578b1664920af7168 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Mon, 23 Oct 2023 13:57:32 -0400 Subject: [PATCH 0521/1397] drm/amd/display: Fix DSC not Enabled on Direct MST Sink commit a58555359a9f870543aaddef277c3396159895ce upstream. [WHY & HOW] For the scenario when a dsc capable MST sink device is directly connected, it needs to use max dsc compression as the link bw constraint. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Roman Li Acked-by: Alex Hung Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 57230661132b..28f5eb9ecbd3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1598,31 +1598,31 @@ enum dc_status dm_dp_mst_is_port_support_mode( unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; unsigned int max_compressed_bw_in_kbps = 0; struct dc_dsc_bw_range bw_range = {0}; - struct drm_dp_mst_topology_mgr *mst_mgr; + uint16_t full_pbn = aconnector->mst_output_port->full_pbn; /* - * check if the mode could be supported if DSC pass-through is supported - * AND check if there enough bandwidth available to support the mode - * with DSC enabled. + * Consider the case with the depth of the mst topology tree is equal or less than 2 + * A. When dsc bitstream can be transmitted along the entire path + * 1. dsc is possible between source and branch/leaf device (common dsc params is possible), AND + * 2. dsc passthrough supported at MST branch, or + * 3. dsc decoding supported at leaf MST device + * Use maximum dsc compression as bw constraint + * B. When dsc bitstream cannot be transmitted along the entire path + * Use native bw as bw constraint */ if (is_dsc_common_config_possible(stream, &bw_range) && - aconnector->mst_output_port->passthrough_aux) { - mst_mgr = aconnector->mst_output_port->mgr; - mutex_lock(&mst_mgr->lock); - + (aconnector->mst_output_port->passthrough_aux || + aconnector->dsc_aux == &aconnector->mst_output_port->aux)) { cur_link_settings = stream->link->verified_link_cap; upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, - &cur_link_settings - ); - down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn); + &cur_link_settings); + down_link_bw_in_kbps = kbps_from_pbn(full_pbn); /* pick the bottleneck */ end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, down_link_bw_in_kbps); - mutex_unlock(&mst_mgr->lock); - /* * use the maximum dsc compression bandwidth as the required * bandwidth for the mode @@ -1637,8 +1637,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( /* check if mode could be supported within full_pbn */ bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); - - if (pbn > aconnector->mst_output_port->full_pbn) + if (pbn > full_pbn) return DC_FAIL_BANDWIDTH_VALIDATE; } From 1d07b7e84276777dad3c8cfebdf8e739606f90c9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 8 Nov 2023 13:31:57 -0600 Subject: [PATCH 0522/1397] drm/amd/display: fix a NULL pointer dereference in amdgpu_dm_i2c_xfer() commit b71f4ade1b8900d30c661d6c27f87c35214c398c upstream. When ddc_service_construct() is called, it explicitly checks both the link type and whether there is something on the link which will dictate whether the pin is marked as hw_supported. If the pin isn't set or the link is not set (such as from unloading/reloading amdgpu in an IGT test) then fail the amdgpu_dm_i2c_xfer() call. Cc: stable@vger.kernel.org Fixes: 22676bc500c2 ("drm/amd/display: Fix dmub soft hang for PSR 1") Link: https://github.com/fwupd/fwupd/issues/6327 Signed-off-by: Mario Limonciello Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 34f011cedd06..67fab3077e41 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7430,6 +7430,9 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, int i; int result = -EIO; + if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported) + return result; + cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL); if (!cmd.payloads) From 9a5ae53e0f46ab17176ae8c50de3318be10c00b6 Mon Sep 17 00:00:00 2001 From: Tianci Yin Date: Wed, 1 Nov 2023 09:47:13 +0800 Subject: [PATCH 0523/1397] drm/amd/display: Enable fast plane updates on DCN3.2 and above commit 435f5b369657cffee4b04db1f5805b48599f4dbe upstream. [WHY] When cursor moves across screen boarder, lag cursor observed, since subvp settings need to sync up with vblank that causes cursor updates being delayed. [HOW] Enable fast plane updates on DCN3.2 to fix it. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Aurabindo Pillai Acked-by: Alex Hung Signed-off-by: Tianci Yin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 67fab3077e41..00e23bebbc5c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9541,14 +9541,14 @@ static bool should_reset_plane(struct drm_atomic_state *state, struct drm_plane *other; struct drm_plane_state *old_other_state, *new_other_state; struct drm_crtc_state *new_crtc_state; + struct amdgpu_device *adev = drm_to_adev(plane->dev); int i; /* - * TODO: Remove this hack once the checks below are sufficient - * enough to determine when we need to reset all the planes on - * the stream. + * TODO: Remove this hack for all asics once it proves that the + * fast updates works fine on DCN3.2+. */ - if (state->allow_modeset) + if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset) return true; /* Exit early if we know that we're adding or removing the plane. */ From a4a131bdd9cf37c76388276835a1c31d3e1dc240 Mon Sep 17 00:00:00 2001 From: Paul Hsieh Date: Wed, 25 Oct 2023 10:53:35 +0800 Subject: [PATCH 0524/1397] drm/amd/display: Clear dpcd_sink_ext_caps if not set commit 923bbfe6c888812db1088d684bd30c24036226d2 upstream. [WHY] Some eDP panels' ext caps don't set initial values and the value of dpcd_addr (0x317) is random. It means that sometimes the eDP can be OLED, miniLED and etc, and cause incorrect backlight control interface. [HOW] Add remove_sink_ext_caps to remove sink ext caps (HDR, OLED and etc) Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Anthony Koo Acked-by: Alex Hung Signed-off-by: Paul Hsieh Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + drivers/gpu/drm/amd/display/dc/link/link_detection.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 445ad79001ce..ba900b0a62a8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -189,6 +189,7 @@ struct dc_panel_patch { unsigned int disable_fams; unsigned int skip_avmute; unsigned int mst_start_top_delay; + unsigned int remove_sink_ext_caps; }; struct dc_edid_caps { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index c9b6676eaf53..e682d27e098f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -1085,6 +1085,9 @@ static bool detect_link_and_local_sink(struct dc_link *link, if (sink->edid_caps.panel_patch.skip_scdc_overwrite) link->ctx->dc->debug.hdmi20_disable = true; + if (sink->edid_caps.panel_patch.remove_sink_ext_caps) + link->dpcd_sink_ext_caps.raw = 0; + if (dc_is_hdmi_signal(link->connector_signal)) read_scdc_caps(link->ddc, link->local_sink); From 17661e606e6d643e2e39af9470064c3d50da9834 Mon Sep 17 00:00:00 2001 From: Lewis Huang Date: Thu, 19 Oct 2023 17:22:21 +0800 Subject: [PATCH 0525/1397] drm/amd/display: Change the DMCUB mailbox memory location from FB to inbox commit 5911d02cac70d7fb52009fbd37423e63f8f6f9bc upstream. [WHY] Flush command sent to DMCUB spends more time for execution on a dGPU than on an APU. This causes cursor lag when using high refresh rate mouses. [HOW] 1. Change the DMCUB mailbox memory location from FB to inbox. 2. Only change windows memory to inbox. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Lewis Huang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 ++++---- drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 22 ++++++++----- .../gpu/drm/amd/display/dmub/src/dmub_srv.c | 32 ++++++++++++++----- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 00e23bebbc5c..deedcd997845 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2084,7 +2084,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) struct dmub_srv_create_params create_params; struct dmub_srv_region_params region_params; struct dmub_srv_region_info region_info; - struct dmub_srv_fb_params fb_params; + struct dmub_srv_memory_params memory_params; struct dmub_srv_fb_info *fb_info; struct dmub_srv *dmub_srv; const struct dmcub_firmware_header_v1_0 *hdr; @@ -2184,6 +2184,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) adev->dm.dmub_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes) + PSP_HEADER_BYTES; + region_params.is_mailbox_in_inbox = false; status = dmub_srv_calc_region_info(dmub_srv, ®ion_params, ®ion_info); @@ -2207,10 +2208,10 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) return r; /* Rebase the regions on the framebuffer address. */ - memset(&fb_params, 0, sizeof(fb_params)); - fb_params.cpu_addr = adev->dm.dmub_bo_cpu_addr; - fb_params.gpu_addr = adev->dm.dmub_bo_gpu_addr; - fb_params.region_info = ®ion_info; + memset(&memory_params, 0, sizeof(memory_params)); + memory_params.cpu_fb_addr = adev->dm.dmub_bo_cpu_addr; + memory_params.gpu_fb_addr = adev->dm.dmub_bo_gpu_addr; + memory_params.region_info = ®ion_info; adev->dm.dmub_fb_info = kzalloc(sizeof(*adev->dm.dmub_fb_info), GFP_KERNEL); @@ -2222,7 +2223,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) return -ENOMEM; } - status = dmub_srv_calc_fb_info(dmub_srv, &fb_params, fb_info); + status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info); if (status != DMUB_STATUS_OK) { DRM_ERROR("Error calculating DMUB FB info: %d\n", status); return -EINVAL; diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 2d995c87fbb9..d3c4a9a577ee 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -186,6 +186,7 @@ struct dmub_srv_region_params { uint32_t vbios_size; const uint8_t *fw_inst_const; const uint8_t *fw_bss_data; + bool is_mailbox_in_inbox; }; /** @@ -205,20 +206,25 @@ struct dmub_srv_region_params { */ struct dmub_srv_region_info { uint32_t fb_size; + uint32_t inbox_size; uint8_t num_regions; struct dmub_region regions[DMUB_WINDOW_TOTAL]; }; /** - * struct dmub_srv_fb_params - parameters used for driver fb setup + * struct dmub_srv_memory_params - parameters used for driver fb setup * @region_info: region info calculated by dmub service - * @cpu_addr: base cpu address for the framebuffer - * @gpu_addr: base gpu virtual address for the framebuffer + * @cpu_fb_addr: base cpu address for the framebuffer + * @cpu_inbox_addr: base cpu address for the gart + * @gpu_fb_addr: base gpu virtual address for the framebuffer + * @gpu_inbox_addr: base gpu virtual address for the gart */ -struct dmub_srv_fb_params { +struct dmub_srv_memory_params { const struct dmub_srv_region_info *region_info; - void *cpu_addr; - uint64_t gpu_addr; + void *cpu_fb_addr; + void *cpu_inbox_addr; + uint64_t gpu_fb_addr; + uint64_t gpu_inbox_addr; }; /** @@ -546,8 +552,8 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub, * DMUB_STATUS_OK - success * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub, - const struct dmub_srv_fb_params *params, +enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub, + const struct dmub_srv_memory_params *params, struct dmub_srv_fb_info *out); /** diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index b4c1044e4ae4..d1922dde5b8b 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -386,7 +386,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub, uint32_t fw_state_size = DMUB_FW_STATE_SIZE; uint32_t trace_buffer_size = DMUB_TRACE_BUFFER_SIZE; uint32_t scratch_mem_size = DMUB_SCRATCH_MEM_SIZE; - + uint32_t previous_top = 0; if (!dmub->sw_init) return DMUB_STATUS_INVALID; @@ -411,8 +411,15 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub, bios->base = dmub_align(stack->top, 256); bios->top = bios->base + params->vbios_size; - mail->base = dmub_align(bios->top, 256); - mail->top = mail->base + DMUB_MAILBOX_SIZE; + if (params->is_mailbox_in_inbox) { + mail->base = 0; + mail->top = mail->base + DMUB_MAILBOX_SIZE; + previous_top = bios->top; + } else { + mail->base = dmub_align(bios->top, 256); + mail->top = mail->base + DMUB_MAILBOX_SIZE; + previous_top = mail->top; + } fw_info = dmub_get_fw_meta_info(params); @@ -431,7 +438,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub, dmub->fw_version = fw_info->fw_version; } - trace_buff->base = dmub_align(mail->top, 256); + trace_buff->base = dmub_align(previous_top, 256); trace_buff->top = trace_buff->base + dmub_align(trace_buffer_size, 64); fw_state->base = dmub_align(trace_buff->top, 256); @@ -442,11 +449,14 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub, out->fb_size = dmub_align(scratch_mem->top, 4096); + if (params->is_mailbox_in_inbox) + out->inbox_size = dmub_align(mail->top, 4096); + return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub, - const struct dmub_srv_fb_params *params, +enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub, + const struct dmub_srv_memory_params *params, struct dmub_srv_fb_info *out) { uint8_t *cpu_base; @@ -461,8 +471,8 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub, if (params->region_info->num_regions != DMUB_NUM_WINDOWS) return DMUB_STATUS_INVALID; - cpu_base = (uint8_t *)params->cpu_addr; - gpu_base = params->gpu_addr; + cpu_base = (uint8_t *)params->cpu_fb_addr; + gpu_base = params->gpu_fb_addr; for (i = 0; i < DMUB_NUM_WINDOWS; ++i) { const struct dmub_region *reg = @@ -470,6 +480,12 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub, out->fb[i].cpu_addr = cpu_base + reg->base; out->fb[i].gpu_addr = gpu_base + reg->base; + + if (i == DMUB_WINDOW_4_MAILBOX && params->cpu_inbox_addr != 0) { + out->fb[i].cpu_addr = (uint8_t *)params->cpu_inbox_addr + reg->base; + out->fb[i].gpu_addr = params->gpu_inbox_addr + reg->base; + } + out->fb[i].size = reg->top - reg->base; } From bd3a9e5771a8b332f466d06f7c130a69cab0d526 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 28 Nov 2023 17:20:18 +0000 Subject: [PATCH 0526/1397] Linux 6.6.3 Link: https://lore.kernel.org/r/20231124172028.107505484@linuxfoundation.org Tested-by: Ronald Warsow Tested-by: Nam Cao Tested-by: Takeshi Ogasawara Tested-by: Ron Economos Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20231125163158.249616313@linuxfoundation.org Tested-by: Ronald Warsow Tested-by: SeongJae Park Link: https://lore.kernel.org/r/20231125194417.090791215@linuxfoundation.org Tested-by: Takeshi Ogasawara Tested-by: Ronald Warsow Tested-by: Florian Fainelli Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20231126154418.032283745@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Ricardo B. Marliere Tested-by: Florian Fainelli Tested-by: Takeshi Ogasawara Tested-by: Linux Kernel Functional Testing Tested-by: Nam Cao Tested-by: Allen Pais Tested-by: Justin M. Forbes Tested-by: Guenter Roeck Tested-by: Bagas Sanjaya Tested-by: Rudi Heitbaum Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 03c52108af62..8ecebeb5642c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 2 +SUBLEVEL = 3 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth From ed1e0ea2416fea620900587140b4bff339ab0097 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 28 Nov 2023 16:58:34 -0500 Subject: [PATCH 0527/1397] NFSD: Fix "start of NFS reply" pointer passed to nfsd_cache_update() [ Upstream commit 1caf5f61dd8430ae5a0b4538afe4953ce7517cbb ] The "statp + 1" pointer that is passed to nfsd_cache_update() is supposed to point to the start of the egress NFS Reply header. In fact, it does point there for AUTH_SYS and RPCSEC_GSS_KRB5 requests. But both krb5i and krb5p add fields between the RPC header's accept_stat field and the start of the NFS Reply header. In those cases, "statp + 1" points at the extra fields instead of the Reply. The result is that nfsd_cache_update() caches what looks to the client like garbage. A connection break can occur for a number of reasons, but the most common reason when using krb5i/p is a GSS sequence number window underrun. When an underrun is detected, the server is obliged to drop the RPC and the connection to force a retransmit with a fresh GSS sequence number. The client presents the same XID, it hits in the server's DRC, and the server returns the garbage cache entry. The "statp + 1" argument has been used since the oldest changeset in the kernel history repo, so it has been in nfsd_dispatch() literally since before history began. The problem arose only when the server-side GSS implementation was added twenty years ago. Reviewed-by: Jeff Layton Tested-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfssvc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c7af1095f6b5..378ec82bd390 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -988,6 +988,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp) const struct svc_procedure *proc = rqstp->rq_procinfo; __be32 *statp = rqstp->rq_accept_statp; struct nfsd_cacherep *rp; + __be32 *nfs_reply; /* * Give the xdr decoder a chance to change this if it wants @@ -1008,6 +1009,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp) goto out_dropit; } + nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0); *statp = proc->pc_func(rqstp); if (test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; @@ -1015,7 +1017,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp) if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) goto out_encode_err; - nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1); + nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply); out_cached_reply: return 1; From b79e3569bab93280b6306b2962da7ad848db53b7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 28 Nov 2023 16:58:40 -0500 Subject: [PATCH 0528/1397] NFSD: Fix checksum mismatches in the duplicate reply cache [ Upstream commit bf51c52a1f3c238d72c64e14d5e7702d3a245b82 ] nfsd_cache_csum() currently assumes that the server's RPC layer has been advancing rq_arg.head[0].iov_base as it decodes an incoming request, because that's the way it used to work. On entry, it expects that buf->head[0].iov_base points to the start of the NFS header, and excludes the already-decoded RPC header. These days however, head[0].iov_base now points to the start of the RPC header during all processing. It no longer points at the NFS Call header when execution arrives at nfsd_cache_csum(). In a retransmitted RPC the XID and the NFS header are supposed to be the same as the original message, but the contents of the retransmitted RPC header can be different. For example, for krb5, the GSS sequence number will be different between the two. Thus if the RPC header is always included in the DRC checksum computation, the checksum of the retransmitted message might not match the checksum of the original message, even though the NFS part of these messages is identical. The result is that, even if a matching XID is found in the DRC, the checksum mismatch causes the server to execute the retransmitted RPC transaction again. Reviewed-by: Jeff Layton Tested-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/cache.h | 4 +-- fs/nfsd/nfscache.c | 64 +++++++++++++++++++++++++++++++--------------- fs/nfsd/nfssvc.c | 10 +++++++- 3 files changed, 54 insertions(+), 24 deletions(-) diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 929248c6ca84..4cbe0434cbb8 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -84,8 +84,8 @@ int nfsd_net_reply_cache_init(struct nfsd_net *nn); void nfsd_net_reply_cache_destroy(struct nfsd_net *nn); int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); -int nfsd_cache_lookup(struct svc_rqst *rqstp, - struct nfsd_cacherep **cacherep); +int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, + unsigned int len, struct nfsd_cacherep **cacherep); void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp, int cachetype, __be32 *statp); int nfsd_reply_cache_stats_show(struct seq_file *m, void *v); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index abb453be71ca..6cd36af2f97e 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -368,33 +368,52 @@ nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) return freed; } -/* - * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes +/** + * nfsd_cache_csum - Checksum incoming NFS Call arguments + * @buf: buffer containing a whole RPC Call message + * @start: starting byte of the NFS Call header + * @remaining: size of the NFS Call header, in bytes + * + * Compute a weak checksum of the leading bytes of an NFS procedure + * call header to help verify that a retransmitted Call matches an + * entry in the duplicate reply cache. + * + * To avoid assumptions about how the RPC message is laid out in + * @buf and what else it might contain (eg, a GSS MIC suffix), the + * caller passes us the exact location and length of the NFS Call + * header. + * + * Returns a 32-bit checksum value, as defined in RFC 793. */ -static __wsum -nfsd_cache_csum(struct svc_rqst *rqstp) +static __wsum nfsd_cache_csum(struct xdr_buf *buf, unsigned int start, + unsigned int remaining) { + unsigned int base, len; + struct xdr_buf subbuf; + __wsum csum = 0; + void *p; int idx; - unsigned int base; - __wsum csum; - struct xdr_buf *buf = &rqstp->rq_arg; - const unsigned char *p = buf->head[0].iov_base; - size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len, - RC_CSUMLEN); - size_t len = min(buf->head[0].iov_len, csum_len); + + if (remaining > RC_CSUMLEN) + remaining = RC_CSUMLEN; + if (xdr_buf_subsegment(buf, &subbuf, start, remaining)) + return csum; /* rq_arg.head first */ - csum = csum_partial(p, len, 0); - csum_len -= len; + if (subbuf.head[0].iov_len) { + len = min_t(unsigned int, subbuf.head[0].iov_len, remaining); + csum = csum_partial(subbuf.head[0].iov_base, len, csum); + remaining -= len; + } /* Continue into page array */ - idx = buf->page_base / PAGE_SIZE; - base = buf->page_base & ~PAGE_MASK; - while (csum_len) { - p = page_address(buf->pages[idx]) + base; - len = min_t(size_t, PAGE_SIZE - base, csum_len); + idx = subbuf.page_base / PAGE_SIZE; + base = subbuf.page_base & ~PAGE_MASK; + while (remaining) { + p = page_address(subbuf.pages[idx]) + base; + len = min_t(unsigned int, PAGE_SIZE - base, remaining); csum = csum_partial(p, len, csum); - csum_len -= len; + remaining -= len; base = 0; ++idx; } @@ -465,6 +484,8 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct nfsd_cacherep *key, /** * nfsd_cache_lookup - Find an entry in the duplicate reply cache * @rqstp: Incoming Call to find + * @start: starting byte in @rqstp->rq_arg of the NFS Call header + * @len: size of the NFS Call header, in bytes * @cacherep: OUT: DRC entry for this request * * Try to find an entry matching the current call in the cache. When none @@ -478,7 +499,8 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct nfsd_cacherep *key, * %RC_REPLY: Reply from cache * %RC_DROPIT: Do not process the request further */ -int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep) +int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, + unsigned int len, struct nfsd_cacherep **cacherep) { struct nfsd_net *nn; struct nfsd_cacherep *rp, *found; @@ -494,7 +516,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep) goto out; } - csum = nfsd_cache_csum(rqstp); + csum = nfsd_cache_csum(&rqstp->rq_arg, start, len); /* * Since the common case is a cache miss followed by an insert, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 378ec82bd390..a87e9ef61386 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -988,6 +988,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp) const struct svc_procedure *proc = rqstp->rq_procinfo; __be32 *statp = rqstp->rq_accept_statp; struct nfsd_cacherep *rp; + unsigned int start, len; __be32 *nfs_reply; /* @@ -996,11 +997,18 @@ int nfsd_dispatch(struct svc_rqst *rqstp) */ rqstp->rq_cachetype = proc->pc_cachetype; + /* + * ->pc_decode advances the argument stream past the NFS + * Call header, so grab the header's starting location and + * size now for the call to nfsd_cache_lookup(). + */ + start = xdr_stream_pos(&rqstp->rq_arg_stream); + len = xdr_stream_remaining(&rqstp->rq_arg_stream); if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; rp = NULL; - switch (nfsd_cache_lookup(rqstp, &rp)) { + switch (nfsd_cache_lookup(rqstp, start, len, &rp)) { case RC_DOIT: break; case RC_REPLY: From 3fca09b62ae8e91b704dd37809829ed75584b1f0 Mon Sep 17 00:00:00 2001 From: Fang Xiang Date: Mon, 30 Oct 2023 16:32:56 +0800 Subject: [PATCH 0529/1397] irqchip/gic-v3-its: Flush ITS tables correctly in non-coherent GIC designs [ Upstream commit d3badb15613c14dd35d3495b1dde5c90fcd616dd ] In non-coherent GIC designs, the ITS tables must be flushed before writing to the GITS_BASER registers, otherwise the ITS could read dirty tables, which results in unpredictable behavior. Flush the tables right at the begin of its_setup_baser() to prevent that. [ tglx: Massage changelog ] Fixes: a8707f553884 ("irqchip/gic-v3: Add Rockchip 3588001 erratum workaround") Suggested-by: Marc Zyngier Signed-off-by: Fang Xiang Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20231030083256.4345-1-fangxiang3@xiaomi.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v3-its.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index a8c89df1a997..9a7a74239eab 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2379,12 +2379,12 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, break; } + if (!shr) + gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order)); + its_write_baser(its, baser, val); tmp = baser->val; - if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE) - tmp &= ~GITS_BASER_SHAREABILITY_MASK; - if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) { /* * Shareability didn't stick. Just use @@ -2394,10 +2394,9 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, * non-cacheable as well. */ shr = tmp & GITS_BASER_SHAREABILITY_MASK; - if (!shr) { + if (!shr) cache = GITS_BASER_nC; - gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order)); - } + goto retry_baser; } @@ -2609,6 +2608,11 @@ static int its_alloc_tables(struct its_node *its) /* erratum 24313: ignore memory access type */ cache = GITS_BASER_nCnB; + if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE) { + cache = GITS_BASER_nC; + shr = 0; + } + for (i = 0; i < GITS_BASER_NR_REGS; i++) { struct its_baser *baser = its->tables + i; u64 val = its_read_baser(its, baser); From f969fd598682e4bb97f63de2189a4284c495e125 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Mon, 16 Oct 2023 19:01:22 +0530 Subject: [PATCH 0530/1397] hv/hv_kvp_daemon: Some small fixes for handling NM keyfiles [ Upstream commit c3803203bc5ec910a3eb06172cf6fb368e0e4390 ] Some small fixes: - lets make sure we are not adding ipv4 addresses in ipv6 section in keyfile and vice versa. - ADDR_FAMILY_IPV6 is a bit in addr_family. Test that bit instead of checking the whole value of addr_family. - Some trivial fixes in hv_set_ifconfig.sh. These fixes are proposed after doing some internal testing at Red Hat. CC: Shradha Gupta CC: Saurabh Sengar Fixes: 42999c904612 ("hv/hv_kvp_daemon:Support for keyfile based connection profile") Signed-off-by: Ani Sinha Reviewed-by: Shradha Gupta Signed-off-by: Wei Liu Message-ID: <20231016133122.2419537-1-anisinha@redhat.com> Signed-off-by: Sasha Levin --- tools/hv/hv_kvp_daemon.c | 20 ++++++++++++-------- tools/hv/hv_set_ifconfig.sh | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 264eeb9c46a9..318e2dad27e0 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -1421,7 +1421,7 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (error) goto setval_error; - if (new_val->addr_family == ADDR_FAMILY_IPV6) { + if (new_val->addr_family & ADDR_FAMILY_IPV6) { error = fprintf(nmfile, "\n[ipv6]\n"); if (error < 0) goto setval_error; @@ -1455,14 +1455,18 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (error < 0) goto setval_error; - error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way); - if (error < 0) - goto setval_error; - - error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr); - if (error < 0) - goto setval_error; + /* we do not want ipv4 addresses in ipv6 section and vice versa */ + if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) { + error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way); + if (error < 0) + goto setval_error; + } + if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) { + error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr); + if (error < 0) + goto setval_error; + } fclose(nmfile); fclose(ifcfg_file); diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh index ae5a7a8249a2..440a91b35823 100755 --- a/tools/hv/hv_set_ifconfig.sh +++ b/tools/hv/hv_set_ifconfig.sh @@ -53,7 +53,7 @@ # or "manual" if no boot-time protocol should be used) # # address1=ipaddr1/plen -# address=ipaddr2/plen +# address2=ipaddr2/plen # # gateway=gateway1;gateway2 # @@ -61,7 +61,7 @@ # # [ipv6] # address1=ipaddr1/plen -# address2=ipaddr1/plen +# address2=ipaddr2/plen # # gateway=gateway1;gateway2 # From 14204acc09f652169baed1141c671429047b1313 Mon Sep 17 00:00:00 2001 From: Abel Wu Date: Tue, 7 Nov 2023 17:05:07 +0800 Subject: [PATCH 0531/1397] sched/eevdf: Fix vruntime adjustment on reweight [ Upstream commit eab03c23c2a162085b13200d7942fc5a00b5ccc8 ] vruntime of the (on_rq && !0-lag) entity needs to be adjusted when it gets re-weighted, and the calculations can be simplified based on the fact that re-weight won't change the w-average of all the entities. Please check the proofs in comments. But adjusting vruntime can also cause position change in RB-tree hence require re-queue to fix up which might be costly. This might be avoided by deferring adjustment to the time the entity actually leaves tree (dequeue/pick), but that will negatively affect task selection and probably not good enough either. Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy") Signed-off-by: Abel Wu Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231107090510.71322-2-wuyun.abel@bytedance.com Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 151 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 128 insertions(+), 23 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1795f6fe991f..035132014817 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3626,41 +3626,140 @@ static inline void dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } #endif +static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se, + unsigned long weight) +{ + unsigned long old_weight = se->load.weight; + u64 avruntime = avg_vruntime(cfs_rq); + s64 vlag, vslice; + + /* + * VRUNTIME + * ======== + * + * COROLLARY #1: The virtual runtime of the entity needs to be + * adjusted if re-weight at !0-lag point. + * + * Proof: For contradiction assume this is not true, so we can + * re-weight without changing vruntime at !0-lag point. + * + * Weight VRuntime Avg-VRuntime + * before w v V + * after w' v' V' + * + * Since lag needs to be preserved through re-weight: + * + * lag = (V - v)*w = (V'- v')*w', where v = v' + * ==> V' = (V - v)*w/w' + v (1) + * + * Let W be the total weight of the entities before reweight, + * since V' is the new weighted average of entities: + * + * V' = (WV + w'v - wv) / (W + w' - w) (2) + * + * by using (1) & (2) we obtain: + * + * (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v + * ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v + * ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v + * ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3) + * + * Since we are doing at !0-lag point which means V != v, we + * can simplify (3): + * + * ==> W / (W + w' - w) = w / w' + * ==> Ww' = Ww + ww' - ww + * ==> W * (w' - w) = w * (w' - w) + * ==> W = w (re-weight indicates w' != w) + * + * So the cfs_rq contains only one entity, hence vruntime of + * the entity @v should always equal to the cfs_rq's weighted + * average vruntime @V, which means we will always re-weight + * at 0-lag point, thus breach assumption. Proof completed. + * + * + * COROLLARY #2: Re-weight does NOT affect weighted average + * vruntime of all the entities. + * + * Proof: According to corollary #1, Eq. (1) should be: + * + * (V - v)*w = (V' - v')*w' + * ==> v' = V' - (V - v)*w/w' (4) + * + * According to the weighted average formula, we have: + * + * V' = (WV - wv + w'v') / (W - w + w') + * = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w') + * = (WV - wv + w'V' - Vw + wv) / (W - w + w') + * = (WV + w'V' - Vw) / (W - w + w') + * + * ==> V'*(W - w + w') = WV + w'V' - Vw + * ==> V' * (W - w) = (W - w) * V (5) + * + * If the entity is the only one in the cfs_rq, then reweight + * always occurs at 0-lag point, so V won't change. Or else + * there are other entities, hence W != w, then Eq. (5) turns + * into V' = V. So V won't change in either case, proof done. + * + * + * So according to corollary #1 & #2, the effect of re-weight + * on vruntime should be: + * + * v' = V' - (V - v) * w / w' (4) + * = V - (V - v) * w / w' + * = V - vl * w / w' + * = V - vl' + */ + if (avruntime != se->vruntime) { + vlag = (s64)(avruntime - se->vruntime); + vlag = div_s64(vlag * old_weight, weight); + se->vruntime = avruntime - vlag; + } + + /* + * DEADLINE + * ======== + * + * When the weight changes, the virtual time slope changes and + * we should adjust the relative virtual deadline accordingly. + * + * d' = v' + (d - v)*w/w' + * = V' - (V - v)*w/w' + (d - v)*w/w' + * = V - (V - v)*w/w' + (d - v)*w/w' + * = V + (d - V)*w/w' + */ + vslice = (s64)(se->deadline - avruntime); + vslice = div_s64(vslice * old_weight, weight); + se->deadline = avruntime + vslice; +} + static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { - unsigned long old_weight = se->load.weight; + bool curr = cfs_rq->curr == se; if (se->on_rq) { /* commit outstanding execution time */ - if (cfs_rq->curr == se) + if (curr) update_curr(cfs_rq); else - avg_vruntime_sub(cfs_rq, se); + __dequeue_entity(cfs_rq, se); update_load_sub(&cfs_rq->load, se->load.weight); } dequeue_load_avg(cfs_rq, se); - update_load_set(&se->load, weight); - if (!se->on_rq) { /* * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i), * we need to scale se->vlag when w_i changes. */ - se->vlag = div_s64(se->vlag * old_weight, weight); + se->vlag = div_s64(se->vlag * se->load.weight, weight); } else { - s64 deadline = se->deadline - se->vruntime; - /* - * When the weight changes, the virtual time slope changes and - * we should adjust the relative virtual deadline accordingly. - */ - deadline = div_s64(deadline * old_weight, weight); - se->deadline = se->vruntime + deadline; - if (se != cfs_rq->curr) - min_deadline_cb_propagate(&se->run_node, NULL); + reweight_eevdf(cfs_rq, se, weight); } + update_load_set(&se->load, weight); + #ifdef CONFIG_SMP do { u32 divider = get_pelt_divider(&se->avg); @@ -3672,8 +3771,17 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, enqueue_load_avg(cfs_rq, se); if (se->on_rq) { update_load_add(&cfs_rq->load, se->load.weight); - if (cfs_rq->curr != se) - avg_vruntime_add(cfs_rq, se); + if (!curr) { + /* + * The entity's vruntime has been adjusted, so let's check + * whether the rq-wide min_vruntime needs updated too. Since + * the calculations above require stable min_vruntime rather + * than up-to-date one, we do the update at the end of the + * reweight process. + */ + __enqueue_entity(cfs_rq, se); + update_min_vruntime(cfs_rq); + } } } @@ -3817,14 +3925,11 @@ static void update_cfs_group(struct sched_entity *se) #ifndef CONFIG_SMP shares = READ_ONCE(gcfs_rq->tg->shares); - - if (likely(se->load.weight == shares)) - return; #else - shares = calc_group_shares(gcfs_rq); + shares = calc_group_shares(gcfs_rq); #endif - - reweight_entity(cfs_rq_of(se), se, shares); + if (unlikely(se->load.weight != shares)) + reweight_entity(cfs_rq_of(se), se, shares); } #else /* CONFIG_FAIR_GROUP_SCHED */ From 47a3075109978d9684f7fea59de7c639fcba7145 Mon Sep 17 00:00:00 2001 From: Keisuke Nishimura Date: Tue, 31 Oct 2023 14:38:22 +0100 Subject: [PATCH 0532/1397] sched/fair: Fix the decision for load balance [ Upstream commit 6d7e4782bcf549221b4ccfffec2cf4d1a473f1a3 ] should_we_balance is called for the decision to do load-balancing. When sched ticks invoke this function, only one CPU should return true. However, in the current code, two CPUs can return true. The following situation, where b means busy and i means idle, is an example, because CPU 0 and CPU 2 return true. [0, 1] [2, 3] b b i b This fix checks if there exists an idle CPU with busy sibling(s) after looking for a CPU on an idle core. If some idle CPUs with busy siblings are found, just the first one should do load-balancing. Fixes: b1bfeab9b002 ("sched/fair: Consider the idle state of the whole core for load balance") Signed-off-by: Keisuke Nishimura Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Chen Yu Reviewed-by: Shrikanth Hegde Reviewed-by: Vincent Guittot Link: https://lkml.kernel.org/r/20231031133821.1570861-1-keisuke.nishimura@inria.fr Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 035132014817..fa9fff0f9620 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -11121,12 +11121,16 @@ static int should_we_balance(struct lb_env *env) continue; } - /* Are we the first idle CPU? */ + /* + * Are we the first idle core in a non-SMT domain or higher, + * or the first idle CPU in a SMT domain? + */ return cpu == env->dst_cpu; } - if (idle_smt == env->dst_cpu) - return true; + /* Are we the first idle CPU with busy siblings? */ + if (idle_smt != -1) + return idle_smt == env->dst_cpu; /* Are we the first CPU of this group ? */ return group_balance_cpu(sg) == env->dst_cpu; From d0c739099c6932433c10afdcba0a4025fc81933f Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 9 Nov 2023 19:02:14 -0500 Subject: [PATCH 0533/1397] drm/msm/dsi: use the correct VREG_CTRL_1 value for 4nm cphy [ Upstream commit b3e0f94d15700ac8e8c1c2355834f5d5c753c41d ] Use the same value as the downstream driver. This change is needed for CPHY mode to work correctly. Fixes: 8b034e677111 ("drm/msm/dsi: add support for DSI-PHY on SM8550") Signed-off-by: Jonathan Marek Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/566987/ Link: https://lore.kernel.org/r/20231110000216.29979-1-jonathan@marek.ca Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 3b1ed02f644d..89a6344bc865 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -918,7 +918,7 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2)) { if (phy->cphy_mode) { vreg_ctrl_0 = 0x45; - vreg_ctrl_1 = 0x45; + vreg_ctrl_1 = 0x41; glbl_rescode_top_ctrl = 0x00; glbl_rescode_bot_ctrl = 0x00; } else { From 3ef5eee900894611f8038ffa119d485e11e32218 Mon Sep 17 00:00:00 2001 From: Gerd Bayer Date: Wed, 15 Nov 2023 16:59:58 +0100 Subject: [PATCH 0534/1397] s390/ism: ism driver implies smc protocol [ Upstream commit d565fa4300d9ebd5ba3bbd259ce841f8dab609d6 ] Since commit a72178cfe855 ("net/smc: Fix dependency of SMC on ISM") you can build the ism code without selecting the SMC network protocol. That leaves some ism functions be reported as unused. Move these functions under the conditional compile with CONFIG_SMC. Also codify the suggestion to also configure the SMC protocol in ism's Kconfig - but with an "imply" rather than a "select" as SMC depends on other config options and allow for a deliberate decision not to build SMC. Also, mention that in ISM's help. Fixes: a72178cfe855 ("net/smc: Fix dependency of SMC on ISM") Reported-by: Randy Dunlap Closes: https://lore.kernel.org/netdev/afd142a2-1fa0-46b9-8b2d-7652d41d3ab8@infradead.org/ Signed-off-by: Gerd Bayer Reviewed-by: Wenjia Zhang Reviewed-by: Simon Horman Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/Kconfig | 3 +- drivers/s390/net/ism_drv.c | 93 +++++++++++++++++++------------------- 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index 4902d45e929c..c61e6427384c 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -103,10 +103,11 @@ config CCWGROUP config ISM tristate "Support for ISM vPCI Adapter" depends on PCI + imply SMC default n help Select this option if you want to use the Internal Shared Memory - vPCI Adapter. + vPCI Adapter. The adapter can be used with the SMC network protocol. To compile as a module choose M. The module name is ism. If unsure, choose N. diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 6df7f377d2f9..81aabbfbbe2c 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -30,7 +30,6 @@ static const struct pci_device_id ism_device_table[] = { MODULE_DEVICE_TABLE(pci, ism_device_table); static debug_info_t *ism_debug_info; -static const struct smcd_ops ism_ops; #define NO_CLIENT 0xff /* must be >= MAX_CLIENTS */ static struct ism_client *clients[MAX_CLIENTS]; /* use an array rather than */ @@ -289,22 +288,6 @@ static int ism_read_local_gid(struct ism_dev *ism) return ret; } -static int ism_query_rgid(struct ism_dev *ism, u64 rgid, u32 vid_valid, - u32 vid) -{ - union ism_query_rgid cmd; - - memset(&cmd, 0, sizeof(cmd)); - cmd.request.hdr.cmd = ISM_QUERY_RGID; - cmd.request.hdr.len = sizeof(cmd.request); - - cmd.request.rgid = rgid; - cmd.request.vlan_valid = vid_valid; - cmd.request.vlan_id = vid; - - return ism_cmd(ism, &cmd); -} - static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb) { clear_bit(dmb->sba_idx, ism->sba_bitmap); @@ -429,23 +412,6 @@ static int ism_del_vlan_id(struct ism_dev *ism, u64 vlan_id) return ism_cmd(ism, &cmd); } -static int ism_signal_ieq(struct ism_dev *ism, u64 rgid, u32 trigger_irq, - u32 event_code, u64 info) -{ - union ism_sig_ieq cmd; - - memset(&cmd, 0, sizeof(cmd)); - cmd.request.hdr.cmd = ISM_SIGNAL_IEQ; - cmd.request.hdr.len = sizeof(cmd.request); - - cmd.request.rgid = rgid; - cmd.request.trigger_irq = trigger_irq; - cmd.request.event_code = event_code; - cmd.request.info = info; - - return ism_cmd(ism, &cmd); -} - static unsigned int max_bytes(unsigned int start, unsigned int len, unsigned int boundary) { @@ -503,14 +469,6 @@ u8 *ism_get_seid(void) } EXPORT_SYMBOL_GPL(ism_get_seid); -static u16 ism_get_chid(struct ism_dev *ism) -{ - if (!ism || !ism->pdev) - return 0; - - return to_zpci(ism->pdev)->pchid; -} - static void ism_handle_event(struct ism_dev *ism) { struct ism_event *entry; @@ -569,11 +527,6 @@ static irqreturn_t ism_handle_irq(int irq, void *data) return IRQ_HANDLED; } -static u64 ism_get_local_gid(struct ism_dev *ism) -{ - return ism->local_gid; -} - static int ism_dev_init(struct ism_dev *ism) { struct pci_dev *pdev = ism->pdev; @@ -774,6 +727,22 @@ module_exit(ism_exit); /*************************** SMC-D Implementation *****************************/ #if IS_ENABLED(CONFIG_SMC) +static int ism_query_rgid(struct ism_dev *ism, u64 rgid, u32 vid_valid, + u32 vid) +{ + union ism_query_rgid cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.request.hdr.cmd = ISM_QUERY_RGID; + cmd.request.hdr.len = sizeof(cmd.request); + + cmd.request.rgid = rgid; + cmd.request.vlan_valid = vid_valid; + cmd.request.vlan_id = vid; + + return ism_cmd(ism, &cmd); +} + static int smcd_query_rgid(struct smcd_dev *smcd, u64 rgid, u32 vid_valid, u32 vid) { @@ -811,6 +780,23 @@ static int smcd_reset_vlan_required(struct smcd_dev *smcd) return ism_cmd_simple(smcd->priv, ISM_RESET_VLAN); } +static int ism_signal_ieq(struct ism_dev *ism, u64 rgid, u32 trigger_irq, + u32 event_code, u64 info) +{ + union ism_sig_ieq cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.request.hdr.cmd = ISM_SIGNAL_IEQ; + cmd.request.hdr.len = sizeof(cmd.request); + + cmd.request.rgid = rgid; + cmd.request.trigger_irq = trigger_irq; + cmd.request.event_code = event_code; + cmd.request.info = info; + + return ism_cmd(ism, &cmd); +} + static int smcd_signal_ieq(struct smcd_dev *smcd, u64 rgid, u32 trigger_irq, u32 event_code, u64 info) { @@ -830,11 +816,24 @@ static int smcd_supports_v2(void) SYSTEM_EID.type[0] != '0'; } +static u64 ism_get_local_gid(struct ism_dev *ism) +{ + return ism->local_gid; +} + static u64 smcd_get_local_gid(struct smcd_dev *smcd) { return ism_get_local_gid(smcd->priv); } +static u16 ism_get_chid(struct ism_dev *ism) +{ + if (!ism || !ism->pdev) + return 0; + + return to_zpci(ism->pdev)->pchid; +} + static u16 smcd_get_chid(struct smcd_dev *smcd) { return ism_get_chid(smcd->priv); From d299ab024330d251ef84470de2c60e798925c039 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Nov 2023 13:12:58 +0000 Subject: [PATCH 0535/1397] rxrpc: Fix RTT determination to use any ACK as a source [ Upstream commit 3798680f2fbbe0ca3ab6138b34e0d161c36497ee ] Fix RTT determination to be able to use any type of ACK as the response from which RTT can be calculated provided its ack.serial is non-zero and matches the serial number of an outgoing DATA or ACK packet. This shouldn't be limited to REQUESTED-type ACKs as these can have other types substituted for them for things like duplicate or out-of-order packets. Fixes: 4700c4d80b7b ("rxrpc: Fix loss of RTT samples due to interposed ACK") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/trace/events/rxrpc.h | 2 +- net/rxrpc/input.c | 35 ++++++++++++++++------------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 4c53a5ef6257..f7e537f64db4 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -328,7 +328,7 @@ E_(rxrpc_rtt_tx_ping, "PING") #define rxrpc_rtt_rx_traces \ - EM(rxrpc_rtt_rx_cancel, "CNCL") \ + EM(rxrpc_rtt_rx_other_ack, "OACK") \ EM(rxrpc_rtt_rx_obsolete, "OBSL") \ EM(rxrpc_rtt_rx_lost, "LOST") \ EM(rxrpc_rtt_rx_ping_response, "PONG") \ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 030d64f282f3..3f9594d12519 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -643,12 +643,8 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); smp_mb(); /* Read data before setting avail bit */ set_bit(i, &call->rtt_avail); - if (type != rxrpc_rtt_rx_cancel) - rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial, - sent_at, resp_time); - else - trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_cancel, i, - orig_serial, acked_serial, 0, 0); + rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial, + sent_at, resp_time); matched = true; } @@ -801,20 +797,21 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) summary.ack_reason, nr_acks); rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]); - switch (ack.reason) { - case RXRPC_ACK_PING_RESPONSE: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_ping_response); - break; - case RXRPC_ACK_REQUESTED: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_requested_ack); - break; - default: - if (acked_serial != 0) + if (acked_serial != 0) { + switch (ack.reason) { + case RXRPC_ACK_PING_RESPONSE: rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_cancel); - break; + rxrpc_rtt_rx_ping_response); + break; + case RXRPC_ACK_REQUESTED: + rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, + rxrpc_rtt_rx_requested_ack); + break; + default: + rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, + rxrpc_rtt_rx_other_ack); + break; + } } if (ack.reason == RXRPC_ACK_PING) { From 28612371390da94e9100d2d0384c282099918d2f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Nov 2023 13:12:59 +0000 Subject: [PATCH 0536/1397] rxrpc: Defer the response to a PING ACK until we've parsed it [ Upstream commit 1a01319feef7047aa2ba400ffa3e047776aa29ca ] Defer the generation of a PING RESPONSE ACK in response to a PING ACK until we've parsed the PING ACK so that we pick up any changes to the packet queue so that we can update ackinfo. This is also applied to an ACK generated in response to an ACK with the REQUEST_ACK flag set. Note that whilst the problem was added in commit 248f219cb8bc, it didn't really matter at that point because the ACK was proposed in softirq mode and generated asynchronously later in process context, taking the latest values at the time. But this fix is only needed since the move to parse incoming packets in an I/O thread rather than in softirq and generate the ACK at point of proposal (b0346843b1076b34a0278ff601f8f287535cb064). Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rxrpc/input.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 3f9594d12519..92495e73b869 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -814,14 +814,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } } - if (ack.reason == RXRPC_ACK_PING) { - rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial, - rxrpc_propose_ack_respond_to_ping); - } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { - rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial, - rxrpc_propose_ack_respond_to_ack); - } - /* If we get an EXCEEDS_WINDOW ACK from the server, it probably * indicates that the client address changed due to NAT. The server * lost the call because it switched to a different peer. @@ -832,7 +824,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_is_client_call(call)) { rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, 0, -ENETRESET); - return; + goto send_response; } /* If we get an OUT_OF_SEQUENCE ACK from the server, that can also @@ -846,7 +838,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_is_client_call(call)) { rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, 0, -ENETRESET); - return; + goto send_response; } /* Discard any out-of-order or duplicate ACKs (outside lock). */ @@ -854,7 +846,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, first_soft_ack, call->acks_first_seq, prev_pkt, call->acks_prev_seq); - return; + goto send_response; } info.rxMTU = 0; @@ -894,7 +886,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_CALL_SERVER_AWAIT_ACK: break; default: - return; + goto send_response; } if (before(hard_ack, call->acks_hard_ack) || @@ -906,7 +898,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (after(hard_ack, call->acks_hard_ack)) { if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); - return; + goto send_response; } } @@ -924,6 +916,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_ping_for_lost_reply); rxrpc_congestion_management(call, skb, &summary, acked_serial); + +send_response: + if (ack.reason == RXRPC_ACK_PING) + rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial, + rxrpc_propose_ack_respond_to_ping); + else if (sp->hdr.flags & RXRPC_REQUEST_ACK) + rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial, + rxrpc_propose_ack_respond_to_ack); } /* From 3a4ef51589dac30c66558642b0a678c795036a08 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2023 16:26:59 +0000 Subject: [PATCH 0537/1397] afs: Fix afs_server_list to be cleaned up with RCU [ Upstream commit e6bace7313d61e31f2b16fa3d774fd8cb3cb869e ] afs_server_list is accessed with the rcu_read_lock() held from volume->servers, so it needs to be cleaned up correctly. Fix this by using kfree_rcu() instead of kfree(). Fixes: 8a070a964877 ("afs: Detect cell aliases 1 - Cells with root volumes") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Sasha Levin --- fs/afs/internal.h | 1 + fs/afs/server_list.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index da73b97e19a9..5041eae64423 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -553,6 +553,7 @@ struct afs_server_entry { }; struct afs_server_list { + struct rcu_head rcu; afs_volid_t vids[AFS_MAXTYPES]; /* Volume IDs */ refcount_t usage; unsigned char nr_servers; diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index ed9056703505..b59896b1de0a 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -17,7 +17,7 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist) for (i = 0; i < slist->nr_servers; i++) afs_unuse_server(net, slist->servers[i].server, afs_server_trace_put_slist); - kfree(slist); + kfree_rcu(slist, rcu); } } From 18350d280999cbde37fcac4f55117fbb40972715 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Jun 2023 09:43:54 +0100 Subject: [PATCH 0538/1397] afs: Make error on cell lookup failure consistent with OpenAFS [ Upstream commit 2a4ca1b4b77850544408595e2433f5d7811a9daa ] When kafs tries to look up a cell in the DNS or the local config, it will translate a lookup failure into EDESTADDRREQ whereas OpenAFS translates it into ENOENT. Applications such as West expect the latter behaviour and fail if they see the former. This can be seen by trying to mount an unknown cell: # mount -t afs %example.com:cell.root /mnt mount: /mnt: mount(2) system call failed: Destination address required. Fixes: 4d673da14533 ("afs: Support the AFS dynamic root") Reported-by: Markus Suvanto Link: https://bugzilla.kernel.org/show_bug.cgi?id=216637 Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Sasha Levin --- fs/afs/dynroot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 95bcbd7654d1..8081d68004d0 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -132,8 +132,8 @@ static int afs_probe_cell_name(struct dentry *dentry) ret = dns_query(net->net, "afsdb", name, len, "srv=1", NULL, NULL, false); - if (ret == -ENODATA) - ret = -EDESTADDRREQ; + if (ret == -ENODATA || ret == -ENOKEY) + ret = -ENOENT; return ret; } From 3902d1a1b6ddb1ba247b532ee7621d1cd8ac59c9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 17 Nov 2023 10:35:23 +0800 Subject: [PATCH 0539/1397] blk-cgroup: avoid to warn !rcu_read_lock_held() in blkg_lookup() [ Upstream commit 35a99d6557cacbc177314735342f77a2dda41872 ] So far, all callers either holds spin lock or rcu read explicitly, and most of the caller has added WARN_ON_ONCE(!rcu_read_lock_held()) or lockdep_assert_held(&disk->queue->queue_lock). Remove WARN_ON_ONCE(!rcu_read_lock_held()) from blkg_lookup() for killing the false positive warning from blkg_conf_prep(). Reported-by: Changhui Zhong Fixes: 83462a6c971c ("blkcg: Drop unnecessary RCU read [un]locks from blkg_conf_prep/finish()") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20231117023527.3188627-3-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-cgroup.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 624c03c8fe64..fd482439afbc 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -249,8 +249,6 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, { struct blkcg_gq *blkg; - WARN_ON_ONCE(!rcu_read_lock_held()); - if (blkcg == &blkcg_root) return q->root_blkg; From 21bffb862ca0ce02f4a0f742e7e616fb75071362 Mon Sep 17 00:00:00 2001 From: Xuxin Xiong Date: Tue, 14 Nov 2023 12:42:05 +0800 Subject: [PATCH 0540/1397] drm/panel: auo,b101uan08.3: Fine tune the panel power sequence [ Upstream commit 6965809e526917b73c8f9178173184dcf13cec4b ] For "auo,b101uan08.3" this panel, it is stipulated in the panel spec that MIPI needs to keep the LP11 state before the lcm_reset pin is pulled high. Fixes: 56ad624b4cb5 ("drm/panel: support for auo, b101uan08.3 wuxga dsi video mode panel") Signed-off-by: Xuxin Xiong Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231114044205.613421-1-xuxinxiong@huaqin.corp-partner.google.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index c9087f474cbc..980b10244d4e 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -2049,6 +2049,7 @@ static const struct panel_desc auo_b101uan08_3_desc = { .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_LPM, .init_cmds = auo_b101uan08_3_init_cmd, + .lp11_before_reset = true, }; static const struct drm_display_mode boe_tv105wum_nw0_default_mode = { From 3fb0fa08641903304b9d81d52a379ff031dc41d4 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Mon, 2 Oct 2023 08:57:33 -0400 Subject: [PATCH 0541/1397] fs: Pass AT_GETATTR_NOSEC flag to getattr interface function [ Upstream commit 8a924db2d7b5eb69ba08b1a0af46e9f1359a9bdf ] When vfs_getattr_nosec() calls a filesystem's getattr interface function then the 'nosec' should propagate into this function so that vfs_getattr_nosec() can again be called from the filesystem's gettattr rather than vfs_getattr(). The latter would add unnecessary security checks that the initial vfs_getattr_nosec() call wanted to avoid. Therefore, introduce the getattr flag GETATTR_NOSEC and allow to pass with the new getattr_flags parameter to the getattr interface function. In overlayfs and ecryptfs use this flag to determine which one of the two functions to call. In a recent code change introduced to IMA vfs_getattr_nosec() ended up calling vfs_getattr() in overlayfs, which in turn called security_inode_getattr() on an exiting process that did not have current->fs set anymore, which then caused a kernel NULL pointer dereference. With this change the call to security_inode_getattr() can be avoided, thus avoiding the NULL pointer dereference. Reported-by: Fixes: db1d1e8b9867 ("IMA: use vfs_getattr_nosec to get the i_version") Cc: Alexander Viro Cc: Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Tyler Hicks Cc: Mimi Zohar Suggested-by: Christian Brauner Co-developed-by: Amir Goldstein Signed-off-by: Stefan Berger Link: https://lore.kernel.org/r/20231002125733.1251467-1-stefanb@linux.vnet.ibm.com Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/ecryptfs/inode.c | 12 ++++++++++-- fs/overlayfs/inode.c | 10 +++++----- fs/overlayfs/overlayfs.h | 8 ++++++++ fs/stat.c | 6 +++++- include/uapi/linux/fcntl.h | 3 +++ 5 files changed, 31 insertions(+), 8 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 992d9c7e64ae..5ab4b87888a7 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -998,6 +998,14 @@ static int ecryptfs_getattr_link(struct mnt_idmap *idmap, return rc; } +static int ecryptfs_do_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + if (flags & AT_GETATTR_NOSEC) + return vfs_getattr_nosec(path, stat, request_mask, flags); + return vfs_getattr(path, stat, request_mask, flags); +} + static int ecryptfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) @@ -1006,8 +1014,8 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap, struct kstat lower_stat; int rc; - rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat, - request_mask, flags); + rc = ecryptfs_do_getattr(ecryptfs_dentry_to_lower_path(dentry), + &lower_stat, request_mask, flags); if (!rc) { fsstack_copy_attr_all(d_inode(dentry), ecryptfs_inode_to_lower(d_inode(dentry))); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 83ef66644c21..fca29dba7b14 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -171,7 +171,7 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat, request_mask, flags); + err = ovl_do_getattr(&realpath, stat, request_mask, flags); if (err) goto out; @@ -196,8 +196,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, (!is_dir ? STATX_NLINK : 0); ovl_path_lower(dentry, &realpath); - err = vfs_getattr(&realpath, &lowerstat, - lowermask, flags); + err = ovl_do_getattr(&realpath, &lowerstat, lowermask, + flags); if (err) goto out; @@ -249,8 +249,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, ovl_path_lowerdata(dentry, &realpath); if (realpath.dentry) { - err = vfs_getattr(&realpath, &lowerdatastat, - lowermask, flags); + err = ovl_do_getattr(&realpath, &lowerdatastat, + lowermask, flags); if (err) goto out; } else { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 9817b2dcb132..09ca82ed0f8c 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -397,6 +397,14 @@ static inline bool ovl_open_flags_need_copy_up(int flags) return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)); } +static inline int ovl_do_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + if (flags & AT_GETATTR_NOSEC) + return vfs_getattr_nosec(path, stat, request_mask, flags); + return vfs_getattr(path, stat, request_mask, flags); +} + /* util.c */ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); diff --git a/fs/stat.c b/fs/stat.c index d43a5cc1bfa4..5375be5f97cc 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -133,7 +133,8 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, idmap = mnt_idmap(path->mnt); if (inode->i_op->getattr) return inode->i_op->getattr(idmap, path, stat, - request_mask, query_flags); + request_mask, + query_flags | AT_GETATTR_NOSEC); generic_fillattr(idmap, request_mask, inode, stat); return 0; @@ -166,6 +167,9 @@ int vfs_getattr(const struct path *path, struct kstat *stat, { int retval; + if (WARN_ON_ONCE(query_flags & AT_GETATTR_NOSEC)) + return -EPERM; + retval = security_inode_getattr(path); if (retval) return retval; diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 6c80f96049bd..282e90aeb163 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -116,5 +116,8 @@ #define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to compare object identity and may not be usable to open_by_handle_at(2) */ +#if defined(__KERNEL__) +#define AT_GETATTR_NOSEC 0x80000000 +#endif #endif /* _UAPI_LINUX_FCNTL_H */ From c23425107914975759f3a6a33edf2f663a6441d6 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 9 Oct 2023 00:33:15 +0200 Subject: [PATCH 0542/1397] drm/panel: simple: Fix Innolux G101ICE-L01 bus flags [ Upstream commit 06fc41b09cfbc02977acd9189473593a37d82d9b ] Add missing .bus_flags = DRM_BUS_FLAG_DE_HIGH to this panel description, ones which match both the datasheet and the panel display_timing flags . Fixes: 1e29b840af9f ("drm/panel: simple: Add Innolux G101ICE-L01 panel") Signed-off-by: Marek Vasut Reviewed-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231008223315.279215-1-marex@denx.de Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index dd7928d9570f..944eb83ff25b 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2349,6 +2349,7 @@ static const struct panel_desc innolux_g101ice_l01 = { .disable = 200, }, .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, .connector_type = DRM_MODE_CONNECTOR_LVDS, }; From d6569fecd6b3ed9344c3a59a4f3086595c903a39 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 9 Oct 2023 00:32:56 +0200 Subject: [PATCH 0543/1397] drm/panel: simple: Fix Innolux G101ICE-L01 timings [ Upstream commit 3f9a91b6c00e655d27bd785dcda1742dbdc31bda ] The Innolux G101ICE-L01 datasheet [1] page 17 table 6.1 INPUT SIGNAL TIMING SPECIFICATIONS indicates that maximum vertical blanking time is 40 lines. Currently the driver uses 29 lines. Fix it, and since this panel is a DE panel, adjust the timings to make them less hostile to controllers which cannot do 1 px HSA/VSA, distribute the delays evenly between all three parts. [1] https://www.data-modul.com/sites/default/files/products/G101ICE-L01-C2-specification-12042389.pdf Fixes: 1e29b840af9f ("drm/panel: simple: Add Innolux G101ICE-L01 panel") Signed-off-by: Marek Vasut Reviewed-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231008223256.279196-1-marex@denx.de Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-simple.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 944eb83ff25b..6e46e55d29a9 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2326,13 +2326,13 @@ static const struct panel_desc innolux_g070y2_t02 = { static const struct display_timing innolux_g101ice_l01_timing = { .pixelclock = { 60400000, 71100000, 74700000 }, .hactive = { 1280, 1280, 1280 }, - .hfront_porch = { 41, 80, 100 }, - .hback_porch = { 40, 79, 99 }, - .hsync_len = { 1, 1, 1 }, + .hfront_porch = { 30, 60, 70 }, + .hback_porch = { 30, 60, 70 }, + .hsync_len = { 22, 40, 60 }, .vactive = { 800, 800, 800 }, - .vfront_porch = { 5, 11, 14 }, - .vback_porch = { 4, 11, 14 }, - .vsync_len = { 1, 1, 1 }, + .vfront_porch = { 3, 8, 14 }, + .vback_porch = { 3, 8, 14 }, + .vsync_len = { 4, 7, 12 }, .flags = DISPLAY_FLAGS_DE_HIGH, }; From 61a55071653974dab172d4c5d699bb365cfd13c9 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Fri, 17 Nov 2023 18:11:08 +0800 Subject: [PATCH 0544/1397] net: wangxun: fix kernel panic due to null pointer [ Upstream commit 8ba2c459668cfe2aaacc5ebcd35b4b9ef8643013 ] When the device uses a custom subsystem vendor ID, the function wx_sw_init() returns before the memory of 'wx->mac_table' is allocated. The null pointer will causes the kernel panic. Fixes: 79625f45ca73 ("net: wangxun: Move MAC address handling to libwx") Signed-off-by: Jiawen Wu Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 8 +++++--- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 4 +--- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 4 +--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 85dc16faca54..52130df26aee 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1677,10 +1677,12 @@ int wx_sw_init(struct wx *wx) wx->subsystem_device_id = pdev->subsystem_device; } else { err = wx_flash_read_dword(wx, 0xfffdc, &ssid); - if (!err) - wx->subsystem_device_id = swab16((u16)ssid); + if (err < 0) { + wx_err(wx, "read of internal subsystem device id failed\n"); + return err; + } - return err; + wx->subsystem_device_id = swab16((u16)ssid); } wx->mac_table = kcalloc(wx->mac.num_rar_entries, diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 2b431db6085a..a4d63d2f3c5b 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -121,10 +121,8 @@ static int ngbe_sw_init(struct wx *wx) /* PCI config space info */ err = wx_sw_init(wx); - if (err < 0) { - wx_err(wx, "read of internal subsystem device id failed\n"); + if (err < 0) return err; - } /* mac type, phy type , oem type */ ngbe_init_type_code(wx); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 5c3aed516ac2..d60c26ba0ba4 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -362,10 +362,8 @@ static int txgbe_sw_init(struct wx *wx) /* PCI config space info */ err = wx_sw_init(wx); - if (err < 0) { - wx_err(wx, "read of internal subsystem device id failed\n"); + if (err < 0) return err; - } txgbe_init_type_code(wx); From b605d23cb162bf454e621e234c127ee98bb60c8d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Nov 2023 14:17:33 +0000 Subject: [PATCH 0545/1397] wireguard: use DEV_STATS_INC() [ Upstream commit 93da8d75a66568ba4bb5b14ad2833acd7304cd02 ] wg_xmit() can be called concurrently, KCSAN reported [1] some device stats updates can be lost. Use DEV_STATS_INC() for this unlikely case. [1] BUG: KCSAN: data-race in wg_xmit / wg_xmit read-write to 0xffff888104239160 of 8 bytes by task 1375 on cpu 0: wg_xmit+0x60f/0x680 drivers/net/wireguard/device.c:231 __netdev_start_xmit include/linux/netdevice.h:4918 [inline] netdev_start_xmit include/linux/netdevice.h:4932 [inline] xmit_one net/core/dev.c:3543 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3559 ... read-write to 0xffff888104239160 of 8 bytes by task 1378 on cpu 1: wg_xmit+0x60f/0x680 drivers/net/wireguard/device.c:231 __netdev_start_xmit include/linux/netdevice.h:4918 [inline] netdev_start_xmit include/linux/netdevice.h:4932 [inline] xmit_one net/core/dev.c:3543 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3559 ... v2: also change wg_packet_consume_data_done() (Hangbin Liu) and wg_packet_purge_staged_packets() Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Jason A. Donenfeld Cc: Hangbin Liu Signed-off-by: Jason A. Donenfeld Reviewed-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wireguard/device.c | 4 ++-- drivers/net/wireguard/receive.c | 12 ++++++------ drivers/net/wireguard/send.c | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 258dcc103921..deb9636b0ecf 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -210,7 +210,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) */ while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) { dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue)); - ++dev->stats.tx_dropped; + DEV_STATS_INC(dev, tx_dropped); } skb_queue_splice_tail(&packets, &peer->staged_packet_queue); spin_unlock_bh(&peer->staged_packet_queue.lock); @@ -228,7 +228,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) else if (skb->protocol == htons(ETH_P_IPV6)) icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); err: - ++dev->stats.tx_errors; + DEV_STATS_INC(dev, tx_errors); kfree_skb(skb); return ret; } diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 0b3f0c843550..a176653c8861 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -416,20 +416,20 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n", dev->name, skb, peer->internal_id, &peer->endpoint.addr); - ++dev->stats.rx_errors; - ++dev->stats.rx_frame_errors; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_frame_errors); goto packet_processed; dishonest_packet_type: net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n", dev->name, peer->internal_id, &peer->endpoint.addr); - ++dev->stats.rx_errors; - ++dev->stats.rx_frame_errors; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_frame_errors); goto packet_processed; dishonest_packet_size: net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n", dev->name, peer->internal_id, &peer->endpoint.addr); - ++dev->stats.rx_errors; - ++dev->stats.rx_length_errors; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_length_errors); goto packet_processed; packet_processed: dev_kfree_skb(skb); diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index 95c853b59e1d..0d48e0f4a1ba 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -333,7 +333,8 @@ static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first) void wg_packet_purge_staged_packets(struct wg_peer *peer) { spin_lock_bh(&peer->staged_packet_queue.lock); - peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen; + DEV_STATS_ADD(peer->device->dev, tx_dropped, + peer->staged_packet_queue.qlen); __skb_queue_purge(&peer->staged_packet_queue); spin_unlock_bh(&peer->staged_packet_queue.lock); } From 8da4a34c8f891e87bc9be013b315f9e2c198df47 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Fri, 17 Nov 2023 16:10:18 +0530 Subject: [PATCH 0546/1397] octeontx2-pf: Fix memory leak during interface down [ Upstream commit 5f228d7c8a539714c1e9b7e7534f76bb7979f268 ] During 'ifconfig down' one RSS memory was not getting freed. This patch fixes the same. Fixes: 81a4362016e7 ("octeontx2-pf: Add RSS multi group support") Signed-off-by: Suman Ghosh Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 91b99fd70361..ba95ac913274 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1934,6 +1934,8 @@ int otx2_stop(struct net_device *netdev) /* Clear RSS enable flag */ rss = &pf->hw.rss_info; rss->enable = false; + if (!netif_is_rxfh_configured(netdev)) + kfree(rss->rss_ctx[DEFAULT_RSS_CONTEXT_GROUP]); /* Cleanup Queue IRQ */ vec = pci_irq_vector(pf->pdev, From 6fdfac352978516c89349926d1922fabd30b1660 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 31 Oct 2023 04:00:07 +0000 Subject: [PATCH 0547/1397] ata: pata_isapnp: Add missing error check for devm_ioport_map() [ Upstream commit a6925165ea82b7765269ddd8dcad57c731aa00de ] Add missing error return check for devm_ioport_map() and return the error if this function call fails. Fixes: 0d5ff566779f ("libata: convert to iomap") Signed-off-by: Chen Ni Reviewed-by: Sergey Shtylyov Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/pata_isapnp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/pata_isapnp.c b/drivers/ata/pata_isapnp.c index 25a63d043c8e..0f77e0424066 100644 --- a/drivers/ata/pata_isapnp.c +++ b/drivers/ata/pata_isapnp.c @@ -82,6 +82,9 @@ static int isapnp_init_one(struct pnp_dev *idev, const struct pnp_device_id *dev if (pnp_port_valid(idev, 1)) { ctl_addr = devm_ioport_map(&idev->dev, pnp_port_start(idev, 1), 1); + if (!ctl_addr) + return -ENOMEM; + ap->ioaddr.altstatus_addr = ctl_addr; ap->ioaddr.ctl_addr = ctl_addr; ap->ops = &isapnp_port_ops; From 1911b120d414fed5ef17a4f47fa1186f0ed73527 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 15 Nov 2023 11:54:03 +0100 Subject: [PATCH 0548/1397] drm/i915: do not clean GT table on error path [ Upstream commit 0561794b6b642b84b879bf97061c4b4fa692839e ] The only task of intel_gt_release_all is to zero gt table. Calling it on error path prevents intel_gt_driver_late_release_all (called from i915_driver_late_release) to cleanup GTs, causing leakage. After i915_driver_late_release GT array is not used anymore so it does not need cleaning at all. Sample leak report: BUG i915_request (...): Objects remaining in i915_request on __kmem_cache_shutdown() ... Object 0xffff888113420040 @offset=64 Allocated in __i915_request_create+0x75/0x610 [i915] age=18339 cpu=1 pid=1454 kmem_cache_alloc+0x25b/0x270 __i915_request_create+0x75/0x610 [i915] i915_request_create+0x109/0x290 [i915] __engines_record_defaults+0xca/0x440 [i915] intel_gt_init+0x275/0x430 [i915] i915_gem_init+0x135/0x2c0 [i915] i915_driver_probe+0x8d1/0xdc0 [i915] v2: removed whole intel_gt_release_all Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8489 Fixes: bec68cc9ea42 ("drm/i915: Prepare for multiple GTs") Signed-off-by: Andrzej Hajda Reviewed-by: Tvrtko Ursulin Reviewed-by: Nirmoy Das Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231115-dont_clean_gt_on_error_path-v2-1-54250125470a@intel.com (cherry picked from commit e899505533852bf1da133f2f4c9a9655ff77f7e5) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_gt.c | 11 ----------- drivers/gpu/drm/i915/i915_driver.c | 4 +--- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 449f0b7fc843..95631e8f39e7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -967,8 +967,6 @@ int intel_gt_probe_all(struct drm_i915_private *i915) err: i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret); - intel_gt_release_all(i915); - return ret; } @@ -987,15 +985,6 @@ int intel_gt_tiles_init(struct drm_i915_private *i915) return 0; } -void intel_gt_release_all(struct drm_i915_private *i915) -{ - struct intel_gt *gt; - unsigned int id; - - for_each_gt(gt, i915, id) - i915->gt[id] = NULL; -} - void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index ec4d26b3c17c..8dc5f85b7747 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -777,7 +777,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ret = i915_driver_mmio_probe(i915); if (ret < 0) - goto out_tiles_cleanup; + goto out_runtime_pm_put; ret = i915_driver_hw_probe(i915); if (ret < 0) @@ -837,8 +837,6 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i915_ggtt_driver_late_release(i915); out_cleanup_mmio: i915_driver_mmio_release(i915); -out_tiles_cleanup: - intel_gt_release_all(i915); out_runtime_pm_put: enable_rpm_wakeref_asserts(&i915->runtime_pm); i915_driver_late_release(i915); From 3461e3bff21f00487f0be2ecfe30b05abdb107df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Oct 2023 16:10:17 +0200 Subject: [PATCH 0549/1397] filemap: add a per-mapping stable writes flag [ Upstream commit 762321dab9a72760bf9aec48362f932717c9424d ] folio_wait_stable waits for writeback to finish before modifying the contents of a folio again, e.g. to support check summing of the data in the block integrity code. Currently this behavior is controlled by the SB_I_STABLE_WRITES flag on the super_block, which means it is uniform for the entire file system. This is wrong for the block device pseudofs which is shared by all block devices, or file systems that can use multiple devices like XFS witht the RT subvolume or btrfs (although btrfs currently reimplements folio_wait_stable anyway). Add a per-address_space AS_STABLE_WRITES flag to control the behavior in a more fine grained way. The existing SB_I_STABLE_WRITES is kept to initialize AS_STABLE_WRITES to the existing default which covers most cases. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231025141020.192413-2-hch@lst.de Tested-by: Ilya Dryomov Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner Stable-dep-of: 1898efcdbed3 ("block: update the stable_writes flag in bdev_add") Signed-off-by: Sasha Levin --- fs/inode.c | 2 ++ include/linux/pagemap.h | 17 +++++++++++++++++ mm/page-writeback.c | 2 +- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/inode.c b/fs/inode.c index 84bc3c76e5cc..ae1a6410b53d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -215,6 +215,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) lockdep_set_class_and_name(&mapping->invalidate_lock, &sb->s_type->invalidate_lock_key, "mapping.invalidate_lock"); + if (sb->s_iflags & SB_I_STABLE_WRITES) + mapping_set_stable_writes(mapping); inode->i_private = NULL; inode->i_mapping = mapping; INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 351c3b7f93a1..8c9608b217b0 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -204,6 +204,8 @@ enum mapping_flags { AS_NO_WRITEBACK_TAGS = 5, AS_LARGE_FOLIO_SUPPORT = 6, AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ + AS_STABLE_WRITES, /* must wait for writeback before modifying + folio contents */ }; /** @@ -289,6 +291,21 @@ static inline void mapping_clear_release_always(struct address_space *mapping) clear_bit(AS_RELEASE_ALWAYS, &mapping->flags); } +static inline bool mapping_stable_writes(const struct address_space *mapping) +{ + return test_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_set_stable_writes(struct address_space *mapping) +{ + set_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_clear_stable_writes(struct address_space *mapping) +{ + clear_bit(AS_STABLE_WRITES, &mapping->flags); +} + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->gfp_mask; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b8d3d7040a50..4656534b8f5c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -3110,7 +3110,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable); */ void folio_wait_stable(struct folio *folio) { - if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES) + if (mapping_stable_writes(folio_mapping(folio))) folio_wait_writeback(folio); } EXPORT_SYMBOL_GPL(folio_wait_stable); From 468a549d2ab57c89085f5e8f9a308908c6d689e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Oct 2023 16:10:18 +0200 Subject: [PATCH 0550/1397] block: update the stable_writes flag in bdev_add [ Upstream commit 1898efcdbed32bb1c67269c985a50bab0dbc9493 ] Propagate the per-queue stable_write flags into each bdev inode in bdev_add. This makes sure devices that require stable writes have it set for I/O on the block device node as well. Note that this doesn't cover the case of a flag changing on a live device yet. We should handle that as well, but I plan to cover it as part of a more general rework of how changing runtime paramters on block devices works. Fixes: 1cb039f3dc16 ("bdi: replace BDI_CAP_STABLE_WRITES with a queue and a sb flag") Reported-by: Ilya Dryomov Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231025141020.192413-3-hch@lst.de Tested-by: Ilya Dryomov Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- block/bdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/bdev.c b/block/bdev.c index f3b13aa1b7d4..04dba25b0019 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -425,6 +425,8 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) void bdev_add(struct block_device *bdev, dev_t dev) { + if (bdev_stable_writes(bdev)) + mapping_set_stable_writes(bdev->bd_inode->i_mapping); bdev->bd_dev = dev; bdev->bd_inode->i_rdev = dev; bdev->bd_inode->i_ino = dev; From 53da7fe847d58e771ee9b01ea14e3a94ff60cd7d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 19 Nov 2023 18:56:17 -0500 Subject: [PATCH 0551/1397] libfs: getdents() should return 0 after reaching EOD [ Upstream commit 796432efab1e372d404e7a71cc6891a53f105051 ] The new directory offset helpers don't conform with the convention of getdents() returning no more entries once a directory file descriptor has reached the current end-of-directory. To address this, copy the logic from dcache_readdir() to mark the open directory file descriptor once EOD has been reached. Seeking resets the mark. Reported-by: Tavian Barnes Closes: https://lore.kernel.org/linux-fsdevel/20231113180616.2831430-1-tavianator@tavianator.com/ Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets") Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/170043792492.4628.15646203084646716134.stgit@bazille.1015granger.net Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/libfs.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 37f2d34ee090..189447cf4acf 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -396,6 +396,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) return -EINVAL; } + /* In this case, ->private_data is protected by f_pos_lock */ + file->private_data = NULL; return vfs_setpos(file, offset, U32_MAX); } @@ -425,7 +427,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } -static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) +static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) { struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode); XA_STATE(xas, &so_ctx->xa, ctx->pos); @@ -434,7 +436,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) while (true) { dentry = offset_find_next(&xas); if (!dentry) - break; + return ERR_PTR(-ENOENT); if (!offset_dir_emit(ctx, dentry)) { dput(dentry); @@ -444,6 +446,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) dput(dentry); ctx->pos = xas.xa_index + 1; } + return NULL; } /** @@ -476,7 +479,12 @@ static int offset_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit_dots(file, ctx)) return 0; - offset_iterate_dir(d_inode(dir), ctx); + /* In this case, ->private_data is protected by f_pos_lock */ + if (ctx->pos == 2) + file->private_data = NULL; + else if (file->private_data == ERR_PTR(-ENOENT)) + return 0; + file->private_data = offset_iterate_dir(d_inode(dir), ctx); return 0; } From 76a9a10178e47cec24378e00e8c2c161cb7e33f7 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Thu, 26 Oct 2023 19:14:58 +0000 Subject: [PATCH 0552/1397] drm/rockchip: vop: Fix color for RGB888/BGR888 format on VOP full [ Upstream commit bb0a05acd6121ff0e810b44fdc24dbdfaa46b642 ] Use of DRM_FORMAT_RGB888 and DRM_FORMAT_BGR888 on e.g. RK3288, RK3328 and RK3399 result in wrong colors being displayed. The issue can be observed using modetest: modetest -s @:1920x1080-60@RG24 modetest -s @:1920x1080-60@BG24 Vendor 4.4 kernel apply an inverted rb swap for these formats on VOP full framework (IP version 3.x) compared to VOP little framework (2.x). Fix colors by applying different rb swap for VOP full framework (3.x) and VOP little framework (2.x) similar to vendor 4.4 kernel. Fixes: 85a359f25388 ("drm/rockchip: Add BGR formats to VOP") Signed-off-by: Jonas Karlman Tested-by: Diederik de Haas Reviewed-by: Christopher Obbard Tested-by: Christopher Obbard Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20231026191500.2994225-1-jonas@kwiboo.se Signed-off-by: Sasha Levin --- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 41cd12d5f2fa..4b338cb89d32 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -247,14 +247,22 @@ static inline void vop_cfg_done(struct vop *vop) VOP_REG_SET(vop, common, cfg_done, 1); } -static bool has_rb_swapped(uint32_t format) +static bool has_rb_swapped(uint32_t version, uint32_t format) { switch (format) { case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: - case DRM_FORMAT_BGR888: case DRM_FORMAT_BGR565: return true; + /* + * full framework (IP version 3.x) only need rb swapped for RGB888 and + * little framework (IP version 2.x) only need rb swapped for BGR888, + * check for 3.x to also only rb swap BGR888 for unknown vop version + */ + case DRM_FORMAT_RGB888: + return VOP_MAJOR(version) == 3; + case DRM_FORMAT_BGR888: + return VOP_MAJOR(version) != 3; default: return false; } @@ -1013,7 +1021,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane, VOP_WIN_SET(vop, win, dsp_info, dsp_info); VOP_WIN_SET(vop, win, dsp_st, dsp_st); - rb_swap = has_rb_swapped(fb->format->format); + rb_swap = has_rb_swapped(vop->data->version, fb->format->format); VOP_WIN_SET(vop, win, rb_swap, rb_swap); /* From 49036a88865eabfb9b05b6920a86f7726441f2c7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 15 Nov 2023 11:47:51 -0500 Subject: [PATCH 0553/1397] PM: tools: Fix sleepgraph syntax error [ Upstream commit b85e2dab33ce467e8dcf1cb6c0c587132ff17f56 ] The sleepgraph tool currently fails: File "/usr/bin/sleepgraph", line 4155 or re.match('psci: CPU(?P[0-9]*) killed.*', msg)): ^ SyntaxError: unmatched ')' Fixes: 34ea427e01ea ("PM: tools: sleepgraph: Recognize "CPU killed" messages") Signed-off-by: David Woodhouse Reviewed-by: Wolfram Sang Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- tools/power/pm-graph/sleepgraph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 4a356a706785..40ad221e8881 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -4151,7 +4151,7 @@ def parseKernelLog(data): elif(re.match('Enabling non-boot CPUs .*', msg)): # start of first cpu resume cpu_start = ktime - elif(re.match('smpboot: CPU (?P[0-9]*) is now offline', msg)) \ + elif(re.match('smpboot: CPU (?P[0-9]*) is now offline', msg) \ or re.match('psci: CPU(?P[0-9]*) killed.*', msg)): # end of a cpu suspend, start of the next m = re.match('smpboot: CPU (?P[0-9]*) is now offline', msg) From 95f068b0fd3668672e34ebeb8416f57aef27058b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:13 +0100 Subject: [PATCH 0554/1397] net, vrf: Move dstats structure to core [ Upstream commit 79e0c5be8c73a674c92bd4ba77b75f4f8c91d32e ] Just move struct pcpu_dstats out of the vrf into the core, and streamline the field names slightly, so they better align with the {t,l}stats ones. No functional change otherwise. A conversion of the u64s to u64_stats_t could be done at a separate point in future. This move is needed as we are moving the {t,l,d}stats allocation/freeing to the core. Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Cc: Jakub Kicinski Cc: David Ahern Link: https://lore.kernel.org/r/20231114004220.6495-2-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau Stable-dep-of: 024ee930cb3c ("bpf: Fix dev's rx stats for bpf_redirect_peer traffic") Signed-off-by: Sasha Levin --- drivers/net/vrf.c | 24 +++++++----------------- include/linux/netdevice.h | 10 ++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index a3408e4e1491..3654c8b34402 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -121,22 +121,12 @@ struct net_vrf { int ifindex; }; -struct pcpu_dstats { - u64 tx_pkts; - u64 tx_bytes; - u64 tx_drps; - u64 rx_pkts; - u64 rx_bytes; - u64 rx_drps; - struct u64_stats_sync syncp; -}; - static void vrf_rx_stats(struct net_device *dev, int len) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); - dstats->rx_pkts++; + dstats->rx_packets++; dstats->rx_bytes += len; u64_stats_update_end(&dstats->syncp); } @@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_device *dev, do { start = u64_stats_fetch_begin(&dstats->syncp); tbytes = dstats->tx_bytes; - tpkts = dstats->tx_pkts; - tdrops = dstats->tx_drps; + tpkts = dstats->tx_packets; + tdrops = dstats->tx_drops; rbytes = dstats->rx_bytes; - rpkts = dstats->rx_pkts; + rpkts = dstats->rx_packets; } while (u64_stats_fetch_retry(&dstats->syncp, start)); stats->tx_bytes += tbytes; stats->tx_packets += tpkts; @@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) vrf_rx_stats(dev, len); else - this_cpu_inc(dev->dstats->rx_drps); + this_cpu_inc(dev->dstats->rx_drops); return NETDEV_TX_OK; } @@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); - dstats->tx_pkts++; + dstats->tx_packets++; dstats->tx_bytes += len; u64_stats_update_end(&dstats->syncp); } else { - this_cpu_inc(dev->dstats->tx_drps); + this_cpu_inc(dev->dstats->tx_drops); } return ret; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b646609f09c0..b76dc6fa4e77 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2725,6 +2725,16 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); +struct pcpu_dstats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_drops; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + struct u64_stats_sync syncp; +} __aligned(8 * sizeof(u64)); + struct pcpu_lstats { u64_stats_t packets; u64_stats_t bytes; From 6ae7b3fc7ae8a8e9205043b3d966488d0a7cbb0a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:14 +0100 Subject: [PATCH 0555/1397] net: Move {l,t,d}stats allocation to core and convert veth & vrf [ Upstream commit 34d21de99cea9cb17967874313e5b0262527833c ] Move {l,t,d}stats allocation to the core and let netdevs pick the stats type they need. That way the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc) - all happening in the core. Co-developed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Cc: David Ahern Link: https://lore.kernel.org/r/20231114004220.6495-3-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau Stable-dep-of: 024ee930cb3c ("bpf: Fix dev's rx stats for bpf_redirect_peer traffic") Signed-off-by: Sasha Levin --- drivers/net/veth.c | 16 ++----------- drivers/net/vrf.c | 14 +++-------- include/linux/netdevice.h | 20 ++++++++++++---- net/core/dev.c | 49 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 69 insertions(+), 30 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 0deefd1573cf..0ee5d1e0759f 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1499,25 +1499,12 @@ static void veth_free_queues(struct net_device *dev) static int veth_dev_init(struct net_device *dev) { - int err; - - dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); - if (!dev->lstats) - return -ENOMEM; - - err = veth_alloc_queues(dev); - if (err) { - free_percpu(dev->lstats); - return err; - } - - return 0; + return veth_alloc_queues(dev); } static void veth_dev_free(struct net_device *dev) { veth_free_queues(dev); - free_percpu(dev->lstats); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1789,6 +1776,7 @@ static void veth_setup(struct net_device *dev) NETIF_F_HW_VLAN_STAG_RX); dev->needs_free_netdev = true; dev->priv_destructor = veth_dev_free; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; dev->max_mtu = ETH_MAX_MTU; dev->hw_features = VETH_FEATURES; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3654c8b34402..b90dccdc2d33 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1164,22 +1164,15 @@ static void vrf_dev_uninit(struct net_device *dev) vrf_rtable_release(dev, vrf); vrf_rt6_release(dev, vrf); - - free_percpu(dev->dstats); - dev->dstats = NULL; } static int vrf_dev_init(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); - dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); - if (!dev->dstats) - goto out_nomem; - /* create the default dst which points back to us */ if (vrf_rtable_create(dev) != 0) - goto out_stats; + goto out_nomem; if (vrf_rt6_create(dev) != 0) goto out_rth; @@ -1193,9 +1186,6 @@ static int vrf_dev_init(struct net_device *dev) out_rth: vrf_rtable_release(dev, vrf); -out_stats: - free_percpu(dev->dstats); - dev->dstats = NULL; out_nomem: return -ENOMEM; } @@ -1694,6 +1684,8 @@ static void vrf_setup(struct net_device *dev) dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU; dev->mtu = dev->max_mtu; + + dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b76dc6fa4e77..b8e60a20416b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1774,6 +1774,13 @@ enum netdev_ml_priv_type { ML_PRIV_CAN, }; +enum netdev_stat_type { + NETDEV_PCPU_STAT_NONE, + NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */ + NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */ + NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ +}; + /** * struct net_device - The DEVICE structure. * @@ -1968,10 +1975,14 @@ enum netdev_ml_priv_type { * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type - * @lstats: Loopback statistics - * @tstats: Tunnel statistics - * @dstats: Dummy statistics - * @vstats: Virtual ethernet statistics + * + * @pcpu_stat_type: Type of device statistics which the core should + * allocate/free: none, lstats, tstats, dstats. none + * means the driver is handling statistics allocation/ + * freeing internally. + * @lstats: Loopback statistics: packets, bytes + * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes + * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes * * @garp_port: GARP * @mrp_port: MRP @@ -2328,6 +2339,7 @@ struct net_device { void *ml_priv; enum netdev_ml_priv_type ml_priv_type; + enum netdev_stat_type pcpu_stat_type:8; union { struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; diff --git a/net/core/dev.c b/net/core/dev.c index 9f3f8930c691..37444c8e2205 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10050,6 +10050,46 @@ void netif_tx_stop_all_queues(struct net_device *dev) } EXPORT_SYMBOL(netif_tx_stop_all_queues); +static int netdev_do_alloc_pcpu_stats(struct net_device *dev) +{ + void __percpu *v; + + switch (dev->pcpu_stat_type) { + case NETDEV_PCPU_STAT_NONE: + return 0; + case NETDEV_PCPU_STAT_LSTATS: + v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); + break; + case NETDEV_PCPU_STAT_TSTATS: + v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + break; + case NETDEV_PCPU_STAT_DSTATS: + v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); + break; + default: + return -EINVAL; + } + + return v ? 0 : -ENOMEM; +} + +static void netdev_do_free_pcpu_stats(struct net_device *dev) +{ + switch (dev->pcpu_stat_type) { + case NETDEV_PCPU_STAT_NONE: + return; + case NETDEV_PCPU_STAT_LSTATS: + free_percpu(dev->lstats); + break; + case NETDEV_PCPU_STAT_TSTATS: + free_percpu(dev->tstats); + break; + case NETDEV_PCPU_STAT_DSTATS: + free_percpu(dev->dstats); + break; + } +} + /** * register_netdevice() - register a network device * @dev: device to register @@ -10110,9 +10150,13 @@ int register_netdevice(struct net_device *dev) goto err_uninit; } + ret = netdev_do_alloc_pcpu_stats(dev); + if (ret) + goto err_uninit; + ret = dev_index_reserve(net, dev->ifindex); if (ret < 0) - goto err_uninit; + goto err_free_pcpu; dev->ifindex = ret; /* Transfer changeable features to wanted_features and enable @@ -10218,6 +10262,8 @@ int register_netdevice(struct net_device *dev) call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); err_ifindex_release: dev_index_release(net, dev->ifindex); +err_free_pcpu: + netdev_do_free_pcpu_stats(dev); err_uninit: if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); @@ -10470,6 +10516,7 @@ void netdev_run_todo(void) WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); + netdev_do_free_pcpu_stats(dev); if (dev->priv_destructor) dev->priv_destructor(dev); if (dev->needs_free_netdev) From 959f301635dc7f4b78e1918b4d1838a51e7e51ff Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 14 Nov 2023 01:42:17 +0100 Subject: [PATCH 0556/1397] bpf: Fix dev's rx stats for bpf_redirect_peer traffic [ Upstream commit 024ee930cb3c9ae49e4266aee89cfde0ebb407e1 ] Traffic redirected by bpf_redirect_peer() (used by recent CNIs like Cilium) is not accounted for in the RX stats of supported devices (that is, veth and netkit), confusing user space metrics collectors such as cAdvisor [0], as reported by Youlun. Fix it by calling dev_sw_netstats_rx_add() in skb_do_redirect(), to update RX traffic counters. Devices that support ndo_get_peer_dev _must_ use the @tstats per-CPU counters (instead of @lstats, or @dstats). To make this more fool-proof, error out when ndo_get_peer_dev is set but @tstats are not selected. [0] Specifically, the "container_network_receive_{byte,packet}s_total" counters are affected. Fixes: 9aa1206e8f48 ("bpf: Add redirect_peer helper") Reported-by: Youlun Zhang Signed-off-by: Peilin Ye Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20231114004220.6495-6-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- net/core/dev.c | 8 ++++++++ net/core/filter.c | 1 + 2 files changed, 9 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 37444c8e2205..9bf90b2a75b6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10054,6 +10054,14 @@ static int netdev_do_alloc_pcpu_stats(struct net_device *dev) { void __percpu *v; + /* Drivers implementing ndo_get_peer_dev must support tstat + * accounting, so that skb_do_redirect() can bump the dev's + * RX stats upon network namespace switch. + */ + if (dev->netdev_ops->ndo_get_peer_dev && + dev->pcpu_stat_type != NETDEV_PCPU_STAT_TSTATS) + return -EOPNOTSUPP; + switch (dev->pcpu_stat_type) { case NETDEV_PCPU_STAT_NONE: return 0; diff --git a/net/core/filter.c b/net/core/filter.c index a094694899c9..b149a165c405 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2489,6 +2489,7 @@ int skb_do_redirect(struct sk_buff *skb) net_eq(net, dev_net(dev)))) goto out_drop; skb->dev = dev; + dev_sw_netstats_rx_add(dev, skb->len); return -EAGAIN; } return flags & BPF_F_NEIGH ? From e9fd34ab6ea2c2ab5b6eb326e507ff34dcf33d60 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 20 Oct 2023 12:45:00 +0200 Subject: [PATCH 0557/1397] accel/ivpu: Do not initialize parameters on power up [ Upstream commit f956bf2080862cfc97412e1eaa08689bc9838d20 ] Initialize HW specific parameters only once. We do not have to do this on every power_up (performed during initialization and on resume). Move corresponding code to ->info_init() Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20231020104501.697763-6-stanislaw.gruszka@linux.intel.com Stable-dep-of: 3f7c0634926d ("accel/ivpu/37xx: Fix hangs related to MMIO reset") Signed-off-by: Sasha Levin --- drivers/accel/ivpu/ivpu_hw_37xx.c | 8 ++++---- drivers/accel/ivpu/ivpu_hw_40xx.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 18be8b98e9a8..cb9f0196e3dd 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -625,6 +625,10 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev) ivpu_hw_init_range(&hw->ranges.shave, 0x180000000, SZ_2G); ivpu_hw_init_range(&hw->ranges.dma, 0x200000000, SZ_8G); + ivpu_hw_read_platform(vdev); + ivpu_hw_wa_init(vdev); + ivpu_hw_timeouts_init(vdev); + return 0; } @@ -681,10 +685,6 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev) { int ret; - ivpu_hw_read_platform(vdev); - ivpu_hw_wa_init(vdev); - ivpu_hw_timeouts_init(vdev); - ret = ivpu_hw_37xx_reset(vdev); if (ret) ivpu_warn(vdev, "Failed to reset HW: %d\n", ret); diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 85171a408363..7c3ff25232a2 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -728,6 +728,10 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev) ivpu_hw_init_range(&vdev->hw->ranges.shave, 0x80000000 + SZ_256M, SZ_2G - SZ_256M); ivpu_hw_init_range(&vdev->hw->ranges.dma, 0x200000000, SZ_8G); + ivpu_hw_read_platform(vdev); + ivpu_hw_wa_init(vdev); + ivpu_hw_timeouts_init(vdev); + return 0; } @@ -819,10 +823,6 @@ static int ivpu_hw_40xx_power_up(struct ivpu_device *vdev) return ret; } - ivpu_hw_read_platform(vdev); - ivpu_hw_wa_init(vdev); - ivpu_hw_timeouts_init(vdev); - ret = ivpu_hw_40xx_d0i3_disable(vdev); if (ret) ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); From 790c480d35dc98b9a877240654d43379fbf5c30d Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Wed, 15 Nov 2023 12:10:04 +0100 Subject: [PATCH 0558/1397] accel/ivpu/37xx: Fix hangs related to MMIO reset [ Upstream commit 3f7c0634926daf48cd2f6db6c1197a1047074088 ] There is no need to call MMIO reset using VPU_37XX_BUTTRESS_VPU_IP_RESET register. IP will be reset by FLR or by entering d0i3. Also IP reset during power_up is not needed as the VPU is already in reset. Removing MMIO reset improves stability as it a partial device reset that is not safe in some corner cases. This change also brings back ivpu_boot_pwr_domain_disable() that helps to properly power down VPU when it is hung by a buggy workload. Signed-off-by: Jacek Lawrynowicz Fixes: 828d63042aec ("accel/ivpu: Don't enter d0i3 during FLR") Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20231115111004.1304092-1-jacek.lawrynowicz@linux.intel.com Signed-off-by: Sasha Levin --- drivers/accel/ivpu/ivpu_hw_37xx.c | 46 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index cb9f0196e3dd..b8010c07eec1 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -536,6 +536,16 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) return ret; } +static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev) +{ + ivpu_boot_dpu_active_drive(vdev, false); + ivpu_boot_pwr_island_isolation_drive(vdev, true); + ivpu_boot_pwr_island_trickle_drive(vdev, false); + ivpu_boot_pwr_island_drive(vdev, false); + + return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0); +} + static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) { u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); @@ -634,25 +644,17 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev) static int ivpu_hw_37xx_reset(struct ivpu_device *vdev) { - int ret; - u32 val; - - if (IVPU_WA(punit_disabled)) - return 0; + int ret = 0; - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) { - ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); - return ret; + if (ivpu_boot_pwr_domain_disable(vdev)) { + ivpu_err(vdev, "Failed to disable power domain\n"); + ret = -EIO; } - val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET); - val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val); - REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val); - - ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + if (ivpu_pll_disable(vdev)) { + ivpu_err(vdev, "Failed to disable PLL\n"); + ret = -EIO; + } return ret; } @@ -685,10 +687,6 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev) { int ret; - ret = ivpu_hw_37xx_reset(vdev); - if (ret) - ivpu_warn(vdev, "Failed to reset HW: %d\n", ret); - ret = ivpu_hw_37xx_d0i3_disable(vdev); if (ret) ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); @@ -756,11 +754,11 @@ static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev) { int ret = 0; - if (!ivpu_hw_37xx_is_idle(vdev) && ivpu_hw_37xx_reset(vdev)) - ivpu_err(vdev, "Failed to reset the VPU\n"); + if (!ivpu_hw_37xx_is_idle(vdev)) + ivpu_warn(vdev, "VPU not idle during power down\n"); - if (ivpu_pll_disable(vdev)) { - ivpu_err(vdev, "Failed to disable PLL\n"); + if (ivpu_hw_37xx_reset(vdev)) { + ivpu_err(vdev, "Failed to reset VPU\n"); ret = -EIO; } From 6642322aa83975e37799d6c60c2155d12f876be1 Mon Sep 17 00:00:00 2001 From: Charles Yi Date: Tue, 31 Oct 2023 12:32:39 +0800 Subject: [PATCH 0559/1397] HID: fix HID device resource race between HID core and debugging support [ Upstream commit fc43e9c857b7aa55efba9398419b14d9e35dcc7d ] hid_debug_events_release releases resources bound to the HID device instance. hid_device_release releases the underlying HID device instance potentially before hid_debug_events_release has completed releasing debug resources bound to the same HID device instance. Reference count to prevent the HID device instance from being torn down preemptively when HID debugging support is used. When count reaches zero, release core resources of HID device instance using hiddev_free. The crash: [ 120.728477][ T4396] kernel BUG at lib/list_debug.c:53! [ 120.728505][ T4396] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 120.739806][ T4396] Modules linked in: bcmdhd dhd_static_buf 8822cu pcie_mhi r8168 [ 120.747386][ T4396] CPU: 1 PID: 4396 Comm: hidt_bridge Not tainted 5.10.110 #257 [ 120.754771][ T4396] Hardware name: Rockchip RK3588 EVB4 LP4 V10 Board (DT) [ 120.761643][ T4396] pstate: 60400089 (nZCv daIf +PAN -UAO -TCO BTYPE=--) [ 120.768338][ T4396] pc : __list_del_entry_valid+0x98/0xac [ 120.773730][ T4396] lr : __list_del_entry_valid+0x98/0xac [ 120.779120][ T4396] sp : ffffffc01e62bb60 [ 120.783126][ T4396] x29: ffffffc01e62bb60 x28: ffffff818ce3a200 [ 120.789126][ T4396] x27: 0000000000000009 x26: 0000000000980000 [ 120.795126][ T4396] x25: ffffffc012431000 x24: ffffff802c6d4e00 [ 120.801125][ T4396] x23: ffffff8005c66f00 x22: ffffffc01183b5b8 [ 120.807125][ T4396] x21: ffffff819df2f100 x20: 0000000000000000 [ 120.813124][ T4396] x19: ffffff802c3f0700 x18: ffffffc01d2cd058 [ 120.819124][ T4396] x17: 0000000000000000 x16: 0000000000000000 [ 120.825124][ T4396] x15: 0000000000000004 x14: 0000000000003fff [ 120.831123][ T4396] x13: ffffffc012085588 x12: 0000000000000003 [ 120.837123][ T4396] x11: 00000000ffffbfff x10: 0000000000000003 [ 120.843123][ T4396] x9 : 455103d46b329300 x8 : 455103d46b329300 [ 120.849124][ T4396] x7 : 74707572726f6320 x6 : ffffffc0124b8cb5 [ 120.855124][ T4396] x5 : ffffffffffffffff x4 : 0000000000000000 [ 120.861123][ T4396] x3 : ffffffc011cf4f90 x2 : ffffff81fee7b948 [ 120.867122][ T4396] x1 : ffffffc011cf4f90 x0 : 0000000000000054 [ 120.873122][ T4396] Call trace: [ 120.876259][ T4396] __list_del_entry_valid+0x98/0xac [ 120.881304][ T4396] hid_debug_events_release+0x48/0x12c [ 120.886617][ T4396] full_proxy_release+0x50/0xbc [ 120.891323][ T4396] __fput+0xdc/0x238 [ 120.895075][ T4396] ____fput+0x14/0x24 [ 120.898911][ T4396] task_work_run+0x90/0x148 [ 120.903268][ T4396] do_exit+0x1bc/0x8a4 [ 120.907193][ T4396] do_group_exit+0x8c/0xa4 [ 120.911458][ T4396] get_signal+0x468/0x744 [ 120.915643][ T4396] do_signal+0x84/0x280 [ 120.919650][ T4396] do_notify_resume+0xd0/0x218 [ 120.924262][ T4396] work_pending+0xc/0x3f0 [ Rahul Rameshbabu : rework changelog ] Fixes: cd667ce24796 ("HID: use debugfs for events/reports dumping") Signed-off-by: Charles Yi Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-core.c | 12 ++++++++++-- drivers/hid/hid-debug.c | 3 +++ include/linux/hid.h | 3 +++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 8992e3c1e769..e0181218ad85 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -702,15 +702,22 @@ static void hid_close_report(struct hid_device *device) * Free a device structure, all reports, and all fields. */ -static void hid_device_release(struct device *dev) +void hiddev_free(struct kref *ref) { - struct hid_device *hid = to_hid_device(dev); + struct hid_device *hid = container_of(ref, struct hid_device, ref); hid_close_report(hid); kfree(hid->dev_rdesc); kfree(hid); } +static void hid_device_release(struct device *dev) +{ + struct hid_device *hid = to_hid_device(dev); + + kref_put(&hid->ref, hiddev_free); +} + /* * Fetch a report description item from the data stream. We support long * items, though they are not used yet. @@ -2846,6 +2853,7 @@ struct hid_device *hid_allocate_device(void) spin_lock_init(&hdev->debug_list_lock); sema_init(&hdev->driver_input_lock, 1); mutex_init(&hdev->ll_open_lock); + kref_init(&hdev->ref); hid_bpf_device_init(hdev); diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index e7ef1ea107c9..7dd83ec74f8a 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -1135,6 +1135,7 @@ static int hid_debug_events_open(struct inode *inode, struct file *file) goto out; } list->hdev = (struct hid_device *) inode->i_private; + kref_get(&list->hdev->ref); file->private_data = list; mutex_init(&list->read_mutex); @@ -1227,6 +1228,8 @@ static int hid_debug_events_release(struct inode *inode, struct file *file) list_del(&list->node); spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags); kfifo_free(&list->hid_debug_fifo); + + kref_put(&list->hdev->ref, hiddev_free); kfree(list); return 0; diff --git a/include/linux/hid.h b/include/linux/hid.h index 964ca1f15e3f..3b08a2957229 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -679,6 +679,7 @@ struct hid_device { /* device report descriptor */ struct list_head debug_list; spinlock_t debug_list_lock; wait_queue_head_t debug_wait; + struct kref ref; unsigned int id; /* system unique id */ @@ -687,6 +688,8 @@ struct hid_device { /* device report descriptor */ #endif /* CONFIG_BPF */ }; +void hiddev_free(struct kref *ref); + #define to_hid_device(pdev) \ container_of(pdev, struct hid_device, dev) From 79b5228e9511a40ce722c2c75b9fdb48c6eae7ce Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Sun, 19 Nov 2023 22:17:59 +0800 Subject: [PATCH 0560/1397] ipv4: Correct/silence an endian warning in __ip_do_redirect [ Upstream commit c0e2926266af3b5acf28df0a8fc6e4d90effe0bb ] net/ipv4/route.c:783:46: warning: incorrect type in argument 2 (different base types) net/ipv4/route.c:783:46: expected unsigned int [usertype] key net/ipv4/route.c:783:46: got restricted __be32 [usertype] new_gw Fixes: 969447f226b4 ("ipv4: use new_gw for redirect neigh lookup") Suggested-by: Eric Dumazet Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20231119141759.420477-1-chentao@kylinos.cn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b214b5a2e045..3bad9aa066db 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -780,7 +780,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow goto reject_redirect; } - n = __ipv4_neigh_lookup(rt->dst.dev, new_gw); + n = __ipv4_neigh_lookup(rt->dst.dev, (__force u32)new_gw); if (!n) n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { From d75f7c1bd8fd0144b766239838e250e5aee9e375 Mon Sep 17 00:00:00 2001 From: Cong Yang Date: Mon, 20 Nov 2023 10:01:09 +0800 Subject: [PATCH 0561/1397] drm/panel: boe-tv101wum-nl6: Fine tune Himax83102-j02 panel HFP and HBP [ Upstream commit cea7008190ad65b4aaae6e94667a358d2c10a696 ] The refresh reported by modetest is 60.46Hz, and the actual measurement is 60.01Hz, which is outside the expected tolerance. Adjust hporch and pixel clock to fix it. After repair, modetest and actual measurement were all 60.01Hz. Modetest refresh = Pixel CLK/ htotal* vtotal, but measurement frame rate is HS->LP cycle time(Vblanking). Measured frame rate is not only affecte by Htotal/Vtotal/pixel clock, also affected by Lane-num/PixelBit/LineTime /DSI CLK. Assume that the DSI controller could not make the mode that we requested(presumably it's PLL couldn't generate the exact pixel clock?). If you use a different DSI controller, you may need to readjust these parameters. Now this panel looks like it's only used by me on the MTK platform, so let's change this set of parameters. Fixes: 1bc2ef065f13 ("drm/panel: Support for Starry-himax83102-j02 TDDI MIPI-DSI panel") Signed-off-by: Cong Yang Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231120020109.3216343-1-yangcong5@huaqin.corp-partner.google.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index 980b10244d4e..d76a8ca9c40f 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -2107,11 +2107,11 @@ static const struct panel_desc starry_qfh032011_53g_desc = { }; static const struct drm_display_mode starry_himax83102_j02_default_mode = { - .clock = 161600, + .clock = 162850, .hdisplay = 1200, - .hsync_start = 1200 + 40, - .hsync_end = 1200 + 40 + 20, - .htotal = 1200 + 40 + 20 + 40, + .hsync_start = 1200 + 50, + .hsync_end = 1200 + 50 + 20, + .htotal = 1200 + 50 + 20 + 50, .vdisplay = 1920, .vsync_start = 1920 + 116, .vsync_end = 1920 + 116 + 8, From 41eccb6e1c1d568d2ee108a12beef3399e55f7ea Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Mon, 20 Nov 2023 13:06:29 +0100 Subject: [PATCH 0562/1397] net: usb: ax88179_178a: fix failed operations during ax88179_reset [ Upstream commit 0739af07d1d947af27c877f797cb82ceee702515 ] Using generic ASIX Electronics Corp. AX88179 Gigabit Ethernet device, the following test cycle has been implemented: - power on - check logs - shutdown - after detecting the system shutdown, disconnect power - after approximately 60 seconds of sleep, power is restored Running some cycles, sometimes error logs like this appear: kernel: ax88179_178a 2-9:1.0 (unnamed net_device) (uninitialized): Failed to write reg index 0x0001: -19 kernel: ax88179_178a 2-9:1.0 (unnamed net_device) (uninitialized): Failed to read reg index 0x0001: -19 ... These failed operation are happening during ax88179_reset execution, so the initialization could not be correct. In order to avoid this, we need to increase the delay after reset and clock initial operations. By using these larger values, many cycles have been run and no failed operations appear. It would be better to check some status register to verify when the operation has finished, but I do not have found any available information (neither in the public datasheets nor in the manufacturer's driver). The only available information for the necessary delays is the maufacturer's driver (original values) but the proposed values are not enough for the tested devices. Fixes: e2ca90c276e1f ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Reported-by: Herb Wei Tested-by: Herb Wei Signed-off-by: Jose Ignacio Tornos Martinez Link: https://lore.kernel.org/r/20231120120642.54334-1-jtornosm@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/ax88179_178a.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index aff39bf3161d..4ea0e155bb0d 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1583,11 +1583,11 @@ static int ax88179_reset(struct usbnet *dev) *tmp16 = AX_PHYPWR_RSTCTL_IPRL; ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16); - msleep(200); + msleep(500); *tmp = AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS; ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp); - msleep(100); + msleep(200); /* Ethernet PHY Auto Detach*/ ax88179_auto_detach(dev); From 7234d2b5dffa5af77fd4e0deaebab509e130c6b1 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Wed, 22 Nov 2023 10:37:05 +0800 Subject: [PATCH 0563/1397] net/smc: avoid data corruption caused by decline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e6d71b437abc2f249e3b6a1ae1a7228e09c6e563 ] We found a data corruption issue during testing of SMC-R on Redis applications. The benchmark has a low probability of reporting a strange error as shown below. "Error: Protocol error, got "\xe2" as reply type byte" Finally, we found that the retrieved error data was as follows: 0xE2 0xD4 0xC3 0xD9 0x04 0x00 0x2C 0x20 0xA6 0x56 0x00 0x16 0x3E 0x0C 0xCB 0x04 0x02 0x01 0x00 0x00 0x20 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xE2 It is quite obvious that this is a SMC DECLINE message, which means that the applications received SMC protocol message. We found that this was caused by the following situations: client server ¦ clc proposal -------------> ¦ clc accept <------------- ¦ clc confirm -------------> wait llc confirm send llc confirm ¦failed llc confirm ¦ x------ (after 2s)timeout wait llc confirm rsp wait decline (after 1s) timeout (after 2s) timeout ¦ decline --------------> ¦ decline <-------------- As a result, a decline message was sent in the implementation, and this message was read from TCP by the already-fallback connection. This patch double the client timeout as 2x of the server value, With this simple change, the Decline messages should never cross or collide (during Confirm link timeout). This issue requires an immediate solution, since the protocol updates involve a more long-term solution. Fixes: 0fb0b02bd6fd ("net/smc: adapt SMC client code to use the LLC flow") Signed-off-by: D. Wythe Reviewed-by: Wen Gu Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/smc/af_smc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 4c047e0e1625..741339ac9483 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -598,8 +598,12 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) struct smc_llc_qentry *qentry; int rc; - /* receive CONFIRM LINK request from server over RoCE fabric */ - qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME, + /* Receive CONFIRM LINK request from server over RoCE fabric. + * Increasing the client's timeout by twice as much as the server's + * timeout by default can temporarily avoid decline messages of + * both sides crossing or colliding + */ + qentry = smc_llc_wait(link->lgr, NULL, 2 * SMC_LLC_WAIT_TIME, SMC_LLC_CONFIRM_LINK); if (!qentry) { struct smc_clc_msg_decline dclc; From 322b3f6c90d4a4d0c0db0bd6169175fb098c23ed Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Wed, 8 Nov 2023 18:18:52 +0100 Subject: [PATCH 0564/1397] s390/ipl: add missing IPL_TYPE_ECKD_DUMP case to ipl_init() [ Upstream commit 673752a839694133a328610fcbc54f3d59ae87f3 ] Add missing IPL_TYPE_ECKD_DUMP case to ipl_init() creating ECKD ipl device attribute group similar to IPL_TYPE_ECKD case. Commit e2d2a2968f2a ("s390/ipl: add eckd dump support") should have had it from the beginning. Fixes: e2d2a2968f2a ("s390/ipl: add eckd dump support") Signed-off-by: Mikhail Zaslonko Reviewed-by: Sven Schnelle Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- arch/s390/kernel/ipl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 05e51666db03..8d0b95c17312 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -666,6 +666,7 @@ static int __init ipl_init(void) &ipl_ccw_attr_group_lpar); break; case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group); break; case IPL_TYPE_FCP: From 1bf36832f878260b1f5bf1b1405a7b7c57de4daf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 17 Nov 2023 13:14:22 +0000 Subject: [PATCH 0565/1397] arm64: mm: Fix "rodata=on" when CONFIG_RODATA_FULL_DEFAULT_ENABLED=y [ Upstream commit acfa60dbe03802d6afd28401aa47801270e82021 ] When CONFIG_RODATA_FULL_DEFAULT_ENABLED=y, passing "rodata=on" on the kernel command-line (rather than "rodata=full") should turn off the "full" behaviour, leaving writable linear aliases of read-only kernel memory. Unfortunately, the option has no effect in this situation and the only way to disable the "rodata=full" behaviour is to disable rodata protection entirely by passing "rodata=off". Fix this by parsing the "on" and "off" options in the arch code, additionally enforcing that 'rodata_full' cannot be set without also setting 'rodata_enabled', allowing us to simplify a couple of checks in the process. Fixes: 2e8cff0a0eee ("arm64: fix rodata=full") Cc: Ard Biesheuvel Cc: Mark Rutland Signed-off-by: Will Deacon Reviewed-by: "Russell King (Oracle)" Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20231117131422.29663-1-will@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/include/asm/setup.h | 17 +++++++++++++++-- arch/arm64/mm/pageattr.c | 7 +++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index f4af547ef54c..2e4d7da74fb8 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -21,9 +21,22 @@ static inline bool arch_parse_debug_rodata(char *arg) extern bool rodata_enabled; extern bool rodata_full; - if (arg && !strcmp(arg, "full")) { + if (!arg) + return false; + + if (!strcmp(arg, "full")) { + rodata_enabled = rodata_full = true; + return true; + } + + if (!strcmp(arg, "off")) { + rodata_enabled = rodata_full = false; + return true; + } + + if (!strcmp(arg, "on")) { rodata_enabled = true; - rodata_full = true; + rodata_full = false; return true; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 8e2017ba5f1b..924843f1f661 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -29,8 +29,8 @@ bool can_set_direct_map(void) * * KFENCE pool requires page-granular mapping if initialized late. */ - return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() || - arm64_kfence_can_set_direct_map(); + return rodata_full || debug_pagealloc_enabled() || + arm64_kfence_can_set_direct_map(); } static int change_page_range(pte_t *ptep, unsigned long addr, void *data) @@ -105,8 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages, * If we are manipulating read-only permissions, apply the same * change to the linear mapping of the pages that back this VM area. */ - if (rodata_enabled && - rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || + if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || pgprot_val(clear_mask) == PTE_RDONLY)) { for (i = 0; i < area->nr_pages; i++) { __change_memory_common((u64)page_address(area->pages[i]), From 3c1963b5e7b913a08aad348c55219bfd19654a96 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 22 Nov 2023 15:07:41 -0800 Subject: [PATCH 0566/1397] arm/xen: fix xen_vcpu_info allocation alignment [ Upstream commit 7bf9a6b46549852a37e6d07e52c601c3c706b562 ] xen_vcpu_info is a percpu area than needs to be mapped by Xen. Currently, it could cross a page boundary resulting in Xen being unable to map it: [ 0.567318] kernel BUG at arch/arm64/xen/../../arm/xen/enlighten.c:164! [ 0.574002] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Fix the issue by using __alloc_percpu and requesting alignment for the memory allocation. Signed-off-by: Stefano Stabellini Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2311221501340.2053963@ubuntu-linux-20-04-desktop Fixes: 24d5373dda7c ("arm/xen: Use alloc_percpu rather than __alloc_percpu") Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/arm/xen/enlighten.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 9afdc4c4a5dc..a395b6c0aae2 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -484,7 +484,8 @@ static int __init xen_guest_init(void) * for secondary CPUs as they are brought up. * For uniformity we use VCPUOP_register_vcpu_info even on cpu0. */ - xen_vcpu_info = alloc_percpu(struct vcpu_info); + xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info), + 1 << fls(sizeof(struct vcpu_info) - 1)); if (xen_vcpu_info == NULL) return -ENOMEM; From f174a0dae5bc6da755368e4bcad6d86e06249ba4 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Tue, 21 Nov 2023 22:26:24 +0530 Subject: [PATCH 0567/1397] octeontx2-pf: Fix ntuple rule creation to direct packet to VF with higher Rx queue than its PF [ Upstream commit 4aa1d8f89b10cdc25a231dabf808d8935e0b137a ] It is possible to add a ntuple rule which would like to direct packet to a VF whose number of queues are greater/less than its PF's queue numbers. For example a PF can have 2 Rx queues but a VF created on that PF can have 8 Rx queues. As of today, ntuple rule will reject rule because it is checking the requested queue number against PF's number of Rx queues. As a part of this fix if the action of a ntuple rule is to move a packet to a VF's queue then the check is removed. Also, a debug information is printed to aware user that it is user's responsibility to cross check if the requested queue number on that VF is a valid one. Fixes: f0a1913f8a6f ("octeontx2-pf: Add support for ethtool ntuple filters") Signed-off-by: Suman Ghosh Reviewed-by: Wojciech Drewek Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231121165624.3664182-1-sumang@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../marvell/octeontx2/nic/otx2_flows.c | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 4762dbea64a1..97a71e9b8563 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -1088,6 +1088,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) struct ethhdr *eth_hdr; bool new = false; int err = 0; + u64 vf_num; u32 ring; if (!flow_cfg->max_flows) { @@ -1100,7 +1101,21 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT)) return -ENOMEM; - if (ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC) + /* Number of queues on a VF can be greater or less than + * the PF's queue. Hence no need to check for the + * queue count. Hence no need to check queue count if PF + * is installing for its VF. Below is the expected vf_num value + * based on the ethtool commands. + * + * e.g. + * 1. ethtool -U ... action -1 ==> vf_num:255 + * 2. ethtool -U ... action ==> vf_num:0 + * 3. ethtool -U ... vf queue ==> + * vf_num:vf_idx+1 + */ + vf_num = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie); + if (!is_otx2_vf(pfvf->pcifunc) && !vf_num && + ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC) return -EINVAL; if (fsp->location >= otx2_get_maxflows(flow_cfg)) @@ -1182,6 +1197,9 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc) flow_cfg->nr_flows++; } + if (flow->is_vf) + netdev_info(pfvf->netdev, + "Make sure that VF's queue number is within its queue limit\n"); return 0; } From db7b6161338bbe88f27da0ea956ac16cc2f995da Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 21 Nov 2023 20:08:44 +0100 Subject: [PATCH 0568/1397] net: veth: fix ethtool stats reporting [ Upstream commit 818ad9cc90d4a7165caaee7e32800c50d0564ec3 ] Fix a possible misalignment between page_pool stats and tx xdp_stats reported in veth_get_ethtool_stats routine. The issue can be reproduced configuring the veth pair with the following tx/rx queues: $ip link add v0 numtxqueues 2 numrxqueues 4 type veth peer name v1 \ numtxqueues 1 numrxqueues 1 and loading a simple XDP program on v0 that just returns XDP_PASS. In this case on v0 the page_pool stats overwrites tx xdp_stats for queue 1. Fix the issue incrementing pp_idx of dev->real_num_tx_queues * VETH_TQ_STATS_LEN since we always report xdp_stats for all tx queues in ethtool. Fixes: 4fc418053ec7 ("net: veth: add page_pool stats") Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/c5b5d0485016836448453f12846c7c4ab75b094a.1700593593.git.lorenzo@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/veth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 0ee5d1e0759f..af326b91506e 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -236,8 +236,8 @@ static void veth_get_ethtool_stats(struct net_device *dev, data[tx_idx + j] += *(u64 *)(base + offset); } } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); - pp_idx = tx_idx + VETH_TQ_STATS_LEN; } + pp_idx = idx + dev->real_num_tx_queues * VETH_TQ_STATS_LEN; page_pool_stats: veth_get_page_pool_stats(dev, &data[pp_idx]); From 50173aae586c33c26b81eb6ec6bad4457d249403 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 22 Nov 2023 00:44:33 +0530 Subject: [PATCH 0569/1397] amd-xgbe: handle corner-case during sfp hotplug [ Upstream commit 676ec53844cbdf2f47e68a076cdff7f0ec6cbe3f ] Force the mode change for SFI in Fixed PHY configurations. Fixed PHY configurations needs PLL to be enabled while doing mode set. When the SFP module isn't connected during boot, driver assumes AN is ON and attempts auto-negotiation. However, if the connected SFP comes up in Fixed PHY configuration the link will not come up as PLL isn't enabled while the initial mode set command is issued. So, force the mode change for SFI in Fixed PHY configuration to fix link issues. Fixes: e57f7a3feaef ("amd-xgbe: Prepare for working with more than one type of phy") Acked-by: Shyam Sundar S K Signed-off-by: Raju Rangoju Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 32d2c6fac652..4a2dc705b528 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1193,7 +1193,19 @@ static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata) if (pdata->phy.duplex != DUPLEX_FULL) return -EINVAL; - xgbe_set_mode(pdata, mode); + /* Force the mode change for SFI in Fixed PHY config. + * Fixed PHY configs needs PLL to be enabled while doing mode set. + * When the SFP module isn't connected during boot, driver assumes + * AN is ON and attempts autonegotiation. However, if the connected + * SFP comes up in Fixed PHY config, the link will not come up as + * PLL isn't enabled while the initial mode set command is issued. + * So, force the mode change for SFI in Fixed PHY configuration to + * fix link issues. + */ + if (mode == XGBE_MODE_SFI) + xgbe_change_mode(pdata, mode); + else + xgbe_set_mode(pdata, mode); return 0; } From b2943d777f718bb1e71239b74a3b41bf3d22d3fb Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 22 Nov 2023 00:44:34 +0530 Subject: [PATCH 0570/1397] amd-xgbe: handle the corner-case during tx completion [ Upstream commit 7121205d5330c6a3cb3379348886d47c77b78d06 ] The existing implementation uses software logic to accumulate tx completions until the specified time (1ms) is met and then poll them. However, there exists a tiny gap which leads to a race between resetting and checking the tx_activate flag. Due to this the tx completions are not reported to upper layer and tx queue timeout kicks-in restarting the device. To address this, introduce a tx cleanup mechanism as part of the periodic maintenance process. Fixes: c5aa9e3b8156 ("amd-xgbe: Initial AMD 10GbE platform driver") Acked-by: Shyam Sundar S K Signed-off-by: Raju Rangoju Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 614c0278419b..6b73648b3779 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -682,10 +682,24 @@ static void xgbe_service(struct work_struct *work) static void xgbe_service_timer(struct timer_list *t) { struct xgbe_prv_data *pdata = from_timer(pdata, t, service_timer); + struct xgbe_channel *channel; + unsigned int i; queue_work(pdata->dev_workqueue, &pdata->service_work); mod_timer(&pdata->service_timer, jiffies + HZ); + + if (!pdata->tx_usecs) + return; + + for (i = 0; i < pdata->channel_count; i++) { + channel = pdata->channel[i]; + if (!channel->tx_ring || channel->tx_timer_active) + break; + channel->tx_timer_active = 1; + mod_timer(&channel->tx_timer, + jiffies + usecs_to_jiffies(pdata->tx_usecs)); + } } static void xgbe_init_timers(struct xgbe_prv_data *pdata) From 54f06863c1e6b272a515fdba8f0514ace67b509d Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 22 Nov 2023 00:44:35 +0530 Subject: [PATCH 0571/1397] amd-xgbe: propagate the correct speed and duplex status [ Upstream commit 7a2323ac24a50311f64a3a9b54ed5bef5821ecae ] xgbe_get_link_ksettings() does not propagate correct speed and duplex information to ethtool during cable unplug. Due to which ethtool reports incorrect values for speed and duplex. Address this by propagating correct information. Fixes: 7c12aa08779c ("amd-xgbe: Move the PHY support into amd-xgbe") Acked-by: Shyam Sundar S K Signed-off-by: Raju Rangoju Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 6e83ff59172a..32fab5e77246 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -314,10 +314,15 @@ static int xgbe_get_link_ksettings(struct net_device *netdev, cmd->base.phy_address = pdata->phy.address; - cmd->base.autoneg = pdata->phy.autoneg; - cmd->base.speed = pdata->phy.speed; - cmd->base.duplex = pdata->phy.duplex; + if (netif_carrier_ok(netdev)) { + cmd->base.speed = pdata->phy.speed; + cmd->base.duplex = pdata->phy.duplex; + } else { + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + } + cmd->base.autoneg = pdata->phy.autoneg; cmd->base.port = PORT_NONE; XGBE_LM_COPY(cmd, supported, lks, supported); From 0bdd88af71e069f209ae8cf53af81c3fc0019bc1 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 21 Nov 2023 13:13:36 -0800 Subject: [PATCH 0572/1397] i40e: Fix adding unsupported cloud filters [ Upstream commit 4e20655e503e3a478cd1682bf25e3202dd823da8 ] If a VF tries to add unsupported cloud filter through virtchnl then i40e_add_del_cloud_filter(_big_buf) returns -ENOTSUPP but this error code is stored in 'ret' instead of 'aq_ret' that is used as error code sent back to VF. In this scenario where one of the mentioned functions fails the value of 'aq_ret' is zero so the VF will incorrectly receive a 'success'. Use 'aq_ret' to store return value and remove 'ret' local variable. Additionally fix the issue when filter allocation fails, in this case no notification is sent back to the VF. Fixes: e284fc280473 ("i40e: Add and delete cloud filter") Reviewed-by: Simon Horman Signed-off-by: Ivan Vecera Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20231121211338.3348677-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index d3d6415553ed..4441b00297f4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3842,7 +3842,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; int aq_ret = 0; - int i, ret; + int i; if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = -EINVAL; @@ -3866,8 +3866,10 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) } cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL); - if (!cfilter) - return -ENOMEM; + if (!cfilter) { + aq_ret = -ENOMEM; + goto err_out; + } /* parse destination mac address */ for (i = 0; i < ETH_ALEN; i++) @@ -3915,13 +3917,13 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) /* Adding cloud filter programmed as TC filter */ if (tcf.dst_port) - ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true); + aq_ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true); else - ret = i40e_add_del_cloud_filter(vsi, cfilter, true); - if (ret) { + aq_ret = i40e_add_del_cloud_filter(vsi, cfilter, true); + if (aq_ret) { dev_err(&pf->pdev->dev, "VF %d: Failed to add cloud filter, err %pe aq_err %s\n", - vf->vf_id, ERR_PTR(ret), + vf->vf_id, ERR_PTR(aq_ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); goto err_free; } From 78c06141f7061b320579c9dd2ef9df6f56105130 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Wed, 22 Nov 2023 00:16:42 +0300 Subject: [PATCH 0573/1397] vsock/test: fix SEQPACKET message bounds test [ Upstream commit f0863888f6cfef33e3117dccfe94fa78edf76be4 ] Tune message length calculation to make this test work on machines where 'getpagesize()' returns >32KB. Now maximum message length is not hardcoded (on machines above it was smaller than 'getpagesize()' return value, thus we get negative value and test fails), but calculated at runtime and always bigger than 'getpagesize()' result. Reproduced on aarch64 with 64KB page size. Fixes: 5c338112e48a ("test/vsock: rework message bounds test") Signed-off-by: Arseniy Krasnov Reported-by: Bogdan Marcynkov Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20231121211642.163474-1-avkrasnov@salutedevices.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/vsock/vsock_test.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 90718c2fd4ea..5dc7767039f6 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -392,11 +392,12 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) } #define SOCK_BUF_SIZE (2 * 1024 * 1024) -#define MAX_MSG_SIZE (32 * 1024) +#define MAX_MSG_PAGES 4 static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) { unsigned long curr_hash; + size_t max_msg_size; int page_size; int msg_count; int fd; @@ -412,7 +413,8 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) curr_hash = 0; page_size = getpagesize(); - msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE; + max_msg_size = MAX_MSG_PAGES * page_size; + msg_count = SOCK_BUF_SIZE / max_msg_size; for (int i = 0; i < msg_count; i++) { ssize_t send_size; @@ -423,7 +425,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) /* Use "small" buffers and "big" buffers. */ if (i & 1) buf_size = page_size + - (rand() % (MAX_MSG_SIZE - page_size)); + (rand() % (max_msg_size - page_size)); else buf_size = 1 + (rand() % page_size); @@ -479,7 +481,6 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) unsigned long remote_hash; unsigned long curr_hash; int fd; - char buf[MAX_MSG_SIZE]; struct msghdr msg = {0}; struct iovec iov = {0}; @@ -507,8 +508,13 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) control_writeln("SRVREADY"); /* Wait, until peer sends whole data. */ control_expectln("SENDDONE"); - iov.iov_base = buf; - iov.iov_len = sizeof(buf); + iov.iov_len = MAX_MSG_PAGES * getpagesize(); + iov.iov_base = malloc(iov.iov_len); + if (!iov.iov_base) { + perror("malloc"); + exit(EXIT_FAILURE); + } + msg.msg_iov = &iov; msg.msg_iovlen = 1; @@ -533,6 +539,7 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) curr_hash += hash_djb2(msg.msg_iov[0].iov_base, recv_size); } + free(iov.iov_base); close(fd); remote_hash = control_readulong(); From 2ea6b18acb8de060048324081c127f634330b9dc Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 21 Nov 2023 16:42:17 -0800 Subject: [PATCH 0574/1397] net: axienet: Fix check for partial TX checksum [ Upstream commit fd0413bbf8b11f56e8aa842783b0deda0dfe2926 ] Due to a typo, the code checked the RX checksum feature in the TX path. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Samuel Holland Reviewed-by: Andrew Lunn Reviewed-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/20231122004219.3504219-1-samuel.holland@sifive.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index b7ec4dafae90..3297aff969c8 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -822,7 +822,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (lp->features & XAE_FEATURE_FULL_TX_CSUM) { /* Tx Full Checksum Offload Enabled */ cur_p->app0 |= 2; - } else if (lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) { + } else if (lp->features & XAE_FEATURE_PARTIAL_TX_CSUM) { csum_start_off = skb_transport_offset(skb); csum_index_off = csum_start_off + skb->csum_offset; /* Tx Partial Checksum Offload Enabled */ From 1dca26d632d481648867f251e7a05ce280754583 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 22 Nov 2023 17:17:08 -0600 Subject: [PATCH 0575/1397] net: ipa: fix one GSI register field width [ Upstream commit 37f0205538baf70beb57cdcb6c7d14aa13257926 ] The width of the R_LENGTH field of the EV_CH_E_CNTXT_1 GSI register is 24 bits (not 20 bits) starting with IPA v5.0. Fix this. Fixes: faf0678ec8a0 ("net: ipa: add IPA v5.0 GSI register definitions") Signed-off-by: Alex Elder Link: https://lore.kernel.org/r/20231122231708.896632-1-elder@linaro.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ipa/reg/gsi_reg-v5.0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/reg/gsi_reg-v5.0.c b/drivers/net/ipa/reg/gsi_reg-v5.0.c index d7b81a36d673..145eb0bd096d 100644 --- a/drivers/net/ipa/reg/gsi_reg-v5.0.c +++ b/drivers/net/ipa/reg/gsi_reg-v5.0.c @@ -78,7 +78,7 @@ REG_STRIDE_FIELDS(EV_CH_E_CNTXT_0, ev_ch_e_cntxt_0, 0x0001c000 + 0x12000 * GSI_EE_AP, 0x80); static const u32 reg_ev_ch_e_cntxt_1_fmask[] = { - [R_LENGTH] = GENMASK(19, 0), + [R_LENGTH] = GENMASK(23, 0), }; REG_STRIDE_FIELDS(EV_CH_E_CNTXT_1, ev_ch_e_cntxt_1, From 844a4272a462fb02a5245315258ba5a6808db840 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 26 Oct 2023 01:25:07 +0100 Subject: [PATCH 0576/1397] afs: Return ENOENT if no cell DNS record can be found [ Upstream commit 0167236e7d66c5e1e85d902a6abc2529b7544539 ] Make AFS return error ENOENT if no cell SRV or AFSDB DNS record (or cellservdb config file record) can be found rather than returning EDESTADDRREQ. Also add cell name lookup info to the cursor dump. Fixes: d5c32c89b208 ("afs: Fix cell DNS lookup") Reported-by: Markus Suvanto Link: https://bugzilla.kernel.org/show_bug.cgi?id=216637 Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Sasha Levin --- fs/afs/vl_rotate.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 488e58490b16..eb415ce56360 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -58,6 +58,12 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) } /* Status load is ordered after lookup counter load */ + if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) { + pr_warn("No record of cell %s\n", cell->name); + vc->error = -ENOENT; + return false; + } + if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { vc->error = -EDESTADDRREQ; return false; @@ -285,6 +291,7 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc) */ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) { + struct afs_cell *cell = vc->cell; static int count; int i; @@ -294,6 +301,9 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) rcu_read_lock(); pr_notice("EDESTADDR occurred\n"); + pr_notice("CELL: %s err=%d\n", cell->name, cell->error); + pr_notice("DNS: src=%u st=%u lc=%x\n", + cell->dns_source, cell->dns_status, cell->dns_lookup_count); pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n", vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error); From ceca54f1f0ff6ddc23412951d829b9c34b0246c3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 1 Nov 2023 22:03:28 +0000 Subject: [PATCH 0577/1397] afs: Fix file locking on R/O volumes to operate in local mode [ Upstream commit b590eb41be766c5a63acc7e8896a042f7a4e8293 ] AFS doesn't really do locking on R/O volumes as fileservers don't maintain state with each other and thus a lock on a R/O volume file on one fileserver will not be be visible to someone looking at the same file on another fileserver. Further, the server may return an error if you try it. Fix this by doing what other AFS clients do and handle filelocking on R/O volume files entirely within the client and don't touch the server. Fixes: 6c6c1d63c243 ("afs: Provide mount-time configurable byte-range file locking emulation") Signed-off-by: David Howells Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Sasha Levin --- fs/afs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/afs/super.c b/fs/afs/super.c index 95d713074dc8..e95fb4cb4fcd 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -407,6 +407,8 @@ static int afs_validate_fc(struct fs_context *fc) return PTR_ERR(volume); ctx->volume = volume; + if (volume->type != AFSVL_RWVOL) + ctx->flock_mode = afs_flock_mode_local; } return 0; From 399d76d3306453200621f8b5c6fc7df28c659dcd Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 16 Nov 2023 13:14:35 +0100 Subject: [PATCH 0578/1397] nvme: blank out authentication fabrics options if not configured [ Upstream commit c7ca9757bda35ff9ce27ab42f2cb8b84d983e6ad ] If the config option NVME_HOST_AUTH is not selected we should not accept the corresponding fabrics options. This allows userspace to detect if NVMe authentication has been enabled for the kernel. Cc: Shin'ichiro Kawasaki Fixes: f50fff73d620 ("nvme: implement In-Band authentication") Signed-off-by: Hannes Reinecke Tested-by: Shin'ichiro Kawasaki Reviewed-by: Daniel Wagner Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/fabrics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 8175d49f2909..92ba315cfe19 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -645,8 +645,10 @@ static const match_table_t opt_tokens = { { NVMF_OPT_TOS, "tos=%d" }, { NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" }, { NVMF_OPT_DISCOVERY, "discovery" }, +#ifdef CONFIG_NVME_HOST_AUTH { NVMF_OPT_DHCHAP_SECRET, "dhchap_secret=%s" }, { NVMF_OPT_DHCHAP_CTRL_SECRET, "dhchap_ctrl_secret=%s" }, +#endif { NVMF_OPT_ERR, NULL } }; From 2291653c27236d34ca7df3bfd3427ce6e30c2d95 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Nov 2023 08:13:36 -0500 Subject: [PATCH 0579/1397] nvmet: nul-terminate the NQNs passed in the connect command [ Upstream commit 1c22e0295a5eb571c27b53c7371f95699ef705ff ] The host and subsystem NQNs are passed in the connect command payload and interpreted as nul-terminated strings. Ensure they actually are nul-terminated before using them. Fixes: a07b4970f464 "nvmet: add a generic NVMe target") Reported-by: Alon Zahavi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fabrics-cmd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 43b5bd8bb6a5..d8da840a1c0e 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -244,6 +244,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } + d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; + d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req, le32_to_cpu(c->kato), &ctrl); if (status) @@ -313,6 +315,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) goto out; } + d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; + d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; ctrl = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, le16_to_cpu(d->cntlid), req); if (!ctrl) { From 7091d915a26af44aa81f0c138ff2f0a9f83ed51a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Nov 2023 18:36:48 +0100 Subject: [PATCH 0580/1397] USB: dwc3: qcom: fix resource leaks on probe deferral [ Upstream commit 51392a1879ff06dc21b68aef4825f6ef68a7be42 ] The driver needs to deregister and free the newly allocated dwc3 core platform device on ACPI probe errors (e.g. probe deferral) and on driver unbind but instead it leaked those resources while erroneously dropping a reference to the parent platform device which is still in use. For OF probing the driver takes a reference to the dwc3 core platform device which has also always been leaked. Fix the broken ACPI tear down and make sure to drop the dwc3 core reference for both OF and ACPI. Fixes: 8fd95da2cfb5 ("usb: dwc3: qcom: Release the correct resources in dwc3_qcom_remove()") Fixes: 2bc02355f8ba ("usb: dwc3: qcom: Add support for booting with ACPI") Fixes: a4333c3a6ba9 ("usb: dwc3: Add Qualcomm DWC3 glue driver") Cc: stable@vger.kernel.org # 4.18 Cc: Christophe JAILLET Cc: Lee Jones Signed-off-by: Johan Hovold Acked-by: Andrew Halaney Link: https://lore.kernel.org/r/20231117173650.21161-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 9cf87666fc6e ("USB: dwc3: qcom: fix ACPI platform device leak") Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-qcom.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 3de43df6bbe8..00c3021b43ce 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -758,6 +758,7 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) if (!qcom->dwc3) { ret = -ENODEV; dev_err(dev, "failed to get dwc3 platform device\n"); + of_platform_depopulate(dev); } node_put: @@ -899,7 +900,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev) if (ret) { dev_err(dev, "failed to register DWC3 Core, err=%d\n", ret); - goto depopulate; + goto clk_disable; } ret = dwc3_qcom_interconnect_init(qcom); @@ -934,7 +935,8 @@ static int dwc3_qcom_probe(struct platform_device *pdev) if (np) of_platform_depopulate(&pdev->dev); else - platform_device_put(pdev); + platform_device_del(qcom->dwc3); + platform_device_put(qcom->dwc3); clk_disable: for (i = qcom->num_clocks - 1; i >= 0; i--) { clk_disable_unprepare(qcom->clks[i]); @@ -957,7 +959,8 @@ static void dwc3_qcom_remove(struct platform_device *pdev) if (np) of_platform_depopulate(&pdev->dev); else - platform_device_put(pdev); + platform_device_del(qcom->dwc3); + platform_device_put(qcom->dwc3); for (i = qcom->num_clocks - 1; i >= 0; i--) { clk_disable_unprepare(qcom->clks[i]); From 513d4a5e438a51cbb13ad6761916a41c50d65845 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Nov 2023 18:36:50 +0100 Subject: [PATCH 0581/1397] USB: dwc3: qcom: fix ACPI platform device leak [ Upstream commit 9cf87666fc6e08572341fe08ecd909935998fbbd ] Make sure to free the "urs" platform device, which is created for some ACPI platforms, on probe errors and on driver unbind. Compile-tested only. Fixes: c25c210f590e ("usb: dwc3: qcom: add URS Host support for sdm845 ACPI boot") Cc: Shawn Guo Signed-off-by: Johan Hovold Acked-by: Andrew Halaney Acked-by: Shawn Guo Link: https://lore.kernel.org/r/20231117173650.21161-4-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-qcom.c | 37 +++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 00c3021b43ce..d6c81aa298b9 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -767,9 +767,9 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) return ret; } -static struct platform_device * -dwc3_qcom_create_urs_usb_platdev(struct device *dev) +static struct platform_device *dwc3_qcom_create_urs_usb_platdev(struct device *dev) { + struct platform_device *urs_usb = NULL; struct fwnode_handle *fwh; struct acpi_device *adev; char name[8]; @@ -789,9 +789,26 @@ dwc3_qcom_create_urs_usb_platdev(struct device *dev) adev = to_acpi_device_node(fwh); if (!adev) - return NULL; + goto err_put_handle; + + urs_usb = acpi_create_platform_device(adev, NULL); + if (IS_ERR_OR_NULL(urs_usb)) + goto err_put_handle; + + return urs_usb; - return acpi_create_platform_device(adev, NULL); +err_put_handle: + fwnode_handle_put(fwh); + + return urs_usb; +} + +static void dwc3_qcom_destroy_urs_usb_platdev(struct platform_device *urs_usb) +{ + struct fwnode_handle *fwh = urs_usb->dev.fwnode; + + platform_device_unregister(urs_usb); + fwnode_handle_put(fwh); } static int dwc3_qcom_probe(struct platform_device *pdev) @@ -875,13 +892,13 @@ static int dwc3_qcom_probe(struct platform_device *pdev) qcom->qscratch_base = devm_ioremap_resource(dev, parent_res); if (IS_ERR(qcom->qscratch_base)) { ret = PTR_ERR(qcom->qscratch_base); - goto clk_disable; + goto free_urs; } ret = dwc3_qcom_setup_irq(pdev); if (ret) { dev_err(dev, "failed to setup IRQs, err=%d\n", ret); - goto clk_disable; + goto free_urs; } /* @@ -900,7 +917,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev) if (ret) { dev_err(dev, "failed to register DWC3 Core, err=%d\n", ret); - goto clk_disable; + goto free_urs; } ret = dwc3_qcom_interconnect_init(qcom); @@ -937,6 +954,9 @@ static int dwc3_qcom_probe(struct platform_device *pdev) else platform_device_del(qcom->dwc3); platform_device_put(qcom->dwc3); +free_urs: + if (qcom->urs_usb) + dwc3_qcom_destroy_urs_usb_platdev(qcom->urs_usb); clk_disable: for (i = qcom->num_clocks - 1; i >= 0; i--) { clk_disable_unprepare(qcom->clks[i]); @@ -962,6 +982,9 @@ static void dwc3_qcom_remove(struct platform_device *pdev) platform_device_del(qcom->dwc3); platform_device_put(qcom->dwc3); + if (qcom->urs_usb) + dwc3_qcom_destroy_urs_usb_platdev(qcom->urs_usb); + for (i = qcom->num_clocks - 1; i >= 0; i--) { clk_disable_unprepare(qcom->clks[i]); clk_put(qcom->clks[i]); From 328854deec80e115509cee74eeb533e139284a56 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Nov 2023 12:41:26 +0100 Subject: [PATCH 0582/1397] lockdep: Fix block chain corruption [ Upstream commit bca4104b00fec60be330cd32818dd5c70db3d469 ] Kent reported an occasional KASAN splat in lockdep. Mark then noted: > I suspect the dodgy access is to chain_block_buckets[-1], which hits the last 4 > bytes of the redzone and gets (incorrectly/misleadingly) attributed to > nr_large_chain_blocks. That would mean @size == 0, at which point size_to_bucket() returns -1 and the above happens. alloc_chain_hlocks() has 'size - req', for the first with the precondition 'size >= rq', which allows the 0. This code is trying to split a block, del_chain_block() takes what we need, and add_chain_block() puts back the remainder, except in the above case the remainder is 0 sized and things go sideways. Fixes: 810507fe6fd5 ("locking/lockdep: Reuse freed chain_hlocks entries") Reported-by: Kent Overstreet Signed-off-by: Peter Zijlstra (Intel) Tested-by: Kent Overstreet Link: https://lkml.kernel.org/r/20231121114126.GH8262@noisy.programming.kicks-ass.net Signed-off-by: Sasha Levin --- kernel/locking/lockdep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index e85b5ad3e206..151bd3de5936 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3497,7 +3497,8 @@ static int alloc_chain_hlocks(int req) size = chain_block_size(curr); if (likely(size >= req)) { del_chain_block(0, size, chain_block_next(curr)); - add_chain_block(curr + req, size - req); + if (size > req) + add_chain_block(curr + req, size - req); return curr; } } From 2b00d1fd9a40eede0dd38265f839026a4bb2b4b1 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 28 Aug 2023 17:08:57 +0200 Subject: [PATCH 0583/1397] mm: add a NO_INHERIT flag to the PR_SET_MDWE prctl [ Upstream commit 24e41bf8a6b424c76c5902fb999e9eca61bdf83d ] This extends the current PR_SET_MDWE prctl arg with a bit to indicate that the process doesn't want MDWE protection to propagate to children. To implement this no-inherit mode, the tag in current->mm->flags must be absent from MMF_INIT_MASK. This means that the encoding for "MDWE but without inherit" is different in the prctl than in the mm flags. This leads to a bit of bit-mangling in the prctl implementation. Link: https://lkml.kernel.org/r/20230828150858.393570-6-revest@chromium.org Signed-off-by: Florent Revest Reviewed-by: Kees Cook Reviewed-by: Catalin Marinas Cc: Alexey Izbyshev Cc: Anshuman Khandual Cc: Ayush Jain Cc: David Hildenbrand Cc: Greg Thelen Cc: Joey Gouly Cc: KP Singh Cc: Mark Brown Cc: Michal Hocko Cc: Peter Xu Cc: Ryan Roberts Cc: Szabolcs Nagy Cc: Topi Miettinen Signed-off-by: Andrew Morton Stable-dep-of: 793838138c15 ("prctl: Disable prctl(PR_SET_MDWE) on parisc") Signed-off-by: Sasha Levin --- include/linux/sched/coredump.h | 10 ++++++++++ include/uapi/linux/prctl.h | 1 + kernel/fork.c | 2 +- kernel/sys.c | 32 ++++++++++++++++++++++++++------ tools/include/uapi/linux/prctl.h | 1 + 5 files changed, 39 insertions(+), 7 deletions(-) diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 0ee96ea7a0e9..1b37fa8fc723 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -91,4 +91,14 @@ static inline int get_dumpable(struct mm_struct *mm) MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK) #define MMF_VM_MERGE_ANY 29 +#define MMF_HAS_MDWE_NO_INHERIT 30 + +static inline unsigned long mmf_init_flags(unsigned long flags) +{ + if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) + flags &= ~((1UL << MMF_HAS_MDWE) | + (1UL << MMF_HAS_MDWE_NO_INHERIT)); + return flags & MMF_INIT_MASK; +} + #endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 9a85c69782bd..370ed14b1ae0 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -284,6 +284,7 @@ struct prctl_mm_map { /* Memory deny write / execute */ #define PR_SET_MDWE 65 # define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0) +# define PR_MDWE_NO_INHERIT (1UL << 1) #define PR_GET_MDWE 66 diff --git a/kernel/fork.c b/kernel/fork.c index 3b6d20dfb9a8..177ce7438db6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1288,7 +1288,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, hugetlb_count_init(mm); if (current->mm) { - mm->flags = current->mm->flags & MMF_INIT_MASK; + mm->flags = mmf_init_flags(current->mm->flags); mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; } else { mm->flags = default_dump_filter; diff --git a/kernel/sys.c b/kernel/sys.c index 2410e3999ebe..4a8073c1b255 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2368,19 +2368,41 @@ static int prctl_set_vma(unsigned long opt, unsigned long start, } #endif /* CONFIG_ANON_VMA_NAME */ +static inline unsigned long get_current_mdwe(void) +{ + unsigned long ret = 0; + + if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) + ret |= PR_MDWE_REFUSE_EXEC_GAIN; + if (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags)) + ret |= PR_MDWE_NO_INHERIT; + + return ret; +} + static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3, unsigned long arg4, unsigned long arg5) { + unsigned long current_bits; + if (arg3 || arg4 || arg5) return -EINVAL; - if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN)) + if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT)) + return -EINVAL; + + /* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */ + if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN)) return -EINVAL; + current_bits = get_current_mdwe(); + if (current_bits && current_bits != bits) + return -EPERM; /* Cannot unset the flags */ + + if (bits & PR_MDWE_NO_INHERIT) + set_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags); if (bits & PR_MDWE_REFUSE_EXEC_GAIN) set_bit(MMF_HAS_MDWE, ¤t->mm->flags); - else if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) - return -EPERM; /* Cannot unset the flag */ return 0; } @@ -2390,9 +2412,7 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3, { if (arg2 || arg3 || arg4 || arg5) return -EINVAL; - - return test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ? - PR_MDWE_REFUSE_EXEC_GAIN : 0; + return get_current_mdwe(); } static int prctl_get_auxv(void __user *addr, unsigned long len) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 9a85c69782bd..370ed14b1ae0 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -284,6 +284,7 @@ struct prctl_mm_map { /* Memory deny write / execute */ #define PR_SET_MDWE 65 # define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0) +# define PR_MDWE_NO_INHERIT (1UL << 1) #define PR_GET_MDWE 66 From 790afa133f890c9dcf46f3476406eb0e2c638634 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 18 Nov 2023 19:33:35 +0100 Subject: [PATCH 0584/1397] prctl: Disable prctl(PR_SET_MDWE) on parisc [ Upstream commit 793838138c157d4c49f4fb744b170747e3dabf58 ] systemd-254 tries to use prctl(PR_SET_MDWE) for it's MemoryDenyWriteExecute functionality, but fails on parisc which still needs executable stacks in certain combinations of gcc/glibc/kernel. Disable prctl(PR_SET_MDWE) by returning -EINVAL for now on parisc, until userspace has catched up. Signed-off-by: Helge Deller Co-developed-by: Linus Torvalds Reported-by: Sam James Closes: https://github.com/systemd/systemd/issues/29775 Tested-by: Sam James Link: https://lore.kernel.org/all/875y2jro9a.fsf@gentoo.org/ Cc: # v6.3+ Signed-off-by: Sasha Levin --- kernel/sys.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/sys.c b/kernel/sys.c index 4a8073c1b255..7a4ae6d5aecd 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2395,6 +2395,10 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3, if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN)) return -EINVAL; + /* PARISC cannot allow mdwe as it needs writable stacks */ + if (IS_ENABLED(CONFIG_PARISC)) + return -EINVAL; + current_bits = get_current_mdwe(); if (current_bits && current_bits != bits) return -EPERM; /* Cannot unset the flags */ From fbb41cf29d6ca1534e0e145b5b48db1bedaa02f4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 16 Nov 2023 12:52:29 +0000 Subject: [PATCH 0585/1397] kselftest/arm64: Fix output formatting for za-fork commit 460e462d22542adfafd8a5bc979437df73f1cbf3 upstream. The za-fork test does not output a newline when reporting the result of the one test it runs, causing the counts printed by kselftest to be included in the test name. Add the newline. Fixes: 266679ffd867 ("kselftest/arm64: Convert za-fork to use kselftest.h") Cc: # 6.4.x Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20231116-arm64-fix-za-fork-output-v1-1-42c03d4f5759@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/arm64/fp/za-fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/za-fork.c b/tools/testing/selftests/arm64/fp/za-fork.c index b86cb1049497..587b94648222 100644 --- a/tools/testing/selftests/arm64/fp/za-fork.c +++ b/tools/testing/selftests/arm64/fp/za-fork.c @@ -85,7 +85,7 @@ int main(int argc, char **argv) */ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); if (ret >= 0) { - ksft_test_result(fork_test(), "fork_test"); + ksft_test_result(fork_test(), "fork_test\n"); } else { ksft_print_msg("SME not supported\n"); From 405b4f6d211345b1be64df938ef980e816519af3 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 30 Oct 2023 16:23:20 -0700 Subject: [PATCH 0586/1397] drm/msm/dpu: Add missing safe_lut_tbl in sc8280xp catalog commit a33b2431d11b4df137bbcfdd5a5adfa054c2479e upstream. During USB transfers on the SC8280XP __arm_smmu_tlb_sync() is seen to typically take 1-2ms to complete. As expected this results in poor performance, something that has been mitigated by proposing running the iommu in non-strict mode (boot with iommu.strict=0). This turns out to be related to the SAFE logic, and programming the QOS SAFE values in the DPU (per suggestion from Rob and Doug) reduces the TLB sync time to below 10us, which means significant less time spent with interrupts disabled and a significant boost in throughput. Fixes: 4a352c2fc15a ("drm/msm/dpu: Introduce SC8280XP") Cc: stable@vger.kernel.org Suggested-by: Doug Anderson Suggested-by: Rob Clark Signed-off-by: Bjorn Andersson Tested-by: Johan Hovold Tested-by: Steev Klimaszewski Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/565094/ Link: https://lore.kernel.org/r/20231030-sc8280xp-dpu-safe-lut-v1-1-6d485d7b428f@quicinc.com Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h index 58f5e25679b1..ff9adb8000ac 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h @@ -419,6 +419,7 @@ static const struct dpu_perf_cfg sc8280xp_perf_data = { .min_llcc_ib = 0, .min_dram_ib = 800000, .danger_lut_tbl = {0xf, 0xffff, 0x0}, + .safe_lut_tbl = {0xfe00, 0xfe00, 0xffff}, .qos_lut_tbl = { {.nentry = ARRAY_SIZE(sc8180x_qos_linear), .entries = sc8180x_qos_linear From f1a0e42f6651b86b02fdb1881ec69eeba5cf155c Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 16 Nov 2023 14:02:12 +0100 Subject: [PATCH 0587/1397] drm/ast: Disconnect BMC if physical connector is connected commit 8d6ef26501b97243ee6c16b8187c5b38cb69b77d upstream. Many user-space compositors fail with mode setting if a CRTC has more than one connected connector. This is the case with the BMC on Aspeed systems. Work around this problem by setting the BMC's connector status to disconnected when the physical connector has a display attached. This way compositors will only see one connected connector at a time; either the physical one or the BMC. Suggested-by: Jocelyn Falempe Fixes: e329cb53b45d ("drm/ast: Add BMC virtual connector") Signed-off-by: Thomas Zimmermann Cc: # v6.6+ Reviewed-by: Jocelyn Falempe Link: https://patchwork.freedesktop.org/patch/msgid/20231116130217.22931-1-tzimmermann@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_drv.h | 13 ++++++- drivers/gpu/drm/ast/ast_mode.c | 62 ++++++++++++++++++++++++++++++---- 2 files changed, 67 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 848a9f1403e8..f7053f2972bb 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -172,6 +172,17 @@ to_ast_sil164_connector(struct drm_connector *connector) return container_of(connector, struct ast_sil164_connector, base); } +struct ast_bmc_connector { + struct drm_connector base; + struct drm_connector *physical_connector; +}; + +static inline struct ast_bmc_connector * +to_ast_bmc_connector(struct drm_connector *connector) +{ + return container_of(connector, struct ast_bmc_connector, base); +} + /* * Device */ @@ -216,7 +227,7 @@ struct ast_device { } astdp; struct { struct drm_encoder encoder; - struct drm_connector connector; + struct ast_bmc_connector bmc_connector; } bmc; } output; diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 32f04ec6c386..3de0f457fff6 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1767,6 +1767,30 @@ static const struct drm_encoder_funcs ast_bmc_encoder_funcs = { .destroy = drm_encoder_cleanup, }; +static int ast_bmc_connector_helper_detect_ctx(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, + bool force) +{ + struct ast_bmc_connector *bmc_connector = to_ast_bmc_connector(connector); + struct drm_connector *physical_connector = bmc_connector->physical_connector; + + /* + * Most user-space compositors cannot handle more than one connected + * connector per CRTC. Hence, we only mark the BMC as connected if the + * physical connector is disconnected. If the physical connector's status + * is connected or unknown, the BMC remains disconnected. This has no + * effect on the output of the BMC. + * + * FIXME: Remove this logic once user-space compositors can handle more + * than one connector per CRTC. The BMC should always be connected. + */ + + if (physical_connector && physical_connector->status == connector_status_disconnected) + return connector_status_connected; + + return connector_status_disconnected; +} + static int ast_bmc_connector_helper_get_modes(struct drm_connector *connector) { return drm_add_modes_noedid(connector, 4096, 4096); @@ -1774,6 +1798,7 @@ static int ast_bmc_connector_helper_get_modes(struct drm_connector *connector) static const struct drm_connector_helper_funcs ast_bmc_connector_helper_funcs = { .get_modes = ast_bmc_connector_helper_get_modes, + .detect_ctx = ast_bmc_connector_helper_detect_ctx, }; static const struct drm_connector_funcs ast_bmc_connector_funcs = { @@ -1784,12 +1809,33 @@ static const struct drm_connector_funcs ast_bmc_connector_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; -static int ast_bmc_output_init(struct ast_device *ast) +static int ast_bmc_connector_init(struct drm_device *dev, + struct ast_bmc_connector *bmc_connector, + struct drm_connector *physical_connector) +{ + struct drm_connector *connector = &bmc_connector->base; + int ret; + + ret = drm_connector_init(dev, connector, &ast_bmc_connector_funcs, + DRM_MODE_CONNECTOR_VIRTUAL); + if (ret) + return ret; + + drm_connector_helper_add(connector, &ast_bmc_connector_helper_funcs); + + bmc_connector->physical_connector = physical_connector; + + return 0; +} + +static int ast_bmc_output_init(struct ast_device *ast, + struct drm_connector *physical_connector) { struct drm_device *dev = &ast->base; struct drm_crtc *crtc = &ast->crtc; struct drm_encoder *encoder = &ast->output.bmc.encoder; - struct drm_connector *connector = &ast->output.bmc.connector; + struct ast_bmc_connector *bmc_connector = &ast->output.bmc.bmc_connector; + struct drm_connector *connector = &bmc_connector->base; int ret; ret = drm_encoder_init(dev, encoder, @@ -1799,13 +1845,10 @@ static int ast_bmc_output_init(struct ast_device *ast) return ret; encoder->possible_crtcs = drm_crtc_mask(crtc); - ret = drm_connector_init(dev, connector, &ast_bmc_connector_funcs, - DRM_MODE_CONNECTOR_VIRTUAL); + ret = ast_bmc_connector_init(dev, bmc_connector, physical_connector); if (ret) return ret; - drm_connector_helper_add(connector, &ast_bmc_connector_helper_funcs); - ret = drm_connector_attach_encoder(connector, encoder); if (ret) return ret; @@ -1864,6 +1907,7 @@ static const struct drm_mode_config_funcs ast_mode_config_funcs = { int ast_mode_config_init(struct ast_device *ast) { struct drm_device *dev = &ast->base; + struct drm_connector *physical_connector = NULL; int ret; ret = drmm_mode_config_init(dev); @@ -1904,23 +1948,27 @@ int ast_mode_config_init(struct ast_device *ast) ret = ast_vga_output_init(ast); if (ret) return ret; + physical_connector = &ast->output.vga.vga_connector.base; } if (ast->tx_chip_types & AST_TX_SIL164_BIT) { ret = ast_sil164_output_init(ast); if (ret) return ret; + physical_connector = &ast->output.sil164.sil164_connector.base; } if (ast->tx_chip_types & AST_TX_DP501_BIT) { ret = ast_dp501_output_init(ast); if (ret) return ret; + physical_connector = &ast->output.dp501.connector; } if (ast->tx_chip_types & AST_TX_ASTDP_BIT) { ret = ast_astdp_output_init(ast); if (ret) return ret; + physical_connector = &ast->output.astdp.connector; } - ret = ast_bmc_output_init(ast); + ret = ast_bmc_output_init(ast, physical_connector); if (ret) return ret; From e9c62671852f7eea68c9a834c6ded9bca8a66267 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 7 Nov 2023 12:22:40 +0200 Subject: [PATCH 0588/1397] thunderbolt: Set lane bonding bit only for downstream port commit 24d85bb3be373b5831699bddf698b392bd2b904d upstream. Fix the lane bonding procedure to follow the steps described in USB4 Connection Manager guide. Hence, set the lane bonding bit only for downstream port. This is needed for certain ASMedia device, otherwise lane bonding fails and the device disconnects. Cc: stable@vger.kernel.org Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index bd5815f8f23b..509b99af5087 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -1082,7 +1082,7 @@ int tb_port_lane_bonding_enable(struct tb_port *port) * Only set bonding if the link was not already bonded. This * avoids the lane adapter to re-enter bonding state. */ - if (width == TB_LINK_WIDTH_SINGLE) { + if (width == TB_LINK_WIDTH_SINGLE && !tb_is_upstream_port(port)) { ret = tb_port_set_lane_bonding(port, true); if (ret) goto err_lane1; From 30b5526beaf3885eb80c6d95c0dc63889001f8c0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 12 Nov 2023 21:36:27 +0100 Subject: [PATCH 0589/1397] ACPI: video: Use acpi_device_fix_up_power_children() commit c93695494606326d7fd72b46a2a657139ccb0dec upstream. Commit 89c290ea7589 ("ACPI: video: Put ACPI video and its child devices into D0 on boot") introduced calling acpi_device_fix_up_power_extended() on the video card for which the ACPI video bus is the companion device. This unnecessarily touches the power-state of the GPU itself, while the issue it tries to address only requires calling _PS0 on the child devices. Touching the power-state of the GPU itself is causing suspend / resume issues on e.g. a Lenovo ThinkPad W530. Instead use acpi_device_fix_up_power_children(), which only touches the child devices, to fix this. Fixes: 89c290ea7589 ("ACPI: video: Put ACPI video and its child devices into D0 on boot") Reported-by: Owen T. Heisler Closes: https://lore.kernel.org/regressions/9f36fb06-64c4-4264-aaeb-4e1289e764c4@owenh.net/ Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/273 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218124 Tested-by: Kai-Heng Feng Tested-by: Owen T. Heisler Signed-off-by: Hans de Goede Cc: 6.6+ # 6.6+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index b411948594ff..4e868454b38d 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -2031,7 +2031,7 @@ static int acpi_video_bus_add(struct acpi_device *device) * HP ZBook Fury 16 G10 requires ACPI video's child devices have _PS0 * evaluated to have functional panel brightness control. */ - acpi_device_fix_up_power_extended(device); + acpi_device_fix_up_power_children(device); pr_info("%s [%s] (multi-head: %s rom: %s post: %s)\n", ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device), From 73111754b5260ce3b8cebcb31b3fdcabfd5e9401 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 27 Oct 2023 19:36:51 +0100 Subject: [PATCH 0590/1397] ACPI: processor_idle: use raw_safe_halt() in acpi_idle_play_dead() commit 9bb69ba4c177dccaa1f5b5cbdf80b67813328348 upstream. Xen HVM guests were observed taking triple-faults when attempting to online a previously offlined vCPU. Investigation showed that the fault was coming from a failing call to lockdep_assert_irqs_disabled(), in load_current_idt() which was too early in the CPU bringup to actually catch the exception and report the failure cleanly. This was a false positive, caused by acpi_idle_play_dead() setting the per-cpu hardirqs_enabled flag by calling safe_halt(). Switch it to use raw_safe_halt() instead, which doesn't do so. Signed-off-by: David Woodhouse Acked-by: Peter Zijlstra (Intel) Cc: 6.6+ # 6.6+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/processor_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 3a34a8c425fe..55437f5e0c3a 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -592,7 +592,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) while (1) { if (cx->entry_method == ACPI_CSTATE_HALT) - safe_halt(); + raw_safe_halt(); else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { io_idle(cx->address); } else From a0bb55dbedbe7ab284990ba4d6a2f59a56c99a4f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 15 Nov 2023 19:02:22 +0100 Subject: [PATCH 0591/1397] ACPI: resource: Skip IRQ override on ASUS ExpertBook B1402CVA commit bd911485294a6f0596e4592ed442438015cffc8a upstream. Like various other ASUS ExpertBook-s, the ASUS ExpertBook B1402CVA has an ACPI DSDT table that describes IRQ 1 as ActiveLow while the kernel overrides it to EdgeHigh. This prevents the keyboard from working. To fix this issue, add this laptop to the skip_override_table so that the kernel does not override IRQ 1. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218114 Cc: All applicable Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 80fbd385e8b4..d09e3e7bcb58 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -446,6 +446,13 @@ static const struct dmi_system_id asus_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "B1402CBA"), }, }, + { + /* Asus ExpertBook B1402CVA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "B1402CVA"), + }, + }, { .ident = "Asus ExpertBook B1502CBA", .matches = { From 9b9b04eb713c461ae46c11c3b396fa436a491e9a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 12 Nov 2023 21:36:26 +0100 Subject: [PATCH 0592/1397] ACPI: PM: Add acpi_device_fix_up_power_children() function commit 37ba91a82e3b9de35f64348c62b5ec7d74e3a41c upstream. In some cases it is necessary to fix-up the power-state of an ACPI device's children without touching the ACPI device itself add a new acpi_device_fix_up_power_children() function for this. Signed-off-by: Hans de Goede Cc: 6.6+ # 6.6+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/device_pm.c | 13 +++++++++++++ include/acpi/acpi_bus.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index f007116a8427..3b4d048c4941 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -397,6 +397,19 @@ void acpi_device_fix_up_power_extended(struct acpi_device *adev) } EXPORT_SYMBOL_GPL(acpi_device_fix_up_power_extended); +/** + * acpi_device_fix_up_power_children - Force a device's children into D0. + * @adev: Parent device object whose children's power state is to be fixed up. + * + * Call acpi_device_fix_up_power() for @adev's children so long as they + * are reported as present and enabled. + */ +void acpi_device_fix_up_power_children(struct acpi_device *adev) +{ + acpi_dev_for_each_child(adev, fix_up_power_if_applicable, NULL); +} +EXPORT_SYMBOL_GPL(acpi_device_fix_up_power_children); + int acpi_device_update_power(struct acpi_device *device, int *state_p) { int state; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 254685085c82..0b7eab0ef7d7 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -539,6 +539,7 @@ int acpi_device_set_power(struct acpi_device *device, int state); int acpi_bus_init_power(struct acpi_device *device); int acpi_device_fix_up_power(struct acpi_device *device); void acpi_device_fix_up_power_extended(struct acpi_device *adev); +void acpi_device_fix_up_power_children(struct acpi_device *adev); int acpi_bus_update_power(acpi_handle handle, int *state_p); int acpi_device_update_power(struct acpi_device *device, int *state_p); bool acpi_bus_power_manageable(acpi_handle handle); From fe5b55bcfe242fc4a02770a3f79ac76e28334271 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 6 Nov 2023 18:12:30 +0100 Subject: [PATCH 0593/1397] swiotlb-xen: provide the "max_mapping_size" method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bff2a2d453a1b683378b4508b86b84389f551a00 upstream. There's a bug that when using the XEN hypervisor with bios with large multi-page bio vectors on NVMe, the kernel deadlocks [1]. The deadlocks are caused by inability to map a large bio vector - dma_map_sgtable always returns an error, this gets propagated to the block layer as BLK_STS_RESOURCE and the block layer retries the request indefinitely. XEN uses the swiotlb framework to map discontiguous pages into contiguous runs that are submitted to the PCIe device. The swiotlb framework has a limitation on the length of a mapping - this needs to be announced with the max_mapping_size method to make sure that the hardware drivers do not create larger mappings. Without max_mapping_size, the NVMe block driver would create large mappings that overrun the maximum mapping size. Reported-by: Marek Marczykowski-Górecki Link: https://lore.kernel.org/stable/ZTNH0qtmint%2FzLJZ@mail-itl/ [1] Tested-by: Marek Marczykowski-Górecki Suggested-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Keith Busch Signed-off-by: Mikulas Patocka Acked-by: Stefano Stabellini Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/151bef41-e817-aea9-675-a35fdac4ed@redhat.com Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/xen/swiotlb-xen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 946bd56f0ac5..0e6c6c25d154 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -405,4 +405,5 @@ const struct dma_map_ops xen_swiotlb_dma_ops = { .get_sgtable = dma_common_get_sgtable, .alloc_pages = dma_common_alloc_pages, .free_pages = dma_common_free_pages, + .max_mapping_size = swiotlb_max_mapping_size, }; From 2214e2bb5489145aba944874d0ee1652a0a63dc8 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 22 Nov 2023 22:44:47 +0100 Subject: [PATCH 0594/1397] tls: fix NULL deref on tls_sw_splice_eof() with empty record commit 53f2cb491b500897a619ff6abd72f565933760f0 upstream. syzkaller discovered that if tls_sw_splice_eof() is executed as part of sendfile() when the plaintext/ciphertext sk_msg are empty, the send path gets confused because the empty ciphertext buffer does not have enough space for the encryption overhead. This causes tls_push_record() to go on the `split = true` path (which is only supposed to be used when interacting with an attached BPF program), and then get further confused and hit the tls_merge_open_record() path, which then assumes that there must be at least one populated buffer element, leading to a NULL deref. It is possible to have empty plaintext/ciphertext buffers if we previously bailed from tls_sw_sendmsg_locked() via the tls_trim_both_msgs() path. tls_sw_push_pending_record() already handles this case correctly; let's do the same check in tls_sw_splice_eof(). Fixes: df720d288dbb ("tls/sw: Use splice_eof() to flush") Cc: stable@vger.kernel.org Reported-by: syzbot+40d43509a099ea756317@syzkaller.appspotmail.com Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20231122214447.675768-1-jannh@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/tls/tls_sw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9634dfd636fd..779815b885e9 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1232,11 +1232,14 @@ void tls_sw_splice_eof(struct socket *sock) lock_sock(sk); retry: + /* same checks as in tls_sw_push_pending_record() */ rec = ctx->open_rec; if (!rec) goto unlock; msg_pl = &rec->msg_plaintext; + if (msg_pl->sg.size == 0) + goto unlock; /* Check the BPF advisor and perform transmission. */ ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, From 9ab2b084737963812371d071fc83e886e3525147 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 20 Nov 2023 14:18:31 -0800 Subject: [PATCH 0595/1397] io_uring: fix off-by one bvec index commit d6fef34ee4d102be448146f24caf96d7b4a05401 upstream. If the offset equals the bv_len of the first registered bvec, then the request does not include any of that first bvec. Skip it so that drivers don't have to deal with a zero length bvec, which was observed to break NVMe's PRP list creation. Cc: stable@vger.kernel.org Fixes: bd11b3a391e3 ("io_uring: don't use iov_iter_advance() for fixed buffers") Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20231120221831.2646460-1-kbusch@meta.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/rsrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index d9c853d10587..dde501abd719 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1261,7 +1261,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter, */ const struct bio_vec *bvec = imu->bvec; - if (offset <= bvec->bv_len) { + if (offset < bvec->bv_len) { /* * Note, huge pages buffers consists of one large * bvec entry and should always go this way. The other From 7337e993b535d3c982506d0242a20270e28c06aa Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 20 Nov 2023 13:25:01 +0800 Subject: [PATCH 0596/1397] bcache: replace a mistaken IS_ERR() by IS_ERR_OR_NULL() in btree_gc_coalesce() commit f72f4312d4388376fc8a1f6cf37cb21a0d41758b upstream. Commit 028ddcac477b ("bcache: Remove unnecessary NULL point check in node allocations") do the following change inside btree_gc_coalesce(), 31 @@ -1340,7 +1340,7 @@ static int btree_gc_coalesce( 32 memset(new_nodes, 0, sizeof(new_nodes)); 33 closure_init_stack(&cl); 34 35 - while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b)) 36 + while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b)) 37 keys += r[nodes++].keys; 38 39 blocks = btree_default_blocks(b->c) * 2 / 3; At line 35 the original r[nodes].b is not always allocatored from __bch_btree_node_alloc(), and possibly initialized as NULL pointer by caller of btree_gc_coalesce(). Therefore the change at line 36 is not correct. This patch replaces the mistaken IS_ERR() by IS_ERR_OR_NULL() to avoid potential issue. Fixes: 028ddcac477b ("bcache: Remove unnecessary NULL point check in node allocations") Cc: # 6.5+ Cc: Zheng Wang Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-9-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index fd121a61f17c..e922c6aae41b 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1363,7 +1363,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, memset(new_nodes, 0, sizeof(new_nodes)); closure_init_stack(&cl); - while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b)) + while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b)) keys += r[nodes++].keys; blocks = btree_default_blocks(b->c) * 2 / 3; From 2c975b0b8b11f1ffb1ed538609e2c89d8abf800e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 17 Nov 2023 15:56:30 -0800 Subject: [PATCH 0597/1397] md: fix bi_status reporting in md_end_clone_io commit 45b478951b2ba5aea70b2850c49c1aa83aedd0d2 upstream. md_end_clone_io() may overwrite error status in orig_bio->bi_status with BLK_STS_OK. This could happen when orig_bio has BIO_CHAIN (split by md_submit_bio => bio_split_to_limits, for example). As a result, upper layer may miss error reported from md (or the device) and consider the failed IO was successful. Fix this by only update orig_bio->bi_status when current bio reports error and orig_bio is BLK_STS_OK. This is the same behavior as __bio_chain_endio(). Fixes: 10764815ff47 ("md: add io accounting for raid0 and raid5") Cc: stable@vger.kernel.org # v5.14+ Reported-by: Bhanu Victor DiCara <00bvd0+linux@gmail.com> Closes: https://lore.kernel.org/regressions/5727380.DvuYhMxLoT@bvd0/ Signed-off-by: Song Liu Tested-by: Xiao Ni Reviewed-by: Yu Kuai Acked-by: Guoqing Jiang Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9247e55c7eaf..2748b0b424cf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8669,7 +8669,8 @@ static void md_end_clone_io(struct bio *bio) struct bio *orig_bio = md_io_clone->orig_bio; struct mddev *mddev = md_io_clone->mddev; - orig_bio->bi_status = bio->bi_status; + if (bio->bi_status && !orig_bio->bi_status) + orig_bio->bi_status = bio->bi_status; if (md_io_clone->start_time) bio_end_io_acct(orig_bio, md_io_clone->start_time); From d1280fd4114a5f79d34b934ca5430f8b7aa9ab7a Mon Sep 17 00:00:00 2001 From: Mingzhe Zou Date: Mon, 20 Nov 2023 13:25:00 +0800 Subject: [PATCH 0598/1397] bcache: fixup multi-threaded bch_sectors_dirty_init() wake-up race commit 2faac25d7958c4761bb8cec54adb79f806783ad6 upstream. We get a kernel crash about "unable to handle kernel paging request": ```dmesg [368033.032005] BUG: unable to handle kernel paging request at ffffffffad9ae4b5 [368033.032007] PGD fc3a0d067 P4D fc3a0d067 PUD fc3a0e063 PMD 8000000fc38000e1 [368033.032012] Oops: 0003 [#1] SMP PTI [368033.032015] CPU: 23 PID: 55090 Comm: bch_dirtcnt[0] Kdump: loaded Tainted: G OE --------- - - 4.18.0-147.5.1.es8_24.x86_64 #1 [368033.032017] Hardware name: Tsinghua Tongfang THTF Chaoqiang Server/072T6D, BIOS 2.4.3 01/17/2017 [368033.032027] RIP: 0010:native_queued_spin_lock_slowpath+0x183/0x1d0 [368033.032029] Code: 8b 02 48 85 c0 74 f6 48 89 c1 eb d0 c1 e9 12 83 e0 03 83 e9 01 48 c1 e0 05 48 63 c9 48 05 c0 3d 02 00 48 03 04 cd 60 68 93 ad <48> 89 10 8b 42 08 85 c0 75 09 f3 90 8b 42 08 85 c0 74 f7 48 8b 02 [368033.032031] RSP: 0018:ffffbb48852abe00 EFLAGS: 00010082 [368033.032032] RAX: ffffffffad9ae4b5 RBX: 0000000000000246 RCX: 0000000000003bf3 [368033.032033] RDX: ffff97b0ff8e3dc0 RSI: 0000000000600000 RDI: ffffbb4884743c68 [368033.032034] RBP: 0000000000000001 R08: 0000000000000000 R09: 000007ffffffffff [368033.032035] R10: ffffbb486bb01000 R11: 0000000000000001 R12: ffffffffc068da70 [368033.032036] R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 [368033.032038] FS: 0000000000000000(0000) GS:ffff97b0ff8c0000(0000) knlGS:0000000000000000 [368033.032039] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [368033.032040] CR2: ffffffffad9ae4b5 CR3: 0000000fc3a0a002 CR4: 00000000003626e0 [368033.032042] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [368033.032043] bcache: bch_cached_dev_attach() Caching rbd479 as bcache462 on set 8cff3c36-4a76-4242-afaa-7630206bc70b [368033.032045] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [368033.032046] Call Trace: [368033.032054] _raw_spin_lock_irqsave+0x32/0x40 [368033.032061] __wake_up_common_lock+0x63/0xc0 [368033.032073] ? bch_ptr_invalid+0x10/0x10 [bcache] [368033.033502] bch_dirty_init_thread+0x14c/0x160 [bcache] [368033.033511] ? read_dirty_submit+0x60/0x60 [bcache] [368033.033516] kthread+0x112/0x130 [368033.033520] ? kthread_flush_work_fn+0x10/0x10 [368033.034505] ret_from_fork+0x35/0x40 ``` The crash occurred when call wake_up(&state->wait), and then we want to look at the value in the state. However, bch_sectors_dirty_init() is not found in the stack of any task. Since state is allocated on the stack, we guess that bch_sectors_dirty_init() has exited, causing bch_dirty_init_thread() to be unable to handle kernel paging request. In order to verify this idea, we added some printing information during wake_up(&state->wait). We find that "wake up" is printed twice, however we only expect the last thread to wake up once. ```dmesg [ 994.641004] alcache: bch_dirty_init_thread() wake up [ 994.641018] alcache: bch_dirty_init_thread() wake up [ 994.641523] alcache: bch_sectors_dirty_init() init exit ``` There is a race. If bch_sectors_dirty_init() exits after the first wake up, the second wake up will trigger this bug("unable to handle kernel paging request"). Proceed as follows: bch_sectors_dirty_init kthread_run ==============> bch_dirty_init_thread(bch_dirtcnt[0]) ... ... atomic_inc(&state.started) ... ... ... atomic_read(&state.enough) ... ... atomic_set(&state->enough, 1) kthread_run ======================================================> bch_dirty_init_thread(bch_dirtcnt[1]) ... atomic_dec_and_test(&state->started) ... atomic_inc(&state.started) ... ... ... wake_up(&state->wait) ... atomic_read(&state.enough) atomic_dec_and_test(&state->started) ... ... wait_event(state.wait, atomic_read(&state.started) == 0) ... return ... wake_up(&state->wait) We believe it is very common to wake up twice if there is no dirty, but crash is an extremely low probability event. It's hard for us to reproduce this issue. We attached and detached continuously for a week, with a total of more than one million attaches and only one crash. Putting atomic_inc(&state.started) before kthread_run() can avoid waking up twice. Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") Signed-off-by: Mingzhe Zou Cc: Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-8-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/writeback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 24c049067f61..a6ddd0bb9220 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -1014,17 +1014,18 @@ void bch_sectors_dirty_init(struct bcache_device *d) if (atomic_read(&state.enough)) break; + atomic_inc(&state.started); state.infos[i].state = &state; state.infos[i].thread = kthread_run(bch_dirty_init_thread, &state.infos[i], "bch_dirtcnt[%d]", i); if (IS_ERR(state.infos[i].thread)) { pr_err("fails to run thread bch_dirty_init[%d]\n", i); + atomic_dec(&state.started); for (--i; i >= 0; i--) kthread_stop(state.infos[i].thread); goto out; } - atomic_inc(&state.started); } out: From ed869aee2adcf2b0ea38bf52f9660f0f5b7e9faf Mon Sep 17 00:00:00 2001 From: Charles Mirabile Date: Mon, 20 Nov 2023 05:55:45 -0500 Subject: [PATCH 0599/1397] io_uring/fs: consider link->flags when getting path for LINKAT commit 8479063f1fbee201a8739130e816cc331b675838 upstream. In order for `AT_EMPTY_PATH` to work as expected, the fact that the user wants that behavior needs to make it to `getname_flags` or it will return ENOENT. Fixes: cf30da90bc3a ("io_uring: add support for IORING_OP_LINKAT") Cc: Link: https://github.com/axboe/liburing/issues/995 Signed-off-by: Charles Mirabile Link: https://lore.kernel.org/r/20231120105545.1209530-1-cmirabil@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/fs.c b/io_uring/fs.c index 08e3b175469c..eccea851dd5a 100644 --- a/io_uring/fs.c +++ b/io_uring/fs.c @@ -254,7 +254,7 @@ int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); lnk->flags = READ_ONCE(sqe->hardlink_flags); - lnk->oldpath = getname(oldf); + lnk->oldpath = getname_uflags(oldf, lnk->flags); if (IS_ERR(lnk->oldpath)) return PTR_ERR(lnk->oldpath); From 9372aab5d0ff621ea203c8c603e7e5f75e888240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Wed, 25 Oct 2023 15:24:37 +0200 Subject: [PATCH 0600/1397] s390/dasd: protect device queue against concurrent access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit db46cd1e0426f52999d50fa72cfa97fa39952885 upstream. In dasd_profile_start() the amount of requests on the device queue are counted. The access to the device queue is unprotected against concurrent access. With a lot of parallel I/O, especially with alias devices enabled, the device queue can change while dasd_profile_start() is accessing the queue. In the worst case this leads to a kernel panic due to incorrect pointer accesses. Fix this by taking the device lock before accessing the queue and counting the requests. Additionally the check for a valid profile data pointer can be done earlier to avoid unnecessary locking in a hot path. Cc: Fixes: 4fa52aa7a82f ("[S390] dasd: add enhanced DASD statistics interface") Reviewed-by: Stefan Haberland Signed-off-by: Jan Höppner Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20231025132437.1223363-3-sth@linux.ibm.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 215597f73be4..5b11ee923457 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -674,18 +674,20 @@ static void dasd_profile_start(struct dasd_block *block, * we count each request only once. */ device = cqr->startdev; - if (device->profile.data) { - counter = 1; /* request is not yet queued on the start device */ - list_for_each(l, &device->ccw_queue) - if (++counter >= 31) - break; - } + if (!device->profile.data) + return; + + spin_lock(get_ccwdev_lock(device->cdev)); + counter = 1; /* request is not yet queued on the start device */ + list_for_each(l, &device->ccw_queue) + if (++counter >= 31) + break; + spin_unlock(get_ccwdev_lock(device->cdev)); + spin_lock(&device->profile.lock); - if (device->profile.data) { - device->profile.data->dasd_io_nr_req[counter]++; - if (rq_data_dir(req) == READ) - device->profile.data->dasd_read_nr_req[counter]++; - } + device->profile.data->dasd_io_nr_req[counter]++; + if (rq_data_dir(req) == READ) + device->profile.data->dasd_read_nr_req[counter]++; spin_unlock(&device->profile.lock); } From a9dc6f2e27baf909a61c428b7d03fd1312703230 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 13 Nov 2023 12:07:37 -0800 Subject: [PATCH 0601/1397] platform/x86: hp-bioscfg: Simplify return check in hp_add_other_attributes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c5dbf04160005e07e8ca7232a7faa77ab1547ae0 upstream. All cases in switch-case have a same goto on error, move the return check out of the switch. This is a cleanup. Signed-off-by: Harshit Mogalapalli Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231113200742.3593548-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/hp/hp-bioscfg/bioscfg.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index 5798b49ddaba..10676e1abc28 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -630,21 +630,19 @@ static int hp_add_other_attributes(int attr_type) switch (attr_type) { case HPWMI_SECURE_PLATFORM_TYPE: ret = hp_populate_secure_platform_data(attr_name_kobj); - if (ret) - goto err_other_attr_init; break; case HPWMI_SURE_START_TYPE: ret = hp_populate_sure_start_data(attr_name_kobj); - if (ret) - goto err_other_attr_init; break; default: ret = -EINVAL; - goto err_other_attr_init; } + if (ret) + goto err_other_attr_init; + mutex_unlock(&bioscfg_drv.mutex); return 0; From e73ebb93558bb2028722adc23496f39c44cdd7f8 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 13 Nov 2023 12:07:38 -0800 Subject: [PATCH 0602/1397] platform/x86: hp-bioscfg: move mutex_lock() down in hp_add_other_attributes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5736aa9537c9b8927dec32d3d47c8c31fe560f62 upstream. attr_name_kobj's memory allocation is done with mutex_lock() held, this is not needed. Move allocation outside of mutex_lock() so unlock is not needed when allocation fails. Suggested-by: Ilpo Järvinen Signed-off-by: Harshit Mogalapalli Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231113200742.3593548-2-harshit.m.mogalapalli@oracle.com Signed-off-by: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/hp/hp-bioscfg/bioscfg.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index 10676e1abc28..a3599498c4e8 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -592,13 +592,11 @@ static int hp_add_other_attributes(int attr_type) int ret; char *attr_name; - mutex_lock(&bioscfg_drv.mutex); - attr_name_kobj = kzalloc(sizeof(*attr_name_kobj), GFP_KERNEL); - if (!attr_name_kobj) { - ret = -ENOMEM; - goto err_other_attr_init; - } + if (!attr_name_kobj) + return -ENOMEM; + + mutex_lock(&bioscfg_drv.mutex); /* Check if attribute type is supported */ switch (attr_type) { From 9a98ab01e3acba830cb0917296a13192fd23f305 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 13 Nov 2023 12:07:39 -0800 Subject: [PATCH 0603/1397] platform/x86: hp-bioscfg: Fix error handling in hp_add_other_attributes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f40f939917b2b4cbf18450096c0ce1c58ed59fae upstream. 'attr_name_kobj' is allocated using kzalloc, but on all the error paths it is not freed, hence we have a memory leak. Fix the error path before kobject_init_and_add() by adding kfree(). kobject_put() must be always called after passing the object to kobject_init_and_add(). Only the error path which is immediately next to kobject_init_and_add() calls kobject_put() and not any other error path after it. Fix the error handling after kobject_init_and_add() by moving the kobject_put() into the goto label err_other_attr_init that is already used by all the error paths after kobject_init_and_add(). Fixes: a34fc329b189 ("platform/x86: hp-bioscfg: bioscfg") Cc: stable@vger.kernel.org # 6.6.x: c5dbf0416000: platform/x86: hp-bioscfg: Simplify return check in hp_add_other_attributes() Cc: stable@vger.kernel.org # 6.6.x: 5736aa9537c9: platform/x86: hp-bioscfg: move mutex_lock() down in hp_add_other_attributes() Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202309201412.on0VXJGo-lkp@intel.com/ Signed-off-by: Harshit Mogalapalli [ij: Added the stable dep tags] Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231113200742.3593548-3-harshit.m.mogalapalli@oracle.com Signed-off-by: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/hp/hp-bioscfg/bioscfg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index a3599498c4e8..6ddca857cc4d 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -613,14 +613,14 @@ static int hp_add_other_attributes(int attr_type) default: pr_err("Error: Unknown attr_type: %d\n", attr_type); ret = -EINVAL; - goto err_other_attr_init; + kfree(attr_name_kobj); + goto unlock_drv_mutex; } ret = kobject_init_and_add(attr_name_kobj, &attr_name_ktype, NULL, "%s", attr_name); if (ret) { pr_err("Error encountered [%d]\n", ret); - kobject_put(attr_name_kobj); goto err_other_attr_init; } @@ -645,6 +645,8 @@ static int hp_add_other_attributes(int attr_type) return 0; err_other_attr_init: + kobject_put(attr_name_kobj); +unlock_drv_mutex: mutex_unlock(&bioscfg_drv.mutex); kfree(obj); return ret; From a367031ab27461ae6cb594fe108a39d7ae54bfe2 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Mon, 13 Nov 2023 15:59:20 +0100 Subject: [PATCH 0604/1397] dt-bindings: usb: microchip,usb5744: Add second supply commit d0c930b745cafde8e7d25d0356c648bca669556a upstream. The USB5744 has two power supplies one for 3V3 and one for 1V2. Add the second supply to the USB5744 DT binding. Signed-off-by: Stefan Eichenberger Signed-off-by: Francesco Dolcini Acked-by: Conor Dooley Cc: stable Link: https://lore.kernel.org/r/20231113145921.30104-2-francesco@dolcini.it Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/microchip,usb5744.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml index ff3a1707ef57..6d4cfd943f58 100644 --- a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml @@ -36,7 +36,11 @@ properties: vdd-supply: description: - VDD power supply to the hub + 3V3 power supply to the hub + + vdd2-supply: + description: + 1V2 power supply to the hub peer-hub: $ref: /schemas/types.yaml#/definitions/phandle @@ -62,6 +66,7 @@ allOf: properties: reset-gpios: false vdd-supply: false + vdd2-supply: false peer-hub: false i2c-bus: false else: From 7b1e1c2cf6ded159d69f131e90b185bbc47fcd9a Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Mon, 13 Nov 2023 15:59:21 +0100 Subject: [PATCH 0605/1397] usb: misc: onboard-hub: add support for Microchip USB5744 commit 6972b38ca05235f6142715db7062ecc87a422e22 upstream. Add support for the Microchip USB5744 USB3.0 and USB2.0 Hub. The Microchip USB5744 supports two power supplies, one for 1V2 and one for 3V3. According to the datasheet there is no need for a delay between power on and reset, so this value is set to 0. Signed-off-by: Stefan Eichenberger Signed-off-by: Francesco Dolcini Cc: stable Acked-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20231113145921.30104-3-francesco@dolcini.it Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_hub.c | 2 ++ drivers/usb/misc/onboard_usb_hub.h | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c index 57bbe1309094..d72130eda57d 100644 --- a/drivers/usb/misc/onboard_usb_hub.c +++ b/drivers/usb/misc/onboard_usb_hub.c @@ -437,6 +437,8 @@ static const struct usb_device_id onboard_hub_id_table[] = { { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2412) }, /* USB2412 USB 2.0 */ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2744) }, /* USB5744 USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x5744) }, /* USB5744 USB 3.0 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x5411) }, /* RTS5411 USB 2.1 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0414) }, /* RTS5414 USB 3.2 */ diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h index 2a4ab5ac0ebe..8af34e6d1aff 100644 --- a/drivers/usb/misc/onboard_usb_hub.h +++ b/drivers/usb/misc/onboard_usb_hub.h @@ -16,6 +16,11 @@ static const struct onboard_hub_pdata microchip_usb424_data = { .num_supplies = 1, }; +static const struct onboard_hub_pdata microchip_usb5744_data = { + .reset_us = 0, + .num_supplies = 2, +}; + static const struct onboard_hub_pdata realtek_rts5411_data = { .reset_us = 0, .num_supplies = 1, @@ -50,6 +55,8 @@ static const struct of_device_id onboard_hub_match[] = { { .compatible = "usb424,2412", .data = µchip_usb424_data, }, { .compatible = "usb424,2514", .data = µchip_usb424_data, }, { .compatible = "usb424,2517", .data = µchip_usb424_data, }, + { .compatible = "usb424,2744", .data = µchip_usb5744_data, }, + { .compatible = "usb424,5744", .data = µchip_usb5744_data, }, { .compatible = "usb451,8140", .data = &ti_tusb8041_data, }, { .compatible = "usb451,8142", .data = &ti_tusb8041_data, }, { .compatible = "usb4b4,6504", .data = &cypress_hx3_data, }, From 2beaa03e15ff24994acd318e8b0c72399d88680b Mon Sep 17 00:00:00 2001 From: Asuna Yang Date: Wed, 22 Nov 2023 22:18:03 +0800 Subject: [PATCH 0606/1397] USB: serial: option: add Luat Air72*U series products commit da90e45d5afc4da2de7cd3ea7943d0f1baa47cc2 upstream. Update the USB serial option driver support for Luat Air72*U series products. ID 1782:4e00 Spreadtrum Communications Inc. UNISOC-8910 T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 13 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1782 ProdID=4e00 Rev=00.00 S: Manufacturer=UNISOC S: Product=UNISOC-8910 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=400mA I: If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=4096ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms If#= 2: AT If#= 3: PPP + AT If#= 4: Debug Co-developed-by: Yangyu Chen Signed-off-by: Yangyu Chen Signed-off-by: Asuna Yang Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 45dcfaadaf98..b76cb9a096f7 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -609,6 +609,8 @@ static void option_instat_callback(struct urb *urb); #define UNISOC_VENDOR_ID 0x1782 /* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */ #define TOZED_PRODUCT_LT70C 0x4055 +/* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ +#define LUAT_PRODUCT_AIR720U 0x4e00 /* Device flags */ @@ -2271,6 +2273,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From 1cceb4820f0523888082173b0ff60e16127b5d73 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Thu, 16 Nov 2023 22:31:21 +0530 Subject: [PATCH 0607/1397] platform/x86/amd/pmc: adjust getting DRAM size behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c6ea14d557343cd3af6c6be2f5a78c98bdb281bb upstream. amd_pmc_get_dram_size() is used to get the DRAM size information. But in the current code, mailbox command to get the DRAM size info is sent based on the values of dev->major and dev->minor. But dev->major and dev->minor will have either junk or zero assigned to them until at least once a call to amd_pmc_get_smu_version() is made which ideally populates dev->major and dev->minor. However, adding a amd_pmc_get_smu_version() call to amd_pmc_get_dram_size() has a downside of elevating the boot times. After talking to the PMFW team, it's understood that the "get dram size" mbox command would only be supported on specific platforms (like Mendocino) and not all. So, adjust getting DRAM size behavior such that, - if running on Rembrandt or Mendocino and the underlying PMFW knows how to execute the "get dram size" command it shall give the custom dram size. - if the underlying FW does not report the dram size, we just proceed further and assign the default dram size. The simplest way to address this is to remove amd_pmc_get_dram_size() function and directly call the "get dram size" command in the amd_pmc_s2d_init(). Reported-by: Mark Hasemeyer Fixes: be8325fb3d8c ("platform/x86/amd: pmc: Get STB DRAM size from PMFW") Cc: stable@vger.kernel.org Suggested-by: Sanket Goswami Signed-off-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20231116170121.3372222-1-Shyam-sundar.S-k@amd.com Reviewed-by: Mario Limonciello Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/amd/pmc/pmc.c | 31 ++---------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index c1e788b67a74..212f164bc3db 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -912,33 +912,6 @@ static const struct pci_device_id pmc_pci_ids[] = { { } }; -static int amd_pmc_get_dram_size(struct amd_pmc_dev *dev) -{ - int ret; - - switch (dev->cpu_id) { - case AMD_CPU_ID_YC: - if (!(dev->major > 90 || (dev->major == 90 && dev->minor > 39))) { - ret = -EINVAL; - goto err_dram_size; - } - break; - default: - ret = -EINVAL; - goto err_dram_size; - } - - ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true); - if (ret || !dev->dram_size) - goto err_dram_size; - - return 0; - -err_dram_size: - dev_err(dev->dev, "DRAM size command not supported for this platform\n"); - return ret; -} - static int amd_pmc_s2d_init(struct amd_pmc_dev *dev) { u32 phys_addr_low, phys_addr_hi; @@ -957,8 +930,8 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev) return -EIO; /* Get DRAM size */ - ret = amd_pmc_get_dram_size(dev); - if (ret) + ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true); + if (ret || !dev->dram_size) dev->dram_size = S2D_TELEMETRY_DRAMBYTES_MAX; /* Get STB DRAM address */ From 77cc23ef4306356e96a21f52edc254d9eb7a2c44 Mon Sep 17 00:00:00 2001 From: Stuart Hayhurst Date: Tue, 14 Nov 2023 11:38:08 +0000 Subject: [PATCH 0608/1397] platform/x86: ideapad-laptop: Set max_brightness before using it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7a3c36eef9a5d13b16aa954da54224c9c6bed339 upstream. max_brightness is used in ideapad_kbd_bl_brightness_get() before it's set, causing ideapad_kbd_bl_brightness_get() to return -EINVAL sometimes. Fixes: ecaa1867b524 ("platform/x86: ideapad-laptop: Add support for keyboard backlights using KBLC ACPI symbol") Signed-off-by: Stuart Hayhurst Cc: stable@vger.kernel.org Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231114114055.6220-2-stuart.a.hayhurst@gmail.com Signed-off-by: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/ideapad-laptop.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index ac037540acfc..88eefccb6ed2 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1425,18 +1425,17 @@ static int ideapad_kbd_bl_init(struct ideapad_private *priv) if (WARN_ON(priv->kbd_bl.initialized)) return -EEXIST; - brightness = ideapad_kbd_bl_brightness_get(priv); - if (brightness < 0) - return brightness; - - priv->kbd_bl.last_brightness = brightness; - if (ideapad_kbd_bl_check_tristate(priv->kbd_bl.type)) { priv->kbd_bl.led.max_brightness = 2; } else { priv->kbd_bl.led.max_brightness = 1; } + brightness = ideapad_kbd_bl_brightness_get(priv); + if (brightness < 0) + return brightness; + + priv->kbd_bl.last_brightness = brightness; priv->kbd_bl.led.name = "platform::" LED_FUNCTION_KBD_BACKLIGHT; priv->kbd_bl.led.brightness_get = ideapad_kbd_bl_led_cdev_brightness_get; priv->kbd_bl.led.brightness_set_blocking = ideapad_kbd_bl_led_cdev_brightness_set; From 6ebd69f721d4774e25b5f530ff833800542dd73c Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Sun, 19 Nov 2023 08:23:41 -0800 Subject: [PATCH 0609/1397] hv_netvsc: fix race of netvsc and VF register_netdevice commit d30fb712e52964f2cf9a9c14cf67078394044837 upstream. The rtnl lock also needs to be held before rndis_filter_device_add() which advertises nvsp_2_vsc_capability / sriov bit, and triggers VF NIC offering and registering. If VF NIC finished register_netdev() earlier it may cause name based config failure. To fix this issue, move the call to rtnl_lock() before rndis_filter_device_add(), so VF will be registered later than netvsc / synthetic NIC, and gets a name numbered (ethX) after netvsc. Cc: stable@vger.kernel.org Fixes: e04e7a7bbd4b ("hv_netvsc: Fix a deadlock by getting rtnl lock earlier in netvsc_probe()") Reported-by: Dexuan Cui Signed-off-by: Haiyang Zhang Reviewed-by: Wojciech Drewek Reviewed-by: Simon Horman Reviewed-by: Dexuan Cui Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3ba3c8fb28a5..5e528a76f5f5 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2531,15 +2531,6 @@ static int netvsc_probe(struct hv_device *dev, goto devinfo_failed; } - nvdev = rndis_filter_device_add(dev, device_info); - if (IS_ERR(nvdev)) { - ret = PTR_ERR(nvdev); - netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); - goto rndis_failed; - } - - eth_hw_addr_set(net, device_info->mac_adr); - /* We must get rtnl lock before scheduling nvdev->subchan_work, * otherwise netvsc_subchan_work() can get rtnl lock first and wait * all subchannels to show up, but that may not happen because @@ -2547,9 +2538,23 @@ static int netvsc_probe(struct hv_device *dev, * -> ... -> device_add() -> ... -> __device_attach() can't get * the device lock, so all the subchannels can't be processed -- * finally netvsc_subchan_work() hangs forever. + * + * The rtnl lock also needs to be held before rndis_filter_device_add() + * which advertises nvsp_2_vsc_capability / sriov bit, and triggers + * VF NIC offering and registering. If VF NIC finished register_netdev() + * earlier it may cause name based config failure. */ rtnl_lock(); + nvdev = rndis_filter_device_add(dev, device_info); + if (IS_ERR(nvdev)) { + ret = PTR_ERR(nvdev); + netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); + goto rndis_failed; + } + + eth_hw_addr_set(net, device_info->mac_adr); + if (nvdev->num_chn > 1) schedule_work(&nvdev->subchan_work); @@ -2586,9 +2591,9 @@ static int netvsc_probe(struct hv_device *dev, return 0; register_failed: - rtnl_unlock(); rndis_filter_device_remove(dev, nvdev); rndis_failed: + rtnl_unlock(); netvsc_devinfo_put(device_info); devinfo_failed: free_percpu(net_device_ctx->vf_stats); From 7350c460f7f48a8653a15c5c90fc9070aaa29535 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Sun, 19 Nov 2023 08:23:42 -0800 Subject: [PATCH 0610/1397] hv_netvsc: Fix race of register_netdevice_notifier and VF register commit 85520856466ed6bc3b1ccb013cddac70ceb437db upstream. If VF NIC is registered earlier, NETDEV_REGISTER event is replayed, but NETDEV_POST_INIT is not. Move register_netdevice_notifier() earlier, so the call back function is set before probing. Cc: stable@vger.kernel.org Fixes: e04e7a7bbd4b ("hv_netvsc: Fix a deadlock by getting rtnl lock earlier in netvsc_probe()") Reported-by: Dexuan Cui Signed-off-by: Haiyang Zhang Reviewed-by: Wojciech Drewek Reviewed-by: Dexuan Cui Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5e528a76f5f5..b7dfd51f09e6 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2793,12 +2793,17 @@ static int __init netvsc_drv_init(void) } netvsc_ring_bytes = ring_size * PAGE_SIZE; + register_netdevice_notifier(&netvsc_netdev_notifier); + ret = vmbus_driver_register(&netvsc_drv); if (ret) - return ret; + goto err_vmbus_reg; - register_netdevice_notifier(&netvsc_netdev_notifier); return 0; + +err_vmbus_reg: + unregister_netdevice_notifier(&netvsc_netdev_notifier); + return ret; } MODULE_LICENSE("GPL"); From 6a2b0b689b2b4fa8e001962da62df04f303ec1c3 Mon Sep 17 00:00:00 2001 From: Long Li Date: Sun, 19 Nov 2023 08:23:43 -0800 Subject: [PATCH 0611/1397] hv_netvsc: Mark VF as slave before exposing it to user-mode commit c807d6cd089d2f4951baa838081ec5ae3e2360f8 upstream. When a VF is being exposed form the kernel, it should be marked as "slave" before exposing to the user-mode. The VF is not usable without netvsc running as master. The user-mode should never see a VF without the "slave" flag. This commit moves the code of setting the slave flag to the time before VF is exposed to user-mode. Cc: stable@vger.kernel.org Fixes: 0c195567a8f6 ("netvsc: transparent VF management") Signed-off-by: Long Li Signed-off-by: Haiyang Zhang Acked-by: Stephen Hemminger Acked-by: Dexuan Cui Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index b7dfd51f09e6..706ea5263e87 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2206,9 +2206,6 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto upper_link_failed; } - /* set slave flag before open to prevent IPv6 addrconf */ - vf_netdev->flags |= IFF_SLAVE; - schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); @@ -2315,16 +2312,18 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) } - /* Fallback path to check synthetic vf with - * help of mac addr + /* Fallback path to check synthetic vf with help of mac addr. + * Because this function can be called before vf_netdev is + * initialized (NETDEV_POST_INIT) when its perm_addr has not been copied + * from dev_addr, also try to match to its dev_addr. + * Note: On Hyper-V and Azure, it's not possible to set a MAC address + * on a VF that matches to the MAC of a unrelated NETVSC device. */ list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) { ndev = hv_get_drvdata(ndev_ctx->device_ctx); - if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) { - netdev_notice(vf_netdev, - "falling back to mac addr based matching\n"); + if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr) || + ether_addr_equal(vf_netdev->dev_addr, ndev->perm_addr)) return ndev; - } } netdev_notice(vf_netdev, @@ -2332,6 +2331,19 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) return NULL; } +static int netvsc_prepare_bonding(struct net_device *vf_netdev) +{ + struct net_device *ndev; + + ndev = get_netvsc_byslot(vf_netdev); + if (!ndev) + return NOTIFY_DONE; + + /* set slave flag before open to prevent IPv6 addrconf */ + vf_netdev->flags |= IFF_SLAVE; + return NOTIFY_DONE; +} + static int netvsc_register_vf(struct net_device *vf_netdev) { struct net_device_context *net_device_ctx; @@ -2758,6 +2770,8 @@ static int netvsc_netdev_event(struct notifier_block *this, return NOTIFY_DONE; switch (event) { + case NETDEV_POST_INIT: + return netvsc_prepare_bonding(event_dev); case NETDEV_REGISTER: return netvsc_register_vf(event_dev); case NETDEV_UNREGISTER: From abd8dd55ee4e3f5e2144eb3750c6c43c72b0277b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Nov 2023 12:06:54 +0100 Subject: [PATCH 0612/1397] Revert "usb: phy: add usb phy notify port status API" commit 1a229d8690a0f8951fc4aa8b76a7efab0d8de342 upstream. This reverts commit a08799cf17c22375752abfad3b4a2b34b3acb287. The recently added Realtek PHY drivers depend on the new port status notification mechanism which was built on the deprecated USB PHY implementation and devicetree binding. Specifically, using these PHYs would require describing the very same PHY using both the generic "phy" property and the deprecated "usb-phy" property which is clearly wrong. We should not be building new functionality on top of the legacy USB PHY implementation even if it is currently stuck in some kind of transitional limbo. Revert the new notification interface which is broken by design. Fixes: a08799cf17c2 ("usb: phy: add usb phy notify port status API") Cc: stable@vger.kernel.org # 6.6 Cc: Stanley Chang Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231106110654.31090-4-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 23 ----------------------- include/linux/usb/phy.h | 13 ------------- 2 files changed, 36 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 0ff47eeffb49..dfc30cebd4c4 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -622,29 +622,6 @@ static int hub_ext_port_status(struct usb_hub *hub, int port1, int type, ret = 0; } mutex_unlock(&hub->status_mutex); - - /* - * There is no need to lock status_mutex here, because status_mutex - * protects hub->status, and the phy driver only checks the port - * status without changing the status. - */ - if (!ret) { - struct usb_device *hdev = hub->hdev; - - /* - * Only roothub will be notified of port state changes, - * since the USB PHY only cares about changes at the next - * level. - */ - if (is_root_hub(hdev)) { - struct usb_hcd *hcd = bus_to_hcd(hdev->bus); - - if (hcd->usb_phy) - usb_phy_notify_port_status(hcd->usb_phy, - port1 - 1, *status, *change); - } - } - return ret; } diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index b513749582d7..e4de6bc1f69b 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -144,10 +144,6 @@ struct usb_phy { */ int (*set_wakeup)(struct usb_phy *x, bool enabled); - /* notify phy port status change */ - int (*notify_port_status)(struct usb_phy *x, int port, - u16 portstatus, u16 portchange); - /* notify phy connect status change */ int (*notify_connect)(struct usb_phy *x, enum usb_device_speed speed); @@ -320,15 +316,6 @@ usb_phy_set_wakeup(struct usb_phy *x, bool enabled) return 0; } -static inline int -usb_phy_notify_port_status(struct usb_phy *x, int port, u16 portstatus, u16 portchange) -{ - if (x && x->notify_port_status) - return x->notify_port_status(x, port, portstatus, portchange); - else - return 0; -} - static inline int usb_phy_notify_connect(struct usb_phy *x, enum usb_device_speed speed) { From e27877990e54bfe4246dd850f7ec8646c999ce58 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Nov 2023 12:06:52 +0100 Subject: [PATCH 0613/1397] Revert "phy: realtek: usb: Add driver for the Realtek SoC USB 3.0 PHY" commit 258ea41c926b7b3a16d0d7aa210a1401c4a1601b upstream. This reverts commit adda6e82a7de7d6d478f6c8ef127f0ac51c510a1. The recently added Realtek PHY drivers depend on the new port status notification mechanism which was built on the deprecated USB PHY implementation and devicetree binding. Specifically, using these PHYs would require describing the very same PHY using both the generic "phy" property and the deprecated "usb-phy" property which is clearly wrong. We should not be building new functionality on top of the legacy USB PHY implementation even if it is currently stuck in some kind of transitional limbo. Revert the new Realtek PHY drivers for now so that the port status notification interface can be reverted and replaced. Fixes: adda6e82a7de ("phy: realtek: usb: Add driver for the Realtek SoC USB 3.0 PHY") Cc: stable@vger.kernel.org # 6.6 Cc: Stanley Chang Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231106110654.31090-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/phy/realtek/Kconfig | 12 - drivers/phy/realtek/Makefile | 1 - drivers/phy/realtek/phy-rtk-usb3.c | 761 ----------------------------- 3 files changed, 774 deletions(-) delete mode 100644 drivers/phy/realtek/phy-rtk-usb3.c diff --git a/drivers/phy/realtek/Kconfig b/drivers/phy/realtek/Kconfig index 75ac7e7c31ae..745587751070 100644 --- a/drivers/phy/realtek/Kconfig +++ b/drivers/phy/realtek/Kconfig @@ -17,16 +17,4 @@ config PHY_RTK_RTD_USB2PHY DWC3 USB IP. This driver will do the PHY initialization of the parameters. -config PHY_RTK_RTD_USB3PHY - tristate "Realtek RTD USB3 PHY Transceiver Driver" - depends on USB_SUPPORT - select GENERIC_PHY - select USB_PHY - select USB_COMMON - help - Enable this to support Realtek SoC USB3 phy transceiver. - The DHC (digital home center) RTD series SoCs used the Synopsys - DWC3 USB IP. This driver will do the PHY initialization - of the parameters. - endif # ARCH_REALTEK || COMPILE_TEST diff --git a/drivers/phy/realtek/Makefile b/drivers/phy/realtek/Makefile index ed7b47ff8a26..cf5d440841a2 100644 --- a/drivers/phy/realtek/Makefile +++ b/drivers/phy/realtek/Makefile @@ -1,3 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_PHY_RTK_RTD_USB2PHY) += phy-rtk-usb2.o -obj-$(CONFIG_PHY_RTK_RTD_USB3PHY) += phy-rtk-usb3.o diff --git a/drivers/phy/realtek/phy-rtk-usb3.c b/drivers/phy/realtek/phy-rtk-usb3.c deleted file mode 100644 index dfb3122f3f11..000000000000 --- a/drivers/phy/realtek/phy-rtk-usb3.c +++ /dev/null @@ -1,761 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * phy-rtk-usb3.c RTK usb3.0 phy driver - * - * copyright (c) 2023 realtek semiconductor corporation - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define USB_MDIO_CTRL_PHY_BUSY BIT(7) -#define USB_MDIO_CTRL_PHY_WRITE BIT(0) -#define USB_MDIO_CTRL_PHY_ADDR_SHIFT 8 -#define USB_MDIO_CTRL_PHY_DATA_SHIFT 16 - -#define MAX_USB_PHY_DATA_SIZE 0x30 -#define PHY_ADDR_0X09 0x09 -#define PHY_ADDR_0X0B 0x0b -#define PHY_ADDR_0X0D 0x0d -#define PHY_ADDR_0X10 0x10 -#define PHY_ADDR_0X1F 0x1f -#define PHY_ADDR_0X20 0x20 -#define PHY_ADDR_0X21 0x21 -#define PHY_ADDR_0X30 0x30 - -#define REG_0X09_FORCE_CALIBRATION BIT(9) -#define REG_0X0B_RX_OFFSET_RANGE_MASK 0xc -#define REG_0X0D_RX_DEBUG_TEST_EN BIT(6) -#define REG_0X10_DEBUG_MODE_SETTING 0x3c0 -#define REG_0X10_DEBUG_MODE_SETTING_MASK 0x3f8 -#define REG_0X1F_RX_OFFSET_CODE_MASK 0x1e - -#define USB_U3_TX_LFPS_SWING_TRIM_SHIFT 4 -#define USB_U3_TX_LFPS_SWING_TRIM_MASK 0xf -#define AMPLITUDE_CONTROL_COARSE_MASK 0xff -#define AMPLITUDE_CONTROL_FINE_MASK 0xffff -#define AMPLITUDE_CONTROL_COARSE_DEFAULT 0xff -#define AMPLITUDE_CONTROL_FINE_DEFAULT 0xffff - -#define PHY_ADDR_MAP_ARRAY_INDEX(addr) (addr) -#define ARRAY_INDEX_MAP_PHY_ADDR(index) (index) - -struct phy_reg { - void __iomem *reg_mdio_ctl; -}; - -struct phy_data { - u8 addr; - u16 data; -}; - -struct phy_cfg { - int param_size; - struct phy_data param[MAX_USB_PHY_DATA_SIZE]; - - bool check_efuse; - bool do_toggle; - bool do_toggle_once; - bool use_default_parameter; - bool check_rx_front_end_offset; -}; - -struct phy_parameter { - struct phy_reg phy_reg; - - /* Get from efuse */ - u8 efuse_usb_u3_tx_lfps_swing_trim; - - /* Get from dts */ - u32 amplitude_control_coarse; - u32 amplitude_control_fine; -}; - -struct rtk_phy { - struct usb_phy phy; - struct device *dev; - - struct phy_cfg *phy_cfg; - int num_phy; - struct phy_parameter *phy_parameter; - - struct dentry *debug_dir; -}; - -#define PHY_IO_TIMEOUT_USEC (50000) -#define PHY_IO_DELAY_US (100) - -static inline int utmi_wait_register(void __iomem *reg, u32 mask, u32 result) -{ - int ret; - unsigned int val; - - ret = read_poll_timeout(readl, val, ((val & mask) == result), - PHY_IO_DELAY_US, PHY_IO_TIMEOUT_USEC, false, reg); - if (ret) { - pr_err("%s can't program USB phy\n", __func__); - return -ETIMEDOUT; - } - - return 0; -} - -static int rtk_phy3_wait_vbusy(struct phy_reg *phy_reg) -{ - return utmi_wait_register(phy_reg->reg_mdio_ctl, USB_MDIO_CTRL_PHY_BUSY, 0); -} - -static u16 rtk_phy_read(struct phy_reg *phy_reg, char addr) -{ - unsigned int tmp; - u32 value; - - tmp = (addr << USB_MDIO_CTRL_PHY_ADDR_SHIFT); - - writel(tmp, phy_reg->reg_mdio_ctl); - - rtk_phy3_wait_vbusy(phy_reg); - - value = readl(phy_reg->reg_mdio_ctl); - value = value >> USB_MDIO_CTRL_PHY_DATA_SHIFT; - - return (u16)value; -} - -static int rtk_phy_write(struct phy_reg *phy_reg, char addr, u16 data) -{ - unsigned int val; - - val = USB_MDIO_CTRL_PHY_WRITE | - (addr << USB_MDIO_CTRL_PHY_ADDR_SHIFT) | - (data << USB_MDIO_CTRL_PHY_DATA_SHIFT); - - writel(val, phy_reg->reg_mdio_ctl); - - rtk_phy3_wait_vbusy(phy_reg); - - return 0; -} - -static void do_rtk_usb3_phy_toggle(struct rtk_phy *rtk_phy, int index, bool connect) -{ - struct phy_cfg *phy_cfg = rtk_phy->phy_cfg; - struct phy_reg *phy_reg; - struct phy_parameter *phy_parameter; - struct phy_data *phy_data; - u8 addr; - u16 data; - int i; - - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - if (!phy_cfg->do_toggle) - return; - - i = PHY_ADDR_MAP_ARRAY_INDEX(PHY_ADDR_0X09); - phy_data = phy_cfg->param + i; - addr = phy_data->addr; - data = phy_data->data; - - if (!addr && !data) { - addr = PHY_ADDR_0X09; - data = rtk_phy_read(phy_reg, addr); - phy_data->addr = addr; - phy_data->data = data; - } - - rtk_phy_write(phy_reg, addr, data & (~REG_0X09_FORCE_CALIBRATION)); - mdelay(1); - rtk_phy_write(phy_reg, addr, data | REG_0X09_FORCE_CALIBRATION); -} - -static int do_rtk_phy_init(struct rtk_phy *rtk_phy, int index) -{ - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - struct phy_parameter *phy_parameter; - int i = 0; - - phy_cfg = rtk_phy->phy_cfg; - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - if (phy_cfg->use_default_parameter) - goto do_toggle; - - for (i = 0; i < phy_cfg->param_size; i++) { - struct phy_data *phy_data = phy_cfg->param + i; - u8 addr = phy_data->addr; - u16 data = phy_data->data; - - if (!addr && !data) - continue; - - rtk_phy_write(phy_reg, addr, data); - } - -do_toggle: - if (phy_cfg->do_toggle_once) - phy_cfg->do_toggle = true; - - do_rtk_usb3_phy_toggle(rtk_phy, index, false); - - if (phy_cfg->do_toggle_once) { - u16 check_value = 0; - int count = 10; - u16 value_0x0d, value_0x10; - - /* Enable Debug mode by set 0x0D and 0x10 */ - value_0x0d = rtk_phy_read(phy_reg, PHY_ADDR_0X0D); - value_0x10 = rtk_phy_read(phy_reg, PHY_ADDR_0X10); - - rtk_phy_write(phy_reg, PHY_ADDR_0X0D, - value_0x0d | REG_0X0D_RX_DEBUG_TEST_EN); - rtk_phy_write(phy_reg, PHY_ADDR_0X10, - (value_0x10 & ~REG_0X10_DEBUG_MODE_SETTING_MASK) | - REG_0X10_DEBUG_MODE_SETTING); - - check_value = rtk_phy_read(phy_reg, PHY_ADDR_0X30); - - while (!(check_value & BIT(15))) { - check_value = rtk_phy_read(phy_reg, PHY_ADDR_0X30); - mdelay(1); - if (count-- < 0) - break; - } - - if (!(check_value & BIT(15))) - dev_info(rtk_phy->dev, "toggle fail addr=0x%02x, data=0x%04x\n", - PHY_ADDR_0X30, check_value); - - /* Disable Debug mode by set 0x0D and 0x10 to default*/ - rtk_phy_write(phy_reg, PHY_ADDR_0X0D, value_0x0d); - rtk_phy_write(phy_reg, PHY_ADDR_0X10, value_0x10); - - phy_cfg->do_toggle = false; - } - - if (phy_cfg->check_rx_front_end_offset) { - u16 rx_offset_code, rx_offset_range; - u16 code_mask = REG_0X1F_RX_OFFSET_CODE_MASK; - u16 range_mask = REG_0X0B_RX_OFFSET_RANGE_MASK; - bool do_update = false; - - rx_offset_code = rtk_phy_read(phy_reg, PHY_ADDR_0X1F); - if (((rx_offset_code & code_mask) == 0x0) || - ((rx_offset_code & code_mask) == code_mask)) - do_update = true; - - rx_offset_range = rtk_phy_read(phy_reg, PHY_ADDR_0X0B); - if (((rx_offset_range & range_mask) == range_mask) && do_update) { - dev_warn(rtk_phy->dev, "Don't update rx_offset_range (rx_offset_code=0x%x, rx_offset_range=0x%x)\n", - rx_offset_code, rx_offset_range); - do_update = false; - } - - if (do_update) { - u16 tmp1, tmp2; - - tmp1 = rx_offset_range & (~range_mask); - tmp2 = rx_offset_range & range_mask; - tmp2 += (1 << 2); - rx_offset_range = tmp1 | (tmp2 & range_mask); - rtk_phy_write(phy_reg, PHY_ADDR_0X0B, rx_offset_range); - goto do_toggle; - } - } - - return 0; -} - -static int rtk_phy_init(struct phy *phy) -{ - struct rtk_phy *rtk_phy = phy_get_drvdata(phy); - int ret = 0; - int i; - unsigned long phy_init_time = jiffies; - - for (i = 0; i < rtk_phy->num_phy; i++) - ret = do_rtk_phy_init(rtk_phy, i); - - dev_dbg(rtk_phy->dev, "Initialized RTK USB 3.0 PHY (take %dms)\n", - jiffies_to_msecs(jiffies - phy_init_time)); - - return ret; -} - -static int rtk_phy_exit(struct phy *phy) -{ - return 0; -} - -static const struct phy_ops ops = { - .init = rtk_phy_init, - .exit = rtk_phy_exit, - .owner = THIS_MODULE, -}; - -static void rtk_phy_toggle(struct usb_phy *usb3_phy, bool connect, int port) -{ - int index = port; - struct rtk_phy *rtk_phy = NULL; - - rtk_phy = dev_get_drvdata(usb3_phy->dev); - - if (index > rtk_phy->num_phy) { - dev_err(rtk_phy->dev, "%s: The port=%d is not in usb phy (num_phy=%d)\n", - __func__, index, rtk_phy->num_phy); - return; - } - - do_rtk_usb3_phy_toggle(rtk_phy, index, connect); -} - -static int rtk_phy_notify_port_status(struct usb_phy *x, int port, - u16 portstatus, u16 portchange) -{ - bool connect = false; - - pr_debug("%s port=%d portstatus=0x%x portchange=0x%x\n", - __func__, port, (int)portstatus, (int)portchange); - if (portstatus & USB_PORT_STAT_CONNECTION) - connect = true; - - if (portchange & USB_PORT_STAT_C_CONNECTION) - rtk_phy_toggle(x, connect, port); - - return 0; -} - -#ifdef CONFIG_DEBUG_FS -static struct dentry *create_phy_debug_root(void) -{ - struct dentry *phy_debug_root; - - phy_debug_root = debugfs_lookup("phy", usb_debug_root); - if (!phy_debug_root) - phy_debug_root = debugfs_create_dir("phy", usb_debug_root); - - return phy_debug_root; -} - -static int rtk_usb3_parameter_show(struct seq_file *s, void *unused) -{ - struct rtk_phy *rtk_phy = s->private; - struct phy_cfg *phy_cfg; - int i, index; - - phy_cfg = rtk_phy->phy_cfg; - - seq_puts(s, "Property:\n"); - seq_printf(s, " check_efuse: %s\n", - phy_cfg->check_efuse ? "Enable" : "Disable"); - seq_printf(s, " do_toggle: %s\n", - phy_cfg->do_toggle ? "Enable" : "Disable"); - seq_printf(s, " do_toggle_once: %s\n", - phy_cfg->do_toggle_once ? "Enable" : "Disable"); - seq_printf(s, " use_default_parameter: %s\n", - phy_cfg->use_default_parameter ? "Enable" : "Disable"); - - for (index = 0; index < rtk_phy->num_phy; index++) { - struct phy_reg *phy_reg; - struct phy_parameter *phy_parameter; - - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - seq_printf(s, "PHY %d:\n", index); - - for (i = 0; i < phy_cfg->param_size; i++) { - struct phy_data *phy_data = phy_cfg->param + i; - u8 addr = ARRAY_INDEX_MAP_PHY_ADDR(i); - u16 data = phy_data->data; - - if (!phy_data->addr && !data) - seq_printf(s, " addr = 0x%02x, data = none ==> read value = 0x%04x\n", - addr, rtk_phy_read(phy_reg, addr)); - else - seq_printf(s, " addr = 0x%02x, data = 0x%04x ==> read value = 0x%04x\n", - addr, data, rtk_phy_read(phy_reg, addr)); - } - - seq_puts(s, "PHY Property:\n"); - seq_printf(s, " efuse_usb_u3_tx_lfps_swing_trim: 0x%x\n", - (int)phy_parameter->efuse_usb_u3_tx_lfps_swing_trim); - seq_printf(s, " amplitude_control_coarse: 0x%x\n", - (int)phy_parameter->amplitude_control_coarse); - seq_printf(s, " amplitude_control_fine: 0x%x\n", - (int)phy_parameter->amplitude_control_fine); - } - - return 0; -} -DEFINE_SHOW_ATTRIBUTE(rtk_usb3_parameter); - -static inline void create_debug_files(struct rtk_phy *rtk_phy) -{ - struct dentry *phy_debug_root = NULL; - - phy_debug_root = create_phy_debug_root(); - - if (!phy_debug_root) - return; - - rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), phy_debug_root); - - debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, - &rtk_usb3_parameter_fops); - - return; -} - -static inline void remove_debug_files(struct rtk_phy *rtk_phy) -{ - debugfs_remove_recursive(rtk_phy->debug_dir); -} -#else -static inline void create_debug_files(struct rtk_phy *rtk_phy) { } -static inline void remove_debug_files(struct rtk_phy *rtk_phy) { } -#endif /* CONFIG_DEBUG_FS */ - -static int get_phy_data_by_efuse(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter, int index) -{ - struct phy_cfg *phy_cfg = rtk_phy->phy_cfg; - u8 value = 0; - struct nvmem_cell *cell; - - if (!phy_cfg->check_efuse) - goto out; - - cell = nvmem_cell_get(rtk_phy->dev, "usb_u3_tx_lfps_swing_trim"); - if (IS_ERR(cell)) { - dev_dbg(rtk_phy->dev, "%s no usb_u3_tx_lfps_swing_trim: %ld\n", - __func__, PTR_ERR(cell)); - } else { - unsigned char *buf; - size_t buf_size; - - buf = nvmem_cell_read(cell, &buf_size); - if (!IS_ERR(buf)) { - value = buf[0] & USB_U3_TX_LFPS_SWING_TRIM_MASK; - kfree(buf); - } - nvmem_cell_put(cell); - } - - if (value > 0 && value < 0x8) - phy_parameter->efuse_usb_u3_tx_lfps_swing_trim = 0x8; - else - phy_parameter->efuse_usb_u3_tx_lfps_swing_trim = (u8)value; - -out: - return 0; -} - -static void update_amplitude_control_value(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter) -{ - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - - phy_reg = &phy_parameter->phy_reg; - phy_cfg = rtk_phy->phy_cfg; - - if (phy_parameter->amplitude_control_coarse != AMPLITUDE_CONTROL_COARSE_DEFAULT) { - u16 val_mask = AMPLITUDE_CONTROL_COARSE_MASK; - u16 data; - - if (!phy_cfg->param[PHY_ADDR_0X20].addr && !phy_cfg->param[PHY_ADDR_0X20].data) { - phy_cfg->param[PHY_ADDR_0X20].addr = PHY_ADDR_0X20; - data = rtk_phy_read(phy_reg, PHY_ADDR_0X20); - } else { - data = phy_cfg->param[PHY_ADDR_0X20].data; - } - - data &= (~val_mask); - data |= (phy_parameter->amplitude_control_coarse & val_mask); - - phy_cfg->param[PHY_ADDR_0X20].data = data; - } - - if (phy_parameter->efuse_usb_u3_tx_lfps_swing_trim) { - u8 efuse_val = phy_parameter->efuse_usb_u3_tx_lfps_swing_trim; - u16 val_mask = USB_U3_TX_LFPS_SWING_TRIM_MASK; - int val_shift = USB_U3_TX_LFPS_SWING_TRIM_SHIFT; - u16 data; - - if (!phy_cfg->param[PHY_ADDR_0X20].addr && !phy_cfg->param[PHY_ADDR_0X20].data) { - phy_cfg->param[PHY_ADDR_0X20].addr = PHY_ADDR_0X20; - data = rtk_phy_read(phy_reg, PHY_ADDR_0X20); - } else { - data = phy_cfg->param[PHY_ADDR_0X20].data; - } - - data &= ~(val_mask << val_shift); - data |= ((efuse_val & val_mask) << val_shift); - - phy_cfg->param[PHY_ADDR_0X20].data = data; - } - - if (phy_parameter->amplitude_control_fine != AMPLITUDE_CONTROL_FINE_DEFAULT) { - u16 val_mask = AMPLITUDE_CONTROL_FINE_MASK; - - if (!phy_cfg->param[PHY_ADDR_0X21].addr && !phy_cfg->param[PHY_ADDR_0X21].data) - phy_cfg->param[PHY_ADDR_0X21].addr = PHY_ADDR_0X21; - - phy_cfg->param[PHY_ADDR_0X21].data = - phy_parameter->amplitude_control_fine & val_mask; - } -} - -static int parse_phy_data(struct rtk_phy *rtk_phy) -{ - struct device *dev = rtk_phy->dev; - struct phy_parameter *phy_parameter; - int ret = 0; - int index; - - rtk_phy->phy_parameter = devm_kzalloc(dev, sizeof(struct phy_parameter) * - rtk_phy->num_phy, GFP_KERNEL); - if (!rtk_phy->phy_parameter) - return -ENOMEM; - - for (index = 0; index < rtk_phy->num_phy; index++) { - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - - phy_parameter->phy_reg.reg_mdio_ctl = of_iomap(dev->of_node, 0) + index; - - /* Amplitude control address 0x20 bit 0 to bit 7 */ - if (of_property_read_u32(dev->of_node, "realtek,amplitude-control-coarse-tuning", - &phy_parameter->amplitude_control_coarse)) - phy_parameter->amplitude_control_coarse = AMPLITUDE_CONTROL_COARSE_DEFAULT; - - /* Amplitude control address 0x21 bit 0 to bit 16 */ - if (of_property_read_u32(dev->of_node, "realtek,amplitude-control-fine-tuning", - &phy_parameter->amplitude_control_fine)) - phy_parameter->amplitude_control_fine = AMPLITUDE_CONTROL_FINE_DEFAULT; - - get_phy_data_by_efuse(rtk_phy, phy_parameter, index); - - update_amplitude_control_value(rtk_phy, phy_parameter); - } - - return ret; -} - -static int rtk_usb3phy_probe(struct platform_device *pdev) -{ - struct rtk_phy *rtk_phy; - struct device *dev = &pdev->dev; - struct phy *generic_phy; - struct phy_provider *phy_provider; - const struct phy_cfg *phy_cfg; - int ret; - - phy_cfg = of_device_get_match_data(dev); - if (!phy_cfg) { - dev_err(dev, "phy config are not assigned!\n"); - return -EINVAL; - } - - rtk_phy = devm_kzalloc(dev, sizeof(*rtk_phy), GFP_KERNEL); - if (!rtk_phy) - return -ENOMEM; - - rtk_phy->dev = &pdev->dev; - rtk_phy->phy.dev = rtk_phy->dev; - rtk_phy->phy.label = "rtk-usb3phy"; - rtk_phy->phy.notify_port_status = rtk_phy_notify_port_status; - - rtk_phy->phy_cfg = devm_kzalloc(dev, sizeof(*phy_cfg), GFP_KERNEL); - - memcpy(rtk_phy->phy_cfg, phy_cfg, sizeof(*phy_cfg)); - - rtk_phy->num_phy = 1; - - ret = parse_phy_data(rtk_phy); - if (ret) - goto err; - - platform_set_drvdata(pdev, rtk_phy); - - generic_phy = devm_phy_create(rtk_phy->dev, NULL, &ops); - if (IS_ERR(generic_phy)) - return PTR_ERR(generic_phy); - - phy_set_drvdata(generic_phy, rtk_phy); - - phy_provider = devm_of_phy_provider_register(rtk_phy->dev, of_phy_simple_xlate); - if (IS_ERR(phy_provider)) - return PTR_ERR(phy_provider); - - ret = usb_add_phy_dev(&rtk_phy->phy); - if (ret) - goto err; - - create_debug_files(rtk_phy); - -err: - return ret; -} - -static void rtk_usb3phy_remove(struct platform_device *pdev) -{ - struct rtk_phy *rtk_phy = platform_get_drvdata(pdev); - - remove_debug_files(rtk_phy); - - usb_remove_phy(&rtk_phy->phy); -} - -static const struct phy_cfg rtd1295_phy_cfg = { - .param_size = MAX_USB_PHY_DATA_SIZE, - .param = { [0] = {0x01, 0x4008}, [1] = {0x01, 0xe046}, - [2] = {0x02, 0x6046}, [3] = {0x03, 0x2779}, - [4] = {0x04, 0x72f5}, [5] = {0x05, 0x2ad3}, - [6] = {0x06, 0x000e}, [7] = {0x07, 0x2e00}, - [8] = {0x08, 0x3591}, [9] = {0x09, 0x525c}, - [10] = {0x0a, 0xa600}, [11] = {0x0b, 0xa904}, - [12] = {0x0c, 0xc000}, [13] = {0x0d, 0xef1c}, - [14] = {0x0e, 0x2000}, [15] = {0x0f, 0x0000}, - [16] = {0x10, 0x000c}, [17] = {0x11, 0x4c00}, - [18] = {0x12, 0xfc00}, [19] = {0x13, 0x0c81}, - [20] = {0x14, 0xde01}, [21] = {0x15, 0x0000}, - [22] = {0x16, 0x0000}, [23] = {0x17, 0x0000}, - [24] = {0x18, 0x0000}, [25] = {0x19, 0x4004}, - [26] = {0x1a, 0x1260}, [27] = {0x1b, 0xff00}, - [28] = {0x1c, 0xcb00}, [29] = {0x1d, 0xa03f}, - [30] = {0x1e, 0xc2e0}, [31] = {0x1f, 0x2807}, - [32] = {0x20, 0x947a}, [33] = {0x21, 0x88aa}, - [34] = {0x22, 0x0057}, [35] = {0x23, 0xab66}, - [36] = {0x24, 0x0800}, [37] = {0x25, 0x0000}, - [38] = {0x26, 0x040a}, [39] = {0x27, 0x01d6}, - [40] = {0x28, 0xf8c2}, [41] = {0x29, 0x3080}, - [42] = {0x2a, 0x3082}, [43] = {0x2b, 0x2078}, - [44] = {0x2c, 0xffff}, [45] = {0x2d, 0xffff}, - [46] = {0x2e, 0x0000}, [47] = {0x2f, 0x0040}, }, - .check_efuse = false, - .do_toggle = true, - .do_toggle_once = false, - .use_default_parameter = false, - .check_rx_front_end_offset = false, -}; - -static const struct phy_cfg rtd1619_phy_cfg = { - .param_size = MAX_USB_PHY_DATA_SIZE, - .param = { [8] = {0x08, 0x3591}, - [38] = {0x26, 0x840b}, - [40] = {0x28, 0xf842}, }, - .check_efuse = false, - .do_toggle = true, - .do_toggle_once = false, - .use_default_parameter = false, - .check_rx_front_end_offset = false, -}; - -static const struct phy_cfg rtd1319_phy_cfg = { - .param_size = MAX_USB_PHY_DATA_SIZE, - .param = { [1] = {0x01, 0xac86}, - [6] = {0x06, 0x0003}, - [9] = {0x09, 0x924c}, - [10] = {0x0a, 0xa608}, - [11] = {0x0b, 0xb905}, - [14] = {0x0e, 0x2010}, - [32] = {0x20, 0x705a}, - [33] = {0x21, 0xf645}, - [34] = {0x22, 0x0013}, - [35] = {0x23, 0xcb66}, - [41] = {0x29, 0xff00}, }, - .check_efuse = true, - .do_toggle = true, - .do_toggle_once = false, - .use_default_parameter = false, - .check_rx_front_end_offset = false, -}; - -static const struct phy_cfg rtd1619b_phy_cfg = { - .param_size = MAX_USB_PHY_DATA_SIZE, - .param = { [1] = {0x01, 0xac8c}, - [6] = {0x06, 0x0017}, - [9] = {0x09, 0x724c}, - [10] = {0x0a, 0xb610}, - [11] = {0x0b, 0xb90d}, - [13] = {0x0d, 0xef2a}, - [15] = {0x0f, 0x9050}, - [16] = {0x10, 0x000c}, - [32] = {0x20, 0x70ff}, - [34] = {0x22, 0x0013}, - [35] = {0x23, 0xdb66}, - [38] = {0x26, 0x8609}, - [41] = {0x29, 0xff13}, - [42] = {0x2a, 0x3070}, }, - .check_efuse = true, - .do_toggle = false, - .do_toggle_once = true, - .use_default_parameter = false, - .check_rx_front_end_offset = false, -}; - -static const struct phy_cfg rtd1319d_phy_cfg = { - .param_size = MAX_USB_PHY_DATA_SIZE, - .param = { [1] = {0x01, 0xac89}, - [4] = {0x04, 0xf2f5}, - [6] = {0x06, 0x0017}, - [9] = {0x09, 0x424c}, - [10] = {0x0a, 0x9610}, - [11] = {0x0b, 0x9901}, - [12] = {0x0c, 0xf000}, - [13] = {0x0d, 0xef2a}, - [14] = {0x0e, 0x1000}, - [15] = {0x0f, 0x9050}, - [32] = {0x20, 0x7077}, - [35] = {0x23, 0x0b62}, - [37] = {0x25, 0x10ec}, - [42] = {0x2a, 0x3070}, }, - .check_efuse = true, - .do_toggle = false, - .do_toggle_once = true, - .use_default_parameter = false, - .check_rx_front_end_offset = true, -}; - -static const struct of_device_id usbphy_rtk_dt_match[] = { - { .compatible = "realtek,rtd1295-usb3phy", .data = &rtd1295_phy_cfg }, - { .compatible = "realtek,rtd1319-usb3phy", .data = &rtd1319_phy_cfg }, - { .compatible = "realtek,rtd1319d-usb3phy", .data = &rtd1319d_phy_cfg }, - { .compatible = "realtek,rtd1619-usb3phy", .data = &rtd1619_phy_cfg }, - { .compatible = "realtek,rtd1619b-usb3phy", .data = &rtd1619b_phy_cfg }, - {}, -}; -MODULE_DEVICE_TABLE(of, usbphy_rtk_dt_match); - -static struct platform_driver rtk_usb3phy_driver = { - .probe = rtk_usb3phy_probe, - .remove_new = rtk_usb3phy_remove, - .driver = { - .name = "rtk-usb3phy", - .of_match_table = usbphy_rtk_dt_match, - }, -}; - -module_platform_driver(rtk_usb3phy_driver); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform: rtk-usb3phy"); -MODULE_AUTHOR("Stanley Chang "); -MODULE_DESCRIPTION("Realtek usb 3.0 phy driver"); From 7e2cde1813418b39b5e95d86e10d6701dccf18af Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Nov 2023 12:06:53 +0100 Subject: [PATCH 0614/1397] Revert "phy: realtek: usb: Add driver for the Realtek SoC USB 2.0 PHY" commit 7a784bcdd7e54f0599da3b2360e472238412623e upstream. This reverts commit 134e6d25f6bd06071e5aac0a7eefcea6f7713955. The recently added Realtek PHY drivers depend on the new port status notification mechanism which was built on the deprecated USB PHY implementation and devicetree binding. Specifically, using these PHYs would require describing the very same PHY using both the generic "phy" property and the deprecated "usb-phy" property which is clearly wrong. We should not be building new functionality on top of the legacy USB PHY implementation even if it is currently stuck in some kind of transitional limbo. Revert the new Realtek PHY drivers for now so that the port status notification interface can be reverted and replaced. Fixes: 134e6d25f6bd ("phy: realtek: usb: Add driver for the Realtek SoC USB 2.0 PHY") Cc: stable@vger.kernel.org # 6.6 Cc: Stanley Chang Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231106110654.31090-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/phy/Kconfig | 1 - drivers/phy/Makefile | 1 - drivers/phy/realtek/Kconfig | 20 - drivers/phy/realtek/Makefile | 2 - drivers/phy/realtek/phy-rtk-usb2.c | 1325 ---------------------------- 5 files changed, 1349 deletions(-) delete mode 100644 drivers/phy/realtek/Kconfig delete mode 100644 drivers/phy/realtek/Makefile delete mode 100644 drivers/phy/realtek/phy-rtk-usb2.c diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index d1670bbe6d6b..e4502958fd62 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig @@ -87,7 +87,6 @@ source "drivers/phy/motorola/Kconfig" source "drivers/phy/mscc/Kconfig" source "drivers/phy/qualcomm/Kconfig" source "drivers/phy/ralink/Kconfig" -source "drivers/phy/realtek/Kconfig" source "drivers/phy/renesas/Kconfig" source "drivers/phy/rockchip/Kconfig" source "drivers/phy/samsung/Kconfig" diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile index 868a220ed0f6..fb3dc9de6111 100644 --- a/drivers/phy/Makefile +++ b/drivers/phy/Makefile @@ -26,7 +26,6 @@ obj-y += allwinner/ \ mscc/ \ qualcomm/ \ ralink/ \ - realtek/ \ renesas/ \ rockchip/ \ samsung/ \ diff --git a/drivers/phy/realtek/Kconfig b/drivers/phy/realtek/Kconfig deleted file mode 100644 index 745587751070..000000000000 --- a/drivers/phy/realtek/Kconfig +++ /dev/null @@ -1,20 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Phy drivers for Realtek platforms -# - -if ARCH_REALTEK || COMPILE_TEST - -config PHY_RTK_RTD_USB2PHY - tristate "Realtek RTD USB2 PHY Transceiver Driver" - depends on USB_SUPPORT - select GENERIC_PHY - select USB_PHY - select USB_COMMON - help - Enable this to support Realtek SoC USB2 phy transceiver. - The DHC (digital home center) RTD series SoCs used the Synopsys - DWC3 USB IP. This driver will do the PHY initialization - of the parameters. - -endif # ARCH_REALTEK || COMPILE_TEST diff --git a/drivers/phy/realtek/Makefile b/drivers/phy/realtek/Makefile deleted file mode 100644 index cf5d440841a2..000000000000 --- a/drivers/phy/realtek/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_PHY_RTK_RTD_USB2PHY) += phy-rtk-usb2.o diff --git a/drivers/phy/realtek/phy-rtk-usb2.c b/drivers/phy/realtek/phy-rtk-usb2.c deleted file mode 100644 index aedc78bd37f7..000000000000 --- a/drivers/phy/realtek/phy-rtk-usb2.c +++ /dev/null @@ -1,1325 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * phy-rtk-usb2.c RTK usb2.0 PHY driver - * - * Copyright (C) 2023 Realtek Semiconductor Corporation - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* GUSB2PHYACCn register */ -#define PHY_NEW_REG_REQ BIT(25) -#define PHY_VSTS_BUSY BIT(23) -#define PHY_VCTRL_SHIFT 8 -#define PHY_REG_DATA_MASK 0xff - -#define GET_LOW_NIBBLE(addr) ((addr) & 0x0f) -#define GET_HIGH_NIBBLE(addr) (((addr) & 0xf0) >> 4) - -#define EFUS_USB_DC_CAL_RATE 2 -#define EFUS_USB_DC_CAL_MAX 7 - -#define EFUS_USB_DC_DIS_RATE 1 -#define EFUS_USB_DC_DIS_MAX 7 - -#define MAX_PHY_DATA_SIZE 20 -#define OFFEST_PHY_READ 0x20 - -#define MAX_USB_PHY_NUM 4 -#define MAX_USB_PHY_PAGE0_DATA_SIZE 16 -#define MAX_USB_PHY_PAGE1_DATA_SIZE 16 -#define MAX_USB_PHY_PAGE2_DATA_SIZE 8 - -#define SET_PAGE_OFFSET 0xf4 -#define SET_PAGE_0 0x9b -#define SET_PAGE_1 0xbb -#define SET_PAGE_2 0xdb - -#define PAGE_START 0xe0 -#define PAGE0_0XE4 0xe4 -#define PAGE0_0XE6 0xe6 -#define PAGE0_0XE7 0xe7 -#define PAGE1_0XE0 0xe0 -#define PAGE1_0XE2 0xe2 - -#define SENSITIVITY_CTRL (BIT(4) | BIT(5) | BIT(6)) -#define ENABLE_AUTO_SENSITIVITY_CALIBRATION BIT(2) -#define DEFAULT_DC_DRIVING_VALUE (0x8) -#define DEFAULT_DC_DISCONNECTION_VALUE (0x6) -#define HS_CLK_SELECT BIT(6) - -struct phy_reg { - void __iomem *reg_wrap_vstatus; - void __iomem *reg_gusb2phyacc0; - int vstatus_index; -}; - -struct phy_data { - u8 addr; - u8 data; -}; - -struct phy_cfg { - int page0_size; - struct phy_data page0[MAX_USB_PHY_PAGE0_DATA_SIZE]; - int page1_size; - struct phy_data page1[MAX_USB_PHY_PAGE1_DATA_SIZE]; - int page2_size; - struct phy_data page2[MAX_USB_PHY_PAGE2_DATA_SIZE]; - - int num_phy; - - bool check_efuse; - int check_efuse_version; -#define CHECK_EFUSE_V1 1 -#define CHECK_EFUSE_V2 2 - int efuse_dc_driving_rate; - int efuse_dc_disconnect_rate; - int dc_driving_mask; - int dc_disconnect_mask; - bool usb_dc_disconnect_at_page0; - int driving_updated_for_dev_dis; - - bool do_toggle; - bool do_toggle_driving; - bool use_default_parameter; - bool is_double_sensitivity_mode; -}; - -struct phy_parameter { - struct phy_reg phy_reg; - - /* Get from efuse */ - s8 efuse_usb_dc_cal; - s8 efuse_usb_dc_dis; - - /* Get from dts */ - bool inverse_hstx_sync_clock; - u32 driving_level; - s32 driving_level_compensate; - s32 disconnection_compensate; -}; - -struct rtk_phy { - struct usb_phy phy; - struct device *dev; - - struct phy_cfg *phy_cfg; - int num_phy; - struct phy_parameter *phy_parameter; - - struct dentry *debug_dir; -}; - -/* mapping 0xE0 to 0 ... 0xE7 to 7, 0xF0 to 8 ,,, 0xF7 to 15 */ -static inline int page_addr_to_array_index(u8 addr) -{ - return (int)((((addr) - PAGE_START) & 0x7) + - ((((addr) - PAGE_START) & 0x10) >> 1)); -} - -static inline u8 array_index_to_page_addr(int index) -{ - return ((((index) + PAGE_START) & 0x7) + - ((((index) & 0x8) << 1) + PAGE_START)); -} - -#define PHY_IO_TIMEOUT_USEC (50000) -#define PHY_IO_DELAY_US (100) - -static inline int utmi_wait_register(void __iomem *reg, u32 mask, u32 result) -{ - int ret; - unsigned int val; - - ret = read_poll_timeout(readl, val, ((val & mask) == result), - PHY_IO_DELAY_US, PHY_IO_TIMEOUT_USEC, false, reg); - if (ret) { - pr_err("%s can't program USB phy\n", __func__); - return -ETIMEDOUT; - } - - return 0; -} - -static char rtk_phy_read(struct phy_reg *phy_reg, char addr) -{ - void __iomem *reg_gusb2phyacc0 = phy_reg->reg_gusb2phyacc0; - unsigned int val; - int ret = 0; - - addr -= OFFEST_PHY_READ; - - /* polling until VBusy == 0 */ - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return (char)ret; - - /* VCtrl = low nibble of addr, and set PHY_NEW_REG_REQ */ - val = PHY_NEW_REG_REQ | (GET_LOW_NIBBLE(addr) << PHY_VCTRL_SHIFT); - writel(val, reg_gusb2phyacc0); - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return (char)ret; - - /* VCtrl = high nibble of addr, and set PHY_NEW_REG_REQ */ - val = PHY_NEW_REG_REQ | (GET_HIGH_NIBBLE(addr) << PHY_VCTRL_SHIFT); - writel(val, reg_gusb2phyacc0); - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return (char)ret; - - val = readl(reg_gusb2phyacc0); - - return (char)(val & PHY_REG_DATA_MASK); -} - -static int rtk_phy_write(struct phy_reg *phy_reg, char addr, char data) -{ - unsigned int val; - void __iomem *reg_wrap_vstatus = phy_reg->reg_wrap_vstatus; - void __iomem *reg_gusb2phyacc0 = phy_reg->reg_gusb2phyacc0; - int shift_bits = phy_reg->vstatus_index * 8; - int ret = 0; - - /* write data to VStatusOut2 (data output to phy) */ - writel((u32)data << shift_bits, reg_wrap_vstatus); - - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return ret; - - /* VCtrl = low nibble of addr, set PHY_NEW_REG_REQ */ - val = PHY_NEW_REG_REQ | (GET_LOW_NIBBLE(addr) << PHY_VCTRL_SHIFT); - - writel(val, reg_gusb2phyacc0); - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return ret; - - /* VCtrl = high nibble of addr, set PHY_NEW_REG_REQ */ - val = PHY_NEW_REG_REQ | (GET_HIGH_NIBBLE(addr) << PHY_VCTRL_SHIFT); - - writel(val, reg_gusb2phyacc0); - ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0); - if (ret) - return ret; - - return 0; -} - -static int rtk_phy_set_page(struct phy_reg *phy_reg, int page) -{ - switch (page) { - case 0: - return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_0); - case 1: - return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_1); - case 2: - return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_2); - default: - pr_err("%s error page=%d\n", __func__, page); - } - - return -EINVAL; -} - -static u8 __updated_dc_disconnect_level_page0_0xe4(struct phy_cfg *phy_cfg, - struct phy_parameter *phy_parameter, u8 data) -{ - u8 ret; - s32 val; - s32 dc_disconnect_mask = phy_cfg->dc_disconnect_mask; - int offset = 4; - - val = (s32)((data >> offset) & dc_disconnect_mask) - + phy_parameter->efuse_usb_dc_dis - + phy_parameter->disconnection_compensate; - - if (val > dc_disconnect_mask) - val = dc_disconnect_mask; - else if (val < 0) - val = 0; - - ret = (data & (~(dc_disconnect_mask << offset))) | - (val & dc_disconnect_mask) << offset; - - return ret; -} - -/* updated disconnect level at page0 */ -static void update_dc_disconnect_level_at_page0(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter, bool update) -{ - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - struct phy_data *phy_data_page; - struct phy_data *phy_data; - u8 addr, data; - int offset = 4; - s32 dc_disconnect_mask; - int i; - - phy_cfg = rtk_phy->phy_cfg; - phy_reg = &phy_parameter->phy_reg; - - /* Set page 0 */ - phy_data_page = phy_cfg->page0; - rtk_phy_set_page(phy_reg, 0); - - i = page_addr_to_array_index(PAGE0_0XE4); - phy_data = phy_data_page + i; - if (!phy_data->addr) { - phy_data->addr = PAGE0_0XE4; - phy_data->data = rtk_phy_read(phy_reg, PAGE0_0XE4); - } - - addr = phy_data->addr; - data = phy_data->data; - dc_disconnect_mask = phy_cfg->dc_disconnect_mask; - - if (update) - data = __updated_dc_disconnect_level_page0_0xe4(phy_cfg, phy_parameter, data); - else - data = (data & ~(dc_disconnect_mask << offset)) | - (DEFAULT_DC_DISCONNECTION_VALUE << offset); - - if (rtk_phy_write(phy_reg, addr, data)) - dev_err(rtk_phy->dev, - "%s: Error to set page1 parameter addr=0x%x value=0x%x\n", - __func__, addr, data); -} - -static u8 __updated_dc_disconnect_level_page1_0xe2(struct phy_cfg *phy_cfg, - struct phy_parameter *phy_parameter, u8 data) -{ - u8 ret; - s32 val; - s32 dc_disconnect_mask = phy_cfg->dc_disconnect_mask; - - if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) { - val = (s32)(data & dc_disconnect_mask) - + phy_parameter->efuse_usb_dc_dis - + phy_parameter->disconnection_compensate; - } else { /* for CHECK_EFUSE_V2 or no efuse */ - if (phy_parameter->efuse_usb_dc_dis) - val = (s32)(phy_parameter->efuse_usb_dc_dis + - phy_parameter->disconnection_compensate); - else - val = (s32)((data & dc_disconnect_mask) + - phy_parameter->disconnection_compensate); - } - - if (val > dc_disconnect_mask) - val = dc_disconnect_mask; - else if (val < 0) - val = 0; - - ret = (data & (~dc_disconnect_mask)) | (val & dc_disconnect_mask); - - return ret; -} - -/* updated disconnect level at page1 */ -static void update_dc_disconnect_level_at_page1(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter, bool update) -{ - struct phy_cfg *phy_cfg; - struct phy_data *phy_data_page; - struct phy_data *phy_data; - struct phy_reg *phy_reg; - u8 addr, data; - s32 dc_disconnect_mask; - int i; - - phy_cfg = rtk_phy->phy_cfg; - phy_reg = &phy_parameter->phy_reg; - - /* Set page 1 */ - phy_data_page = phy_cfg->page1; - rtk_phy_set_page(phy_reg, 1); - - i = page_addr_to_array_index(PAGE1_0XE2); - phy_data = phy_data_page + i; - if (!phy_data->addr) { - phy_data->addr = PAGE1_0XE2; - phy_data->data = rtk_phy_read(phy_reg, PAGE1_0XE2); - } - - addr = phy_data->addr; - data = phy_data->data; - dc_disconnect_mask = phy_cfg->dc_disconnect_mask; - - if (update) - data = __updated_dc_disconnect_level_page1_0xe2(phy_cfg, phy_parameter, data); - else - data = (data & ~dc_disconnect_mask) | DEFAULT_DC_DISCONNECTION_VALUE; - - if (rtk_phy_write(phy_reg, addr, data)) - dev_err(rtk_phy->dev, - "%s: Error to set page1 parameter addr=0x%x value=0x%x\n", - __func__, addr, data); -} - -static void update_dc_disconnect_level(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter, bool update) -{ - struct phy_cfg *phy_cfg = rtk_phy->phy_cfg; - - if (phy_cfg->usb_dc_disconnect_at_page0) - update_dc_disconnect_level_at_page0(rtk_phy, phy_parameter, update); - else - update_dc_disconnect_level_at_page1(rtk_phy, phy_parameter, update); -} - -static u8 __update_dc_driving_page0_0xe4(struct phy_cfg *phy_cfg, - struct phy_parameter *phy_parameter, u8 data) -{ - s32 driving_level_compensate = phy_parameter->driving_level_compensate; - s32 dc_driving_mask = phy_cfg->dc_driving_mask; - s32 val; - u8 ret; - - if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) { - val = (s32)(data & dc_driving_mask) + driving_level_compensate - + phy_parameter->efuse_usb_dc_cal; - } else { /* for CHECK_EFUSE_V2 or no efuse */ - if (phy_parameter->efuse_usb_dc_cal) - val = (s32)((phy_parameter->efuse_usb_dc_cal & dc_driving_mask) - + driving_level_compensate); - else - val = (s32)(data & dc_driving_mask); - } - - if (val > dc_driving_mask) - val = dc_driving_mask; - else if (val < 0) - val = 0; - - ret = (data & (~dc_driving_mask)) | (val & dc_driving_mask); - - return ret; -} - -static void update_dc_driving_level(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter) -{ - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - - phy_reg = &phy_parameter->phy_reg; - phy_cfg = rtk_phy->phy_cfg; - if (!phy_cfg->page0[4].addr) { - rtk_phy_set_page(phy_reg, 0); - phy_cfg->page0[4].addr = PAGE0_0XE4; - phy_cfg->page0[4].data = rtk_phy_read(phy_reg, PAGE0_0XE4); - } - - if (phy_parameter->driving_level != DEFAULT_DC_DRIVING_VALUE) { - u32 dc_driving_mask; - u8 driving_level; - u8 data; - - data = phy_cfg->page0[4].data; - dc_driving_mask = phy_cfg->dc_driving_mask; - driving_level = data & dc_driving_mask; - - dev_dbg(rtk_phy->dev, "%s driving_level=%d => dts driving_level=%d\n", - __func__, driving_level, phy_parameter->driving_level); - - phy_cfg->page0[4].data = (data & (~dc_driving_mask)) | - (phy_parameter->driving_level & dc_driving_mask); - } - - phy_cfg->page0[4].data = __update_dc_driving_page0_0xe4(phy_cfg, - phy_parameter, - phy_cfg->page0[4].data); -} - -static void update_hs_clk_select(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter) -{ - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - - phy_cfg = rtk_phy->phy_cfg; - phy_reg = &phy_parameter->phy_reg; - - if (phy_parameter->inverse_hstx_sync_clock) { - if (!phy_cfg->page0[6].addr) { - rtk_phy_set_page(phy_reg, 0); - phy_cfg->page0[6].addr = PAGE0_0XE6; - phy_cfg->page0[6].data = rtk_phy_read(phy_reg, PAGE0_0XE6); - } - - phy_cfg->page0[6].data = phy_cfg->page0[6].data | HS_CLK_SELECT; - } -} - -static void do_rtk_phy_toggle(struct rtk_phy *rtk_phy, - int index, bool connect) -{ - struct phy_parameter *phy_parameter; - struct phy_cfg *phy_cfg; - struct phy_reg *phy_reg; - struct phy_data *phy_data_page; - u8 addr, data; - int i; - - phy_cfg = rtk_phy->phy_cfg; - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - if (!phy_cfg->do_toggle) - goto out; - - if (phy_cfg->is_double_sensitivity_mode) - goto do_toggle_driving; - - /* Set page 0 */ - rtk_phy_set_page(phy_reg, 0); - - addr = PAGE0_0XE7; - data = rtk_phy_read(phy_reg, addr); - - if (connect) - rtk_phy_write(phy_reg, addr, data & (~SENSITIVITY_CTRL)); - else - rtk_phy_write(phy_reg, addr, data | (SENSITIVITY_CTRL)); - -do_toggle_driving: - - if (!phy_cfg->do_toggle_driving) - goto do_toggle; - - /* Page 0 addr 0xE4 driving capability */ - - /* Set page 0 */ - phy_data_page = phy_cfg->page0; - rtk_phy_set_page(phy_reg, 0); - - i = page_addr_to_array_index(PAGE0_0XE4); - addr = phy_data_page[i].addr; - data = phy_data_page[i].data; - - if (connect) { - rtk_phy_write(phy_reg, addr, data); - } else { - u8 value; - s32 tmp; - s32 driving_updated = - phy_cfg->driving_updated_for_dev_dis; - s32 dc_driving_mask = phy_cfg->dc_driving_mask; - - tmp = (s32)(data & dc_driving_mask) + driving_updated; - - if (tmp > dc_driving_mask) - tmp = dc_driving_mask; - else if (tmp < 0) - tmp = 0; - - value = (data & (~dc_driving_mask)) | (tmp & dc_driving_mask); - - rtk_phy_write(phy_reg, addr, value); - } - -do_toggle: - /* restore dc disconnect level before toggle */ - update_dc_disconnect_level(rtk_phy, phy_parameter, false); - - /* Set page 1 */ - rtk_phy_set_page(phy_reg, 1); - - addr = PAGE1_0XE0; - data = rtk_phy_read(phy_reg, addr); - - rtk_phy_write(phy_reg, addr, data & - (~ENABLE_AUTO_SENSITIVITY_CALIBRATION)); - mdelay(1); - rtk_phy_write(phy_reg, addr, data | - (ENABLE_AUTO_SENSITIVITY_CALIBRATION)); - - /* update dc disconnect level after toggle */ - update_dc_disconnect_level(rtk_phy, phy_parameter, true); - -out: - return; -} - -static int do_rtk_phy_init(struct rtk_phy *rtk_phy, int index) -{ - struct phy_parameter *phy_parameter; - struct phy_cfg *phy_cfg; - struct phy_data *phy_data_page; - struct phy_reg *phy_reg; - int i; - - phy_cfg = rtk_phy->phy_cfg; - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - if (phy_cfg->use_default_parameter) { - dev_dbg(rtk_phy->dev, "%s phy#%d use default parameter\n", - __func__, index); - goto do_toggle; - } - - /* Set page 0 */ - phy_data_page = phy_cfg->page0; - rtk_phy_set_page(phy_reg, 0); - - for (i = 0; i < phy_cfg->page0_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = phy_data->addr; - u8 data = phy_data->data; - - if (!addr) - continue; - - if (rtk_phy_write(phy_reg, addr, data)) { - dev_err(rtk_phy->dev, - "%s: Error to set page0 parameter addr=0x%x value=0x%x\n", - __func__, addr, data); - return -EINVAL; - } - } - - /* Set page 1 */ - phy_data_page = phy_cfg->page1; - rtk_phy_set_page(phy_reg, 1); - - for (i = 0; i < phy_cfg->page1_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = phy_data->addr; - u8 data = phy_data->data; - - if (!addr) - continue; - - if (rtk_phy_write(phy_reg, addr, data)) { - dev_err(rtk_phy->dev, - "%s: Error to set page1 parameter addr=0x%x value=0x%x\n", - __func__, addr, data); - return -EINVAL; - } - } - - if (phy_cfg->page2_size == 0) - goto do_toggle; - - /* Set page 2 */ - phy_data_page = phy_cfg->page2; - rtk_phy_set_page(phy_reg, 2); - - for (i = 0; i < phy_cfg->page2_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = phy_data->addr; - u8 data = phy_data->data; - - if (!addr) - continue; - - if (rtk_phy_write(phy_reg, addr, data)) { - dev_err(rtk_phy->dev, - "%s: Error to set page2 parameter addr=0x%x value=0x%x\n", - __func__, addr, data); - return -EINVAL; - } - } - -do_toggle: - do_rtk_phy_toggle(rtk_phy, index, false); - - return 0; -} - -static int rtk_phy_init(struct phy *phy) -{ - struct rtk_phy *rtk_phy = phy_get_drvdata(phy); - unsigned long phy_init_time = jiffies; - int i, ret = 0; - - if (!rtk_phy) - return -EINVAL; - - for (i = 0; i < rtk_phy->num_phy; i++) - ret = do_rtk_phy_init(rtk_phy, i); - - dev_dbg(rtk_phy->dev, "Initialized RTK USB 2.0 PHY (take %dms)\n", - jiffies_to_msecs(jiffies - phy_init_time)); - return ret; -} - -static int rtk_phy_exit(struct phy *phy) -{ - return 0; -} - -static const struct phy_ops ops = { - .init = rtk_phy_init, - .exit = rtk_phy_exit, - .owner = THIS_MODULE, -}; - -static void rtk_phy_toggle(struct usb_phy *usb2_phy, bool connect, int port) -{ - int index = port; - struct rtk_phy *rtk_phy = NULL; - - rtk_phy = dev_get_drvdata(usb2_phy->dev); - - if (index > rtk_phy->num_phy) { - dev_err(rtk_phy->dev, "%s: The port=%d is not in usb phy (num_phy=%d)\n", - __func__, index, rtk_phy->num_phy); - return; - } - - do_rtk_phy_toggle(rtk_phy, index, connect); -} - -static int rtk_phy_notify_port_status(struct usb_phy *x, int port, - u16 portstatus, u16 portchange) -{ - bool connect = false; - - pr_debug("%s port=%d portstatus=0x%x portchange=0x%x\n", - __func__, port, (int)portstatus, (int)portchange); - if (portstatus & USB_PORT_STAT_CONNECTION) - connect = true; - - if (portchange & USB_PORT_STAT_C_CONNECTION) - rtk_phy_toggle(x, connect, port); - - return 0; -} - -#ifdef CONFIG_DEBUG_FS -static struct dentry *create_phy_debug_root(void) -{ - struct dentry *phy_debug_root; - - phy_debug_root = debugfs_lookup("phy", usb_debug_root); - if (!phy_debug_root) - phy_debug_root = debugfs_create_dir("phy", usb_debug_root); - - return phy_debug_root; -} - -static int rtk_usb2_parameter_show(struct seq_file *s, void *unused) -{ - struct rtk_phy *rtk_phy = s->private; - struct phy_cfg *phy_cfg; - int i, index; - - phy_cfg = rtk_phy->phy_cfg; - - seq_puts(s, "Property:\n"); - seq_printf(s, " check_efuse: %s\n", - phy_cfg->check_efuse ? "Enable" : "Disable"); - seq_printf(s, " check_efuse_version: %d\n", - phy_cfg->check_efuse_version); - seq_printf(s, " efuse_dc_driving_rate: %d\n", - phy_cfg->efuse_dc_driving_rate); - seq_printf(s, " dc_driving_mask: 0x%x\n", - phy_cfg->dc_driving_mask); - seq_printf(s, " efuse_dc_disconnect_rate: %d\n", - phy_cfg->efuse_dc_disconnect_rate); - seq_printf(s, " dc_disconnect_mask: 0x%x\n", - phy_cfg->dc_disconnect_mask); - seq_printf(s, " usb_dc_disconnect_at_page0: %s\n", - phy_cfg->usb_dc_disconnect_at_page0 ? "true" : "false"); - seq_printf(s, " do_toggle: %s\n", - phy_cfg->do_toggle ? "Enable" : "Disable"); - seq_printf(s, " do_toggle_driving: %s\n", - phy_cfg->do_toggle_driving ? "Enable" : "Disable"); - seq_printf(s, " driving_updated_for_dev_dis: 0x%x\n", - phy_cfg->driving_updated_for_dev_dis); - seq_printf(s, " use_default_parameter: %s\n", - phy_cfg->use_default_parameter ? "Enable" : "Disable"); - seq_printf(s, " is_double_sensitivity_mode: %s\n", - phy_cfg->is_double_sensitivity_mode ? "Enable" : "Disable"); - - for (index = 0; index < rtk_phy->num_phy; index++) { - struct phy_parameter *phy_parameter; - struct phy_reg *phy_reg; - struct phy_data *phy_data_page; - - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - phy_reg = &phy_parameter->phy_reg; - - seq_printf(s, "PHY %d:\n", index); - - seq_puts(s, "Page 0:\n"); - /* Set page 0 */ - phy_data_page = phy_cfg->page0; - rtk_phy_set_page(phy_reg, 0); - - for (i = 0; i < phy_cfg->page0_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = array_index_to_page_addr(i); - u8 data = phy_data->data; - u8 value = rtk_phy_read(phy_reg, addr); - - if (phy_data->addr) - seq_printf(s, " Page 0: addr=0x%x data=0x%02x ==> read value=0x%02x\n", - addr, data, value); - else - seq_printf(s, " Page 0: addr=0x%x data=none ==> read value=0x%02x\n", - addr, value); - } - - seq_puts(s, "Page 1:\n"); - /* Set page 1 */ - phy_data_page = phy_cfg->page1; - rtk_phy_set_page(phy_reg, 1); - - for (i = 0; i < phy_cfg->page1_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = array_index_to_page_addr(i); - u8 data = phy_data->data; - u8 value = rtk_phy_read(phy_reg, addr); - - if (phy_data->addr) - seq_printf(s, " Page 1: addr=0x%x data=0x%02x ==> read value=0x%02x\n", - addr, data, value); - else - seq_printf(s, " Page 1: addr=0x%x data=none ==> read value=0x%02x\n", - addr, value); - } - - if (phy_cfg->page2_size == 0) - goto out; - - seq_puts(s, "Page 2:\n"); - /* Set page 2 */ - phy_data_page = phy_cfg->page2; - rtk_phy_set_page(phy_reg, 2); - - for (i = 0; i < phy_cfg->page2_size; i++) { - struct phy_data *phy_data = phy_data_page + i; - u8 addr = array_index_to_page_addr(i); - u8 data = phy_data->data; - u8 value = rtk_phy_read(phy_reg, addr); - - if (phy_data->addr) - seq_printf(s, " Page 2: addr=0x%x data=0x%02x ==> read value=0x%02x\n", - addr, data, value); - else - seq_printf(s, " Page 2: addr=0x%x data=none ==> read value=0x%02x\n", - addr, value); - } - -out: - seq_puts(s, "PHY Property:\n"); - seq_printf(s, " efuse_usb_dc_cal: %d\n", - (int)phy_parameter->efuse_usb_dc_cal); - seq_printf(s, " efuse_usb_dc_dis: %d\n", - (int)phy_parameter->efuse_usb_dc_dis); - seq_printf(s, " inverse_hstx_sync_clock: %s\n", - phy_parameter->inverse_hstx_sync_clock ? "Enable" : "Disable"); - seq_printf(s, " driving_level: %d\n", - phy_parameter->driving_level); - seq_printf(s, " driving_level_compensate: %d\n", - phy_parameter->driving_level_compensate); - seq_printf(s, " disconnection_compensate: %d\n", - phy_parameter->disconnection_compensate); - } - - return 0; -} -DEFINE_SHOW_ATTRIBUTE(rtk_usb2_parameter); - -static inline void create_debug_files(struct rtk_phy *rtk_phy) -{ - struct dentry *phy_debug_root = NULL; - - phy_debug_root = create_phy_debug_root(); - if (!phy_debug_root) - return; - - rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), - phy_debug_root); - - debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, - &rtk_usb2_parameter_fops); - - return; -} - -static inline void remove_debug_files(struct rtk_phy *rtk_phy) -{ - debugfs_remove_recursive(rtk_phy->debug_dir); -} -#else -static inline void create_debug_files(struct rtk_phy *rtk_phy) { } -static inline void remove_debug_files(struct rtk_phy *rtk_phy) { } -#endif /* CONFIG_DEBUG_FS */ - -static int get_phy_data_by_efuse(struct rtk_phy *rtk_phy, - struct phy_parameter *phy_parameter, int index) -{ - struct phy_cfg *phy_cfg = rtk_phy->phy_cfg; - u8 value = 0; - struct nvmem_cell *cell; - struct soc_device_attribute rtk_soc_groot[] = { - { .family = "Realtek Groot",}, - { /* empty */ } }; - - if (!phy_cfg->check_efuse) - goto out; - - /* Read efuse for usb dc cal */ - cell = nvmem_cell_get(rtk_phy->dev, "usb-dc-cal"); - if (IS_ERR(cell)) { - dev_dbg(rtk_phy->dev, "%s no usb-dc-cal: %ld\n", - __func__, PTR_ERR(cell)); - } else { - unsigned char *buf; - size_t buf_size; - - buf = nvmem_cell_read(cell, &buf_size); - if (!IS_ERR(buf)) { - value = buf[0] & phy_cfg->dc_driving_mask; - kfree(buf); - } - nvmem_cell_put(cell); - } - - if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) { - int rate = phy_cfg->efuse_dc_driving_rate; - - if (value <= EFUS_USB_DC_CAL_MAX) - phy_parameter->efuse_usb_dc_cal = (int8_t)(value * rate); - else - phy_parameter->efuse_usb_dc_cal = -(int8_t) - ((EFUS_USB_DC_CAL_MAX & value) * rate); - - if (soc_device_match(rtk_soc_groot)) { - dev_dbg(rtk_phy->dev, "For groot IC we need a workaround to adjust efuse_usb_dc_cal\n"); - - /* We don't multiple dc_cal_rate=2 for positive dc cal compensate */ - if (value <= EFUS_USB_DC_CAL_MAX) - phy_parameter->efuse_usb_dc_cal = (int8_t)(value); - - /* We set max dc cal compensate is 0x8 if otp is 0x7 */ - if (value == 0x7) - phy_parameter->efuse_usb_dc_cal = (int8_t)(value + 1); - } - } else { /* for CHECK_EFUSE_V2 */ - phy_parameter->efuse_usb_dc_cal = value & phy_cfg->dc_driving_mask; - } - - /* Read efuse for usb dc disconnect level */ - value = 0; - cell = nvmem_cell_get(rtk_phy->dev, "usb-dc-dis"); - if (IS_ERR(cell)) { - dev_dbg(rtk_phy->dev, "%s no usb-dc-dis: %ld\n", - __func__, PTR_ERR(cell)); - } else { - unsigned char *buf; - size_t buf_size; - - buf = nvmem_cell_read(cell, &buf_size); - if (!IS_ERR(buf)) { - value = buf[0] & phy_cfg->dc_disconnect_mask; - kfree(buf); - } - nvmem_cell_put(cell); - } - - if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) { - int rate = phy_cfg->efuse_dc_disconnect_rate; - - if (value <= EFUS_USB_DC_DIS_MAX) - phy_parameter->efuse_usb_dc_dis = (int8_t)(value * rate); - else - phy_parameter->efuse_usb_dc_dis = -(int8_t) - ((EFUS_USB_DC_DIS_MAX & value) * rate); - } else { /* for CHECK_EFUSE_V2 */ - phy_parameter->efuse_usb_dc_dis = value & phy_cfg->dc_disconnect_mask; - } - -out: - return 0; -} - -static int parse_phy_data(struct rtk_phy *rtk_phy) -{ - struct device *dev = rtk_phy->dev; - struct device_node *np = dev->of_node; - struct phy_parameter *phy_parameter; - int ret = 0; - int index; - - rtk_phy->phy_parameter = devm_kzalloc(dev, sizeof(struct phy_parameter) * - rtk_phy->num_phy, GFP_KERNEL); - if (!rtk_phy->phy_parameter) - return -ENOMEM; - - for (index = 0; index < rtk_phy->num_phy; index++) { - phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index]; - - phy_parameter->phy_reg.reg_wrap_vstatus = of_iomap(np, 0); - phy_parameter->phy_reg.reg_gusb2phyacc0 = of_iomap(np, 1) + index; - phy_parameter->phy_reg.vstatus_index = index; - - if (of_property_read_bool(np, "realtek,inverse-hstx-sync-clock")) - phy_parameter->inverse_hstx_sync_clock = true; - else - phy_parameter->inverse_hstx_sync_clock = false; - - if (of_property_read_u32_index(np, "realtek,driving-level", - index, &phy_parameter->driving_level)) - phy_parameter->driving_level = DEFAULT_DC_DRIVING_VALUE; - - if (of_property_read_u32_index(np, "realtek,driving-level-compensate", - index, &phy_parameter->driving_level_compensate)) - phy_parameter->driving_level_compensate = 0; - - if (of_property_read_u32_index(np, "realtek,disconnection-compensate", - index, &phy_parameter->disconnection_compensate)) - phy_parameter->disconnection_compensate = 0; - - get_phy_data_by_efuse(rtk_phy, phy_parameter, index); - - update_dc_driving_level(rtk_phy, phy_parameter); - - update_hs_clk_select(rtk_phy, phy_parameter); - } - - return ret; -} - -static int rtk_usb2phy_probe(struct platform_device *pdev) -{ - struct rtk_phy *rtk_phy; - struct device *dev = &pdev->dev; - struct phy *generic_phy; - struct phy_provider *phy_provider; - const struct phy_cfg *phy_cfg; - int ret = 0; - - phy_cfg = of_device_get_match_data(dev); - if (!phy_cfg) { - dev_err(dev, "phy config are not assigned!\n"); - return -EINVAL; - } - - rtk_phy = devm_kzalloc(dev, sizeof(*rtk_phy), GFP_KERNEL); - if (!rtk_phy) - return -ENOMEM; - - rtk_phy->dev = &pdev->dev; - rtk_phy->phy.dev = rtk_phy->dev; - rtk_phy->phy.label = "rtk-usb2phy"; - rtk_phy->phy.notify_port_status = rtk_phy_notify_port_status; - - rtk_phy->phy_cfg = devm_kzalloc(dev, sizeof(*phy_cfg), GFP_KERNEL); - - memcpy(rtk_phy->phy_cfg, phy_cfg, sizeof(*phy_cfg)); - - rtk_phy->num_phy = phy_cfg->num_phy; - - ret = parse_phy_data(rtk_phy); - if (ret) - goto err; - - platform_set_drvdata(pdev, rtk_phy); - - generic_phy = devm_phy_create(rtk_phy->dev, NULL, &ops); - if (IS_ERR(generic_phy)) - return PTR_ERR(generic_phy); - - phy_set_drvdata(generic_phy, rtk_phy); - - phy_provider = devm_of_phy_provider_register(rtk_phy->dev, - of_phy_simple_xlate); - if (IS_ERR(phy_provider)) - return PTR_ERR(phy_provider); - - ret = usb_add_phy_dev(&rtk_phy->phy); - if (ret) - goto err; - - create_debug_files(rtk_phy); - -err: - return ret; -} - -static void rtk_usb2phy_remove(struct platform_device *pdev) -{ - struct rtk_phy *rtk_phy = platform_get_drvdata(pdev); - - remove_debug_files(rtk_phy); - - usb_remove_phy(&rtk_phy->phy); -} - -static const struct phy_cfg rtd1295_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0x90}, - [3] = {0xe3, 0x3a}, - [4] = {0xe4, 0x68}, - [6] = {0xe6, 0x91}, - [13] = {0xf5, 0x81}, - [15] = {0xf7, 0x02}, }, - .page1_size = 8, - .page1 = { /* default parameter */ }, - .page2_size = 0, - .page2 = { /* no parameter */ }, - .num_phy = 1, - .check_efuse = false, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = false, -}; - -static const struct phy_cfg rtd1395_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [4] = {0xe4, 0xac}, - [13] = {0xf5, 0x00}, - [15] = {0xf7, 0x02}, }, - .page1_size = 8, - .page1 = { /* default parameter */ }, - .page2_size = 0, - .page2 = { /* no parameter */ }, - .num_phy = 1, - .check_efuse = false, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = false, -}; - -static const struct phy_cfg rtd1395_phy_cfg_2port = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [4] = {0xe4, 0xac}, - [13] = {0xf5, 0x00}, - [15] = {0xf7, 0x02}, }, - .page1_size = 8, - .page1 = { /* default parameter */ }, - .page2_size = 0, - .page2 = { /* no parameter */ }, - .num_phy = 2, - .check_efuse = false, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = false, -}; - -static const struct phy_cfg rtd1619_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [4] = {0xe4, 0x68}, }, - .page1_size = 8, - .page1 = { /* default parameter */ }, - .page2_size = 0, - .page2 = { /* no parameter */ }, - .num_phy = 1, - .check_efuse = true, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = false, -}; - -static const struct phy_cfg rtd1319_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0x18}, - [4] = {0xe4, 0x6a}, - [7] = {0xe7, 0x71}, - [13] = {0xf5, 0x15}, - [15] = {0xf7, 0x32}, }, - .page1_size = 8, - .page1 = { [3] = {0xe3, 0x44}, }, - .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE, - .page2 = { [0] = {0xe0, 0x01}, }, - .num_phy = 1, - .check_efuse = true, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = true, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = true, -}; - -static const struct phy_cfg rtd1312c_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0x14}, - [4] = {0xe4, 0x67}, - [5] = {0xe5, 0x55}, }, - .page1_size = 8, - .page1 = { [3] = {0xe3, 0x23}, - [6] = {0xe6, 0x58}, }, - .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE, - .page2 = { /* default parameter */ }, - .num_phy = 1, - .check_efuse = true, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = 1, - .dc_driving_mask = 0xf, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = true, - .do_toggle = true, - .do_toggle_driving = true, - .driving_updated_for_dev_dis = 0xf, - .use_default_parameter = false, - .is_double_sensitivity_mode = true, -}; - -static const struct phy_cfg rtd1619b_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0xa3}, - [4] = {0xe4, 0x88}, - [5] = {0xe5, 0x4f}, - [6] = {0xe6, 0x02}, }, - .page1_size = 8, - .page1 = { [3] = {0xe3, 0x64}, }, - .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE, - .page2 = { [7] = {0xe7, 0x45}, }, - .num_phy = 1, - .check_efuse = true, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE, - .dc_driving_mask = 0x1f, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = false, - .do_toggle = true, - .do_toggle_driving = true, - .driving_updated_for_dev_dis = 0x8, - .use_default_parameter = false, - .is_double_sensitivity_mode = true, -}; - -static const struct phy_cfg rtd1319d_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0xa3}, - [4] = {0xe4, 0x8e}, - [5] = {0xe5, 0x4f}, - [6] = {0xe6, 0x02}, }, - .page1_size = MAX_USB_PHY_PAGE1_DATA_SIZE, - .page1 = { [14] = {0xf5, 0x1}, }, - .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE, - .page2 = { [7] = {0xe7, 0x44}, }, - .check_efuse = true, - .num_phy = 1, - .check_efuse_version = CHECK_EFUSE_V1, - .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE, - .dc_driving_mask = 0x1f, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = false, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0x8, - .use_default_parameter = false, - .is_double_sensitivity_mode = true, -}; - -static const struct phy_cfg rtd1315e_phy_cfg = { - .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE, - .page0 = { [0] = {0xe0, 0xa3}, - [4] = {0xe4, 0x8c}, - [5] = {0xe5, 0x4f}, - [6] = {0xe6, 0x02}, }, - .page1_size = MAX_USB_PHY_PAGE1_DATA_SIZE, - .page1 = { [3] = {0xe3, 0x7f}, - [14] = {0xf5, 0x01}, }, - .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE, - .page2 = { [7] = {0xe7, 0x44}, }, - .num_phy = 1, - .check_efuse = true, - .check_efuse_version = CHECK_EFUSE_V2, - .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE, - .dc_driving_mask = 0x1f, - .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE, - .dc_disconnect_mask = 0xf, - .usb_dc_disconnect_at_page0 = false, - .do_toggle = true, - .do_toggle_driving = false, - .driving_updated_for_dev_dis = 0x8, - .use_default_parameter = false, - .is_double_sensitivity_mode = true, -}; - -static const struct of_device_id usbphy_rtk_dt_match[] = { - { .compatible = "realtek,rtd1295-usb2phy", .data = &rtd1295_phy_cfg }, - { .compatible = "realtek,rtd1312c-usb2phy", .data = &rtd1312c_phy_cfg }, - { .compatible = "realtek,rtd1315e-usb2phy", .data = &rtd1315e_phy_cfg }, - { .compatible = "realtek,rtd1319-usb2phy", .data = &rtd1319_phy_cfg }, - { .compatible = "realtek,rtd1319d-usb2phy", .data = &rtd1319d_phy_cfg }, - { .compatible = "realtek,rtd1395-usb2phy", .data = &rtd1395_phy_cfg }, - { .compatible = "realtek,rtd1395-usb2phy-2port", .data = &rtd1395_phy_cfg_2port }, - { .compatible = "realtek,rtd1619-usb2phy", .data = &rtd1619_phy_cfg }, - { .compatible = "realtek,rtd1619b-usb2phy", .data = &rtd1619b_phy_cfg }, - {}, -}; -MODULE_DEVICE_TABLE(of, usbphy_rtk_dt_match); - -static struct platform_driver rtk_usb2phy_driver = { - .probe = rtk_usb2phy_probe, - .remove_new = rtk_usb2phy_remove, - .driver = { - .name = "rtk-usb2phy", - .of_match_table = usbphy_rtk_dt_match, - }, -}; - -module_platform_driver(rtk_usb2phy_driver); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform: rtk-usb2phy"); -MODULE_AUTHOR("Stanley Chang "); -MODULE_DESCRIPTION("Realtek usb 2.0 phy driver"); From 1cd8c353708de99d8bfa7db8a0c961a800b1fa7f Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 26 Dec 2022 11:24:56 +0000 Subject: [PATCH 0615/1397] cifs: distribute channels across interfaces based on speed [ Upstream commit a6d8fb54a515f0546ffdb7870102b1238917e567 ] Today, if the server interfaces RSS capable, we simply choose the fastest interface to setup a channel. This is not a scalable approach, and does not make a lot of attempt to distribute the connections. This change does a weighted distribution of channels across all the available server interfaces, where the weight is a function of the advertised interface speed. Also make sure that we don't mix rdma and non-rdma for channels. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Stable-dep-of: fa1d0508bdd4 ("cifs: account for primary channel in the interface list") Signed-off-by: Sasha Levin --- fs/smb/client/cifs_debug.c | 16 ++++++++ fs/smb/client/cifsglob.h | 2 + fs/smb/client/sess.c | 84 +++++++++++++++++++++++++++++++------- 3 files changed, 88 insertions(+), 14 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 9a0ccd87468e..16282ecfe17a 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -279,6 +279,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifs_server_iface *iface; + size_t iface_weight = 0, iface_min_speed = 0; + struct cifs_server_iface *last_iface = NULL; int c, i, j; seq_puts(m, @@ -542,11 +544,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) "\tLast updated: %lu seconds ago", ses->iface_count, (jiffies - ses->iface_last_update) / HZ); + + last_iface = list_last_entry(&ses->iface_list, + struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + j = 0; list_for_each_entry(iface, &ses->iface_list, iface_head) { seq_printf(m, "\n\t%d)", ++j); cifs_dump_iface(m, iface); + + iface_weight = iface->speed / iface_min_speed; + seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)" + "\n\t\tAllocated channels: %u\n", + iface->weight_fulfilled, + iface_weight, + iface->num_channels); + if (is_ses_using_iface(ses, iface)) seq_puts(m, "\t\t[CONNECTED]\n"); } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index e55f49e278a2..b8d1c19f6771 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -969,6 +969,8 @@ struct cifs_server_iface { struct list_head iface_head; struct kref refcount; size_t speed; + size_t weight_fulfilled; + unsigned int num_channels; unsigned int rdma_capable : 1; unsigned int rss_capable : 1; unsigned int is_active : 1; /* unset if non existent */ diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 61cc7c415491..65545e65f1eb 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -164,7 +164,9 @@ int cifs_try_adding_channels(struct cifs_ses *ses) int left; int rc = 0; int tries = 0; + size_t iface_weight = 0, iface_min_speed = 0; struct cifs_server_iface *iface = NULL, *niface = NULL; + struct cifs_server_iface *last_iface = NULL; spin_lock(&ses->chan_lock); @@ -192,21 +194,11 @@ int cifs_try_adding_channels(struct cifs_ses *ses) } spin_unlock(&ses->chan_lock); - /* - * Keep connecting to same, fastest, iface for all channels as - * long as its RSS. Try next fastest one if not RSS or channel - * creation fails. - */ - spin_lock(&ses->iface_lock); - iface = list_first_entry(&ses->iface_list, struct cifs_server_iface, - iface_head); - spin_unlock(&ses->iface_lock); - while (left > 0) { tries++; if (tries > 3*ses->chan_max) { - cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n", + cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n", left); break; } @@ -214,17 +206,35 @@ int cifs_try_adding_channels(struct cifs_ses *ses) spin_lock(&ses->iface_lock); if (!ses->iface_count) { spin_unlock(&ses->iface_lock); + cifs_dbg(VFS, "server %s does not advertise interfaces\n", + ses->server->hostname); break; } + if (!iface) + iface = list_first_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + list_for_each_entry_safe_from(iface, niface, &ses->iface_list, iface_head) { + /* do not mix rdma and non-rdma interfaces */ + if (iface->rdma_capable != ses->server->rdma) + continue; + /* skip ifaces that are unusable */ if (!iface->is_active || (is_ses_using_iface(ses, iface) && - !iface->rss_capable)) { + !iface->rss_capable)) + continue; + + /* check if we already allocated enough channels */ + iface_weight = iface->speed / iface_min_speed; + + if (iface->weight_fulfilled >= iface_weight) continue; - } /* take ref before unlock */ kref_get(&iface->refcount); @@ -241,10 +251,21 @@ int cifs_try_adding_channels(struct cifs_ses *ses) continue; } - cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n", + iface->num_channels++; + iface->weight_fulfilled++; + cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n", &iface->sockaddr); break; } + + /* reached end of list. reset weight_fulfilled and start over */ + if (list_entry_is_head(iface, &ses->iface_list, iface_head)) { + list_for_each_entry(iface, &ses->iface_list, iface_head) + iface->weight_fulfilled = 0; + spin_unlock(&ses->iface_lock); + iface = NULL; + continue; + } spin_unlock(&ses->iface_lock); left--; @@ -263,8 +284,10 @@ int cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) { unsigned int chan_index; + size_t iface_weight = 0, iface_min_speed = 0; struct cifs_server_iface *iface = NULL; struct cifs_server_iface *old_iface = NULL; + struct cifs_server_iface *last_iface = NULL; int rc = 0; spin_lock(&ses->chan_lock); @@ -284,13 +307,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) spin_unlock(&ses->chan_lock); spin_lock(&ses->iface_lock); + if (!ses->iface_count) { + spin_unlock(&ses->iface_lock); + cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname); + return 0; + } + + last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface, + iface_head); + iface_min_speed = last_iface->speed; + /* then look for a new one */ list_for_each_entry(iface, &ses->iface_list, iface_head) { + /* do not mix rdma and non-rdma interfaces */ + if (iface->rdma_capable != server->rdma) + continue; + if (!iface->is_active || (is_ses_using_iface(ses, iface) && !iface->rss_capable)) { continue; } + + /* check if we already allocated enough channels */ + iface_weight = iface->speed / iface_min_speed; + + if (iface->weight_fulfilled >= iface_weight) + continue; + kref_get(&iface->refcount); break; } @@ -306,10 +350,22 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", &old_iface->sockaddr, &iface->sockaddr); + + old_iface->num_channels--; + if (old_iface->weight_fulfilled) + old_iface->weight_fulfilled--; + iface->num_channels++; + iface->weight_fulfilled++; + kref_put(&old_iface->refcount, release_iface); } else if (old_iface) { cifs_dbg(FYI, "releasing ref to iface: %pIS\n", &old_iface->sockaddr); + + old_iface->num_channels--; + if (old_iface->weight_fulfilled) + old_iface->weight_fulfilled--; + kref_put(&old_iface->refcount, release_iface); } else { WARN_ON(!iface); From d0b5a680a8ba155befd6df608cfc7bba594a8b81 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 14 Mar 2023 11:14:58 +0000 Subject: [PATCH 0616/1397] cifs: account for primary channel in the interface list [ Upstream commit fa1d0508bdd4a68c5e40f85f635712af8c12f180 ] The refcounting of server interfaces should account for the primary channel too. Although this is not strictly necessary, doing so will account for the primary channel in DebugData. Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/sess.c | 28 ++++++++++++++++++++++++++++ fs/smb/client/smb2ops.c | 6 ++++++ 2 files changed, 34 insertions(+) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 65545e65f1eb..80050e36f045 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -288,6 +288,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) struct cifs_server_iface *iface = NULL; struct cifs_server_iface *old_iface = NULL; struct cifs_server_iface *last_iface = NULL; + struct sockaddr_storage ss; int rc = 0; spin_lock(&ses->chan_lock); @@ -306,6 +307,10 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) } spin_unlock(&ses->chan_lock); + spin_lock(&server->srv_lock); + ss = server->dstaddr; + spin_unlock(&server->srv_lock); + spin_lock(&ses->iface_lock); if (!ses->iface_count) { spin_unlock(&ses->iface_lock); @@ -319,6 +324,16 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) /* then look for a new one */ list_for_each_entry(iface, &ses->iface_list, iface_head) { + if (!chan_index) { + /* if we're trying to get the updated iface for primary channel */ + if (!cifs_match_ipaddr((struct sockaddr *) &ss, + (struct sockaddr *) &iface->sockaddr)) + continue; + + kref_get(&iface->refcount); + break; + } + /* do not mix rdma and non-rdma interfaces */ if (iface->rdma_capable != server->rdma) continue; @@ -345,6 +360,13 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) cifs_dbg(FYI, "unable to find a suitable iface\n"); } + if (!chan_index && !iface) { + cifs_dbg(FYI, "unable to get the interface matching: %pIS\n", + &ss); + spin_unlock(&ses->iface_lock); + return 0; + } + /* now drop the ref to the current iface */ if (old_iface && iface) { cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", @@ -367,6 +389,12 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) old_iface->weight_fulfilled--; kref_put(&old_iface->refcount, release_iface); + } else if (!chan_index) { + /* special case: update interface for primary channel */ + cifs_dbg(FYI, "referencing primary channel iface: %pIS\n", + &iface->sockaddr); + iface->num_channels++; + iface->weight_fulfilled++; } else { WARN_ON(!iface); cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 4af0085239b7..0f8fa78cd47b 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -756,6 +756,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ unsigned int ret_data_len = 0; struct network_interface_info_ioctl_rsp *out_buf = NULL; struct cifs_ses *ses = tcon->ses; + struct TCP_Server_Info *pserver; /* do not query too frequently */ if (ses->iface_last_update && @@ -780,6 +781,11 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ if (rc) goto out; + /* check if iface is still active */ + pserver = ses->chans[0].server; + if (pserver && !cifs_chan_is_iface_active(ses, pserver)) + cifs_chan_update_iface(ses, pserver); + out: kfree(out_buf); return rc; From 4875ef3e606bf66d1eba1509f7df08b87dda6843 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 14 Nov 2023 04:54:12 +0000 Subject: [PATCH 0617/1397] cifs: fix leak of iface for primary channel [ Upstream commit 29954d5b1e0d67a4cd61c30c2201030c97e94b1e ] My last change in this area introduced a change which accounted for primary channel in the interface ref count. However, it did not reduce this ref count on deallocation of the primary channel. i.e. during umount. Fixing this leak here, by dropping this ref count for primary channel while freeing up the session. Fixes: fa1d0508bdd4 ("cifs: account for primary channel in the interface list") Cc: stable@vger.kernel.org Reported-by: Paulo Alcantara Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/connect.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index a9632c060bce..d517651d7bce 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2034,6 +2034,12 @@ void __cifs_put_smb_ses(struct cifs_ses *ses) } } + /* we now account for primary channel in iface->refcount */ + if (ses->chans[0].iface) { + kref_put(&ses->chans[0].iface->refcount, release_iface); + ses->chans[0].server = NULL; + } + sesInfoFree(ses); cifs_put_tcp_session(server, 0); } From 97189072f122f006f4bcd80a0b920053922e073f Mon Sep 17 00:00:00 2001 From: Vitalii Torshyn Date: Thu, 9 Nov 2023 01:13:54 +0200 Subject: [PATCH 0618/1397] ALSA: hda: ASUS UM5302LA: Added quirks for cs35L41/10431A83 on i2c bus [ Upstream commit 6ae90e906aed727759b88eb2b000fcdc8fcd94a3 ] Proposed patch fixes initialization of CSC3551 on the UM5302LA laptop. Patching DSDT table is not required since ASUS did added _DSD entry. Nothing new introduced but reused work started by Stefan B. Currently there is no official firmware available for 10431A83 on cirrus git unfortunately. For testing used 104317f3 (which is also seems on i2c bus): $ cd /lib/firmware/cirrus/ && \ for fw in $(find ./ -name '*104317f3*'); do newfw=$(echo $fw | sed 's/104317f3/10431a83/g'); echo echo "$fw -> $newfw"; ln -s $f $newfw; done With the patch applied to 6.6.0 and obviously symlinks to 104317F3 FW, speakers works and to my susrprise they sound quite good and loud without distortion. Probably confirmation from cirrus team is needed on firmware. Signed-off-by: Vitalii Torshyn Link: https://bugzilla.kernel.org/show_bug.cgi?id=218119 Link: https://lore.kernel.org/r/CAHiQ-bCMPpCJ8eOYAaVVoqGkFixS1qTgSS4xfbZvL4oZV9LYew@mail.gmail.com Signed-off-by: Takashi Iwai Stable-dep-of: 61cbc08fdb04 ("ALSA: hda/realtek: Add quirks for ASUS 2024 Zenbooks") Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 14fc4191fe77..60e99389bcdc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7366,6 +7366,7 @@ enum { ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD, ALC2XX_FIXUP_HEADSET_MIC, ALC289_FIXUP_DELL_CS35L41_SPI_2, + ALC294_FIXUP_CS35L41_I2C_2, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9495,6 +9496,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC289_FIXUP_DUAL_SPK }, + [ALC294_FIXUP_CS35L41_I2C_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_i2c_two, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9864,6 +9869,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), From 432aa136c82c90a149560297d843c148ce933d32 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 15 Nov 2023 16:21:15 +0000 Subject: [PATCH 0619/1397] ALSA: hda/realtek: Add quirks for ASUS 2024 Zenbooks [ Upstream commit 61cbc08fdb04fd445458b0f4cba7e6929afdfaef ] These ASUS Zenbook laptops use Realtek HDA codec combined with 2xCS35L41 Amplifiers using SPI or I2C with External Boost or Internal Boost. Signed-off-by: Stefan Binding Cc: Link: https://lore.kernel.org/r/20231115162116.494968-2-sbinding@opensource.cirrus.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 60e99389bcdc..87bc1d2f8a43 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9869,13 +9869,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1a63, "ASUS UX3405MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC), From 9b1623f9609f3fa0a5d02ecd3a2f81164dc4d043 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 29 Nov 2023 13:38:43 -0500 Subject: [PATCH 0620/1397] dm-delay: fix a race between delay_presuspend and delay_bio [ Upstream commit 6fc45b6ed921dc00dfb264dc08c7d67ee63d2656 ] In delay_presuspend, we set the atomic variable may_delay and then stop the timer and flush pending bios. The intention here is to prevent the delay target from re-arming the timer again. However, this test is racy. Suppose that one thread goes to delay_bio, sees that dc->may_delay is one and proceeds; now, another thread executes delay_presuspend, it sets dc->may_delay to zero, deletes the timer and flushes pending bios. Then, the first thread continues and adds the bio to delayed->list despite the fact that dc->may_delay is false. Fix this bug by changing may_delay's type from atomic_t to bool and only access it while holding the delayed_bios_lock mutex. Note that we don't have to grab the mutex in delay_resume because there are no bios in flight at this point. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-delay.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 7433525e5985..3726fae3006e 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -31,7 +31,7 @@ struct delay_c { struct workqueue_struct *kdelayd_wq; struct work_struct flush_expired_bios; struct list_head delayed_bios; - atomic_t may_delay; + bool may_delay; struct delay_class read; struct delay_class write; @@ -192,7 +192,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); INIT_LIST_HEAD(&dc->delayed_bios); mutex_init(&dc->timer_lock); - atomic_set(&dc->may_delay, 1); + dc->may_delay = true; dc->argc = argc; ret = delay_class_ctr(ti, &dc->read, argv); @@ -247,7 +247,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) struct dm_delay_info *delayed; unsigned long expires = 0; - if (!c->delay || !atomic_read(&dc->may_delay)) + if (!c->delay) return DM_MAPIO_REMAPPED; delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); @@ -256,6 +256,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay); mutex_lock(&delayed_bios_lock); + if (unlikely(!dc->may_delay)) { + mutex_unlock(&delayed_bios_lock); + return DM_MAPIO_REMAPPED; + } c->ops++; list_add_tail(&delayed->list, &dc->delayed_bios); mutex_unlock(&delayed_bios_lock); @@ -269,7 +273,10 @@ static void delay_presuspend(struct dm_target *ti) { struct delay_c *dc = ti->private; - atomic_set(&dc->may_delay, 0); + mutex_lock(&delayed_bios_lock); + dc->may_delay = false; + mutex_unlock(&delayed_bios_lock); + del_timer_sync(&dc->delay_timer); flush_bios(flush_delayed_bios(dc, 1)); } @@ -278,7 +285,7 @@ static void delay_resume(struct dm_target *ti) { struct delay_c *dc = ti->private; - atomic_set(&dc->may_delay, 1); + dc->may_delay = true; } static int delay_map(struct dm_target *ti, struct bio *bio) From b74095a4945f29bfc3729d2becaa90a951569459 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 14 Nov 2023 01:42:16 +0100 Subject: [PATCH 0621/1397] veth: Use tstats per-CPU traffic counters [ Upstream commit 6f2684bf2b4460c84d0d34612a939f78b96b03fc ] Currently veth devices use the lstats per-CPU traffic counters, which only cover TX traffic. veth_get_stats64() actually populates RX stats of a veth device from its peer's TX counters, based on the assumption that a veth device can _only_ receive packets from its peer, which is no longer true: For example, recent CNIs (like Cilium) can use the bpf_redirect_peer() BPF helper to redirect traffic from NIC's tc ingress to veth's tc ingress (in a different netns), skipping veth's peer device. Unfortunately, this kind of traffic isn't currently accounted for in veth's RX stats. In preparation for the fix, use tstats (instead of lstats) to maintain both RX and TX counters for each veth device. We'll use RX counters for bpf_redirect_peer() traffic, and keep using TX counters for the usual "peer-to-peer" traffic. In veth_get_stats64(), calculate RX stats by _adding_ RX count to peer's TX count, in order to cover both kinds of traffic. veth_stats_rx() might need a name change (perhaps to "veth_stats_xdp()") for less confusion, but let's leave it to another patch to keep the fix minimal. Signed-off-by: Peilin Ye Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20231114004220.6495-5-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- drivers/net/veth.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index af326b91506e..0f798bcbe25c 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { if (!use_napi) - dev_lstats_add(dev, length); + dev_sw_netstats_tx_add(dev, 1, length); else __veth_xdp_flush(rq); } else { @@ -387,14 +387,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } -static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) -{ - struct veth_priv *priv = netdev_priv(dev); - - dev_lstats_read(dev, packets, bytes); - return atomic64_read(&priv->dropped); -} - static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); @@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev, struct veth_priv *priv = netdev_priv(dev); struct net_device *peer; struct veth_stats rx; - u64 packets, bytes; - tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); - tot->tx_bytes = bytes; - tot->tx_packets = packets; + tot->tx_dropped = atomic64_read(&priv->dropped); + dev_fetch_sw_netstats(tot, dev->tstats); veth_stats_rx(&rx, dev); tot->tx_dropped += rx.xdp_tx_err; tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err; - tot->rx_bytes = rx.xdp_bytes; - tot->rx_packets = rx.xdp_packets; + tot->rx_bytes += rx.xdp_bytes; + tot->rx_packets += rx.xdp_packets; rcu_read_lock(); peer = rcu_dereference(priv->peer); if (peer) { - veth_stats_tx(peer, &packets, &bytes); - tot->rx_bytes += bytes; - tot->rx_packets += packets; + struct rtnl_link_stats64 tot_peer = {}; + + dev_fetch_sw_netstats(&tot_peer, peer->tstats); + tot->rx_bytes += tot_peer.tx_bytes; + tot->rx_packets += tot_peer.tx_packets; veth_stats_rx(&rx, peer); tot->tx_dropped += rx.peer_tq_xdp_xmit_err; @@ -1776,7 +1768,7 @@ static void veth_setup(struct net_device *dev) NETIF_F_HW_VLAN_STAG_RX); dev->needs_free_netdev = true; dev->priv_destructor = veth_dev_free; - dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->max_mtu = ETH_MAX_MTU; dev->hw_features = VETH_FEATURES; From 30ed07af096eb32eb4a62e542933b086457bd616 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 20 Nov 2023 13:24:55 +0800 Subject: [PATCH 0622/1397] bcache: check return value from btree_node_alloc_replacement() commit 777967e7e9f6f5f3e153abffb562bffaf4430d26 upstream. In btree_gc_rewrite_node(), pointer 'n' is not checked after it returns from btree_gc_rewrite_node(). There is potential possibility that 'n' is a non NULL ERR_PTR(), referencing such error code is not permitted in following code. Therefore a return value checking is necessary after 'n' is back from btree_node_alloc_replacement(). Signed-off-by: Coly Li Reported-by: Dan Carpenter Cc: Link: https://lore.kernel.org/r/20231120052503.6122-3-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index e922c6aae41b..418f85f1114f 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1527,6 +1527,8 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op, return 0; n = btree_node_alloc_replacement(replace, NULL); + if (IS_ERR(n)) + return 0; /* recheck reserve after allocating replacement node */ if (btree_check_reserve(b, NULL)) { From 8ff253108ede353bfa9071a89c7e4b69e096a8b4 Mon Sep 17 00:00:00 2001 From: Rand Deeb Date: Mon, 20 Nov 2023 13:24:57 +0800 Subject: [PATCH 0623/1397] bcache: prevent potential division by zero error commit 2c7f497ac274a14330208b18f6f734000868ebf9 upstream. In SHOW(), the variable 'n' is of type 'size_t.' While there is a conditional check to verify that 'n' is not equal to zero before executing the 'do_div' macro, concerns arise regarding potential division by zero error in 64-bit environments. The concern arises when 'n' is 64 bits in size, greater than zero, and the lower 32 bits of it are zeros. In such cases, the conditional check passes because 'n' is non-zero, but the 'do_div' macro casts 'n' to 'uint32_t,' effectively truncating it to its lower 32 bits. Consequently, the 'n' value becomes zero. To fix this potential division by zero error and ensure precise division handling, this commit replaces the 'do_div' macro with div64_u64(). div64_u64() is designed to work with 64-bit operands, guaranteeing that division is performed correctly. This change enhances the robustness of the code, ensuring that division operations yield accurate results in all scenarios, eliminating the possibility of division by zero, and improving compatibility across different 64-bit environments. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Rand Deeb Cc: Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-5-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 0e2c1880f60b..18ac98dc8922 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -1103,7 +1103,7 @@ SHOW(__bch_cache) sum += INITIAL_PRIO - cached[i]; if (n) - do_div(sum, n); + sum = div64_u64(sum, n); for (i = 0; i < ARRAY_SIZE(q); i++) q[i] = INITIAL_PRIO - cached[n * (i + 1) / From 19aff881b80e457f413a0405857495f8bc4e64fa Mon Sep 17 00:00:00 2001 From: Mingzhe Zou Date: Mon, 20 Nov 2023 13:24:58 +0800 Subject: [PATCH 0624/1397] bcache: fixup init dirty data errors commit 7cc47e64d3d69786a2711a4767e26b26ba63d7ed upstream. We found that after long run, the dirty_data of the bcache device will have errors. This error cannot be eliminated unless re-register. We also found that reattach after detach, this error can accumulate. In bch_sectors_dirty_init(), all inode <= d->id keys will be recounted again. This is wrong, we only need to count the keys of the current device. Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") Signed-off-by: Mingzhe Zou Cc: Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-6-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/writeback.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index a6ddd0bb9220..e81bf2afd632 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -991,8 +991,11 @@ void bch_sectors_dirty_init(struct bcache_device *d) op.count = 0; for_each_key_filter(&c->root->keys, - k, &iter, bch_ptr_invalid) + k, &iter, bch_ptr_invalid) { + if (KEY_INODE(k) != op.inode) + continue; sectors_dirty_init_fn(&op.op, c->root, k); + } rw_unlock(0, c->root); return; From b163173dbfd7da017426f20f890a0e6aa89ae32f Mon Sep 17 00:00:00 2001 From: Mingzhe Zou Date: Mon, 20 Nov 2023 13:24:59 +0800 Subject: [PATCH 0625/1397] bcache: fixup lock c->root error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e34820f984512b433ee1fc291417e60c47d56727 upstream. We had a problem with io hung because it was waiting for c->root to release the lock. crash> cache_set.root -l cache_set.list ffffa03fde4c0050 root = 0xffff802ef454c800 crash> btree -o 0xffff802ef454c800 | grep rw_semaphore [ffff802ef454c858] struct rw_semaphore lock; crash> struct rw_semaphore ffff802ef454c858 struct rw_semaphore { count = { counter = -4294967297 }, wait_list = { next = 0xffff00006786fc28, prev = 0xffff00005d0efac8 }, wait_lock = { raw_lock = { { val = { counter = 0 }, { locked = 0 '\000', pending = 0 '\000' }, { locked_pending = 0, tail = 0 } } } }, osq = { tail = { counter = 0 } }, owner = 0xffffa03fdc586603 } The "counter = -4294967297" means that lock count is -1 and a write lock is being attempted. Then, we found that there is a btree with a counter of 1 in btree_cache_freeable. crash> cache_set -l cache_set.list ffffa03fde4c0050 -o|grep btree_cache [ffffa03fde4c1140] struct list_head btree_cache; [ffffa03fde4c1150] struct list_head btree_cache_freeable; [ffffa03fde4c1160] struct list_head btree_cache_freed; [ffffa03fde4c1170] unsigned int btree_cache_used; [ffffa03fde4c1178] wait_queue_head_t btree_cache_wait; [ffffa03fde4c1190] struct task_struct *btree_cache_alloc_lock; crash> list -H ffffa03fde4c1140|wc -l 973 crash> list -H ffffa03fde4c1150|wc -l 1123 crash> cache_set.btree_cache_used -l cache_set.list ffffa03fde4c0050 btree_cache_used = 2097 crash> list -s btree -l btree.list -H ffffa03fde4c1140|grep -E -A2 "^ lock = {" > btree_cache.txt crash> list -s btree -l btree.list -H ffffa03fde4c1150|grep -E -A2 "^ lock = {" > btree_cache_freeable.txt [root@node-3 127.0.0.1-2023-08-04-16:40:28]# pwd /var/crash/127.0.0.1-2023-08-04-16:40:28 [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache.txt|grep counter|grep -v "counter = 0" [root@node-3 127.0.0.1-2023-08-04-16:40:28]# cat btree_cache_freeable.txt|grep counter|grep -v "counter = 0" counter = 1 We found that this is a bug in bch_sectors_dirty_init() when locking c->root: (1). Thread X has locked c->root(A) write. (2). Thread Y failed to lock c->root(A), waiting for the lock(c->root A). (3). Thread X bch_btree_set_root() changes c->root from A to B. (4). Thread X releases the lock(c->root A). (5). Thread Y successfully locks c->root(A). (6). Thread Y releases the lock(c->root B). down_write locked ---(1)----------------------┐ | | | down_read waiting ---(2)----┐ | | | ┌-------------┐ ┌-------------┐ bch_btree_set_root ===(3)========>> | c->root A | | c->root B | | | └-------------┘ └-------------┘ up_write ---(4)---------------------┘ | | | | | down_read locked ---(5)-----------┘ | | | up_read ---(6)-----------------------------┘ Since c->root may change, the correct steps to lock c->root should be the same as bch_root_usage(), compare after locking. static unsigned int bch_root_usage(struct cache_set *c) { unsigned int bytes = 0; struct bkey *k; struct btree *b; struct btree_iter iter; goto lock_root; do { rw_unlock(false, b); lock_root: b = c->root; rw_lock(false, b, b->level); } while (b != c->root); for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) bytes += bkey_bytes(k); rw_unlock(false, b); return (bytes * 100) / btree_bytes(c); } Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") Signed-off-by: Mingzhe Zou Cc: Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-7-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/writeback.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index e81bf2afd632..d4432b3a6f96 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -977,14 +977,22 @@ static int bch_btre_dirty_init_thread_nr(void) void bch_sectors_dirty_init(struct bcache_device *d) { int i; + struct btree *b = NULL; struct bkey *k = NULL; struct btree_iter iter; struct sectors_dirty_init op; struct cache_set *c = d->c; struct bch_dirty_init_state state; +retry_lock: + b = c->root; + rw_lock(0, b, b->level); + if (b != c->root) { + rw_unlock(0, b); + goto retry_lock; + } + /* Just count root keys if no leaf node */ - rw_lock(0, c->root, c->root->level); if (c->root->level == 0) { bch_btree_op_init(&op.op, -1); op.inode = d->id; @@ -997,7 +1005,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) sectors_dirty_init_fn(&op.op, c->root, k); } - rw_unlock(0, c->root); + rw_unlock(0, b); return; } @@ -1034,7 +1042,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) out: /* Must wait for all threads to stop. */ wait_event(state.wait, atomic_read(&state.started) == 0); - rw_unlock(0, c->root); + rw_unlock(0, b); } void bch_cached_dev_writeback_init(struct cached_dev *dc) From 9c6ad38d4e0d1a03b64903db03149905997b1c75 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Nov 2023 17:43:23 +0100 Subject: [PATCH 0626/1397] USB: xhci-plat: fix legacy PHY double init commit 16b7e0cccb243033de4406ffb4d892365041a1e7 upstream. Commits 7b8ef22ea547 ("usb: xhci: plat: Add USB phy support") and 9134c1fd0503 ("usb: xhci: plat: Add USB 3.0 phy support") added support for looking up legacy PHYs from the sysdev devicetree node and initialising them. This broke drivers such as dwc3 which manages PHYs themself as the PHYs would now be initialised twice, something which specifically can lead to resources being left enabled during suspend (e.g. with the usb_phy_generic PHY driver). As the dwc3 driver uses driver-name matching for the xhci platform device, fix this by only looking up and initialising PHYs for devices that have been matched using OF. Note that checking that the platform device has a devicetree node would currently be sufficient, but that could lead to subtle breakages in case anyone ever tries to reuse an ancestor's node. Fixes: 7b8ef22ea547 ("usb: xhci: plat: Add USB phy support") Fixes: 9134c1fd0503 ("usb: xhci: plat: Add USB 3.0 phy support") Cc: stable@vger.kernel.org # 4.1 Cc: Maxime Ripard Cc: Stanley Chang Signed-off-by: Johan Hovold Tested-by: Stefan Eichenberger Tested-by: Stanley Chang Link: https://lore.kernel.org/r/20231103164323.14294-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 50 +++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index b93161374293..732cdeb73920 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -148,7 +149,7 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s int ret; int irq; struct xhci_plat_priv *priv = NULL; - + bool of_match; if (usb_disabled()) return -ENODEV; @@ -253,16 +254,23 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s &xhci->imod_interval); } - hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, "usb-phy", 0); - if (IS_ERR(hcd->usb_phy)) { - ret = PTR_ERR(hcd->usb_phy); - if (ret == -EPROBE_DEFER) - goto disable_clk; - hcd->usb_phy = NULL; - } else { - ret = usb_phy_init(hcd->usb_phy); - if (ret) - goto disable_clk; + /* + * Drivers such as dwc3 manages PHYs themself (and rely on driver name + * matching for the xhci platform device). + */ + of_match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev); + if (of_match) { + hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, "usb-phy", 0); + if (IS_ERR(hcd->usb_phy)) { + ret = PTR_ERR(hcd->usb_phy); + if (ret == -EPROBE_DEFER) + goto disable_clk; + hcd->usb_phy = NULL; + } else { + ret = usb_phy_init(hcd->usb_phy); + if (ret) + goto disable_clk; + } } hcd->tpl_support = of_usb_host_tpl_support(sysdev->of_node); @@ -285,15 +293,17 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s goto dealloc_usb2_hcd; } - xhci->shared_hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, - "usb-phy", 1); - if (IS_ERR(xhci->shared_hcd->usb_phy)) { - xhci->shared_hcd->usb_phy = NULL; - } else { - ret = usb_phy_init(xhci->shared_hcd->usb_phy); - if (ret) - dev_err(sysdev, "%s init usb3phy fail (ret=%d)\n", - __func__, ret); + if (of_match) { + xhci->shared_hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, + "usb-phy", 1); + if (IS_ERR(xhci->shared_hcd->usb_phy)) { + xhci->shared_hcd->usb_phy = NULL; + } else { + ret = usb_phy_init(xhci->shared_hcd->usb_phy); + if (ret) + dev_err(sysdev, "%s init usb3phy fail (ret=%d)\n", + __func__, ret); + } } xhci->shared_hcd->tpl_support = hcd->tpl_support; From 7c0244cc311a4038505b73682b7c8ceaa5c7a8c8 Mon Sep 17 00:00:00 2001 From: Niklas Neronin Date: Wed, 15 Nov 2023 14:13:25 +0200 Subject: [PATCH 0627/1397] usb: config: fix iteration issue in 'usb_get_bos_descriptor()' commit 974bba5c118f4c2baf00de0356e3e4f7928b4cbc upstream. The BOS descriptor defines a root descriptor and is the base descriptor for accessing a family of related descriptors. Function 'usb_get_bos_descriptor()' encounters an iteration issue when skipping the 'USB_DT_DEVICE_CAPABILITY' descriptor type. This results in the same descriptor being read repeatedly. To address this issue, a 'goto' statement is introduced to ensure that the pointer and the amount read is updated correctly. This ensures that the function iterates to the next descriptor instead of reading the same descriptor repeatedly. Cc: stable@vger.kernel.org Fixes: 3dd550a2d365 ("USB: usbcore: Fix slab-out-of-bounds bug during device reset") Signed-off-by: Niklas Neronin Acked-by: Mathias Nyman Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20231115121325.471454-1-niklas.neronin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index b19e38d5fd10..7f8d33f92ddb 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -1047,7 +1047,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { dev_notice(ddev, "descriptor type invalid, skip\n"); - continue; + goto skip_to_next_descriptor; } switch (cap_type) { @@ -1078,6 +1078,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) break; } +skip_to_next_descriptor: total_len -= length; buffer += length; } From 15b8536daa36cea67d644dbe575f8e1b824c40cb Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Wed, 8 Nov 2023 10:31:25 +0100 Subject: [PATCH 0628/1397] usb: cdnsp: Fix deadlock issue during using NCM gadget commit 58f2fcb3a845fcbbad2f3196bb37d744e0506250 upstream. The interrupt service routine registered for the gadget is a primary handler which mask the interrupt source and a threaded handler which handles the source of the interrupt. Since the threaded handler is voluntary threaded, the IRQ-core does not disable bottom halves before invoke the handler like it does for the forced-threaded handler. Due to changes in networking it became visible that a network gadget's completions handler may schedule a softirq which remains unprocessed. The gadget's completion handler is usually invoked either in hard-IRQ or soft-IRQ context. In this context it is enough to just raise the softirq because the softirq itself will be handled once that context is left. In the case of the voluntary threaded handler, there is nothing that will process pending softirqs. Which means it remain queued until another random interrupt (on this CPU) fires and handles it on its exit path or another thread locks and unlocks a lock with the bh suffix. Worst case is that the CPU goes idle and the NOHZ complains about unhandled softirqs. Disable bottom halves before acquiring the lock (and disabling interrupts) and enable them after dropping the lock. This ensures that any pending softirqs will handled right away. cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/20231108093125.224963-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 07f6068342d4..275a6a2fa671 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1529,6 +1529,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data) unsigned long flags; int counter = 0; + local_bh_disable(); spin_lock_irqsave(&pdev->lock, flags); if (pdev->cdnsp_state & (CDNSP_STATE_HALTED | CDNSP_STATE_DYING)) { @@ -1541,6 +1542,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data) cdnsp_died(pdev); spin_unlock_irqrestore(&pdev->lock, flags); + local_bh_enable(); return IRQ_HANDLED; } @@ -1557,6 +1559,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data) cdnsp_update_erst_dequeue(pdev, event_ring_deq, 1); spin_unlock_irqrestore(&pdev->lock, flags); + local_bh_enable(); return IRQ_HANDLED; } From 2c89f65d2938c5f2af93660962b1183cf9eb782e Mon Sep 17 00:00:00 2001 From: Victor Fragoso Date: Tue, 21 Nov 2023 21:05:56 +0000 Subject: [PATCH 0629/1397] USB: serial: option: add Fibocom L7xx modules commit e389fe8b68137344562fb6e4d53d8a89ef6212dd upstream. Add support for Fibocom L716-EU module series. L716-EU is a Fibocom module based on ZTE's V3E/V3T chipset. Device creates multiple interfaces when connected to PC as follows: - Network Interface: ECM or RNDIS (set by FW or AT Command) - ttyUSB0: AT port - ttyUSB1: Modem port - ttyUSB2: AT2 port - ttyUSB3: Trace port for log information - ADB: ADB port for debugging. ("Driver=usbfs" when ADB server enabled) Here are the outputs of lsusb and usb-devices: $ ls /dev/ttyUSB* /dev/ttyUSB0 /dev/ttyUSB1 /dev/ttyUSB2 /dev/ttyUSB3 usb-devices: L716-EU (ECM mode): T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 51 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0001 Rev= 1.00 S: Manufacturer=Fibocom,Incorporated S: Product=Fibocom Mobile Boardband S: SerialNumber=1234567890ABCDEF C:* #Ifs= 7 Cfg#= 1 Atr=e0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=06 Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=87(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms L716-EU (RNDIS mode): T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 49 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0001 Rev= 1.00 S: Manufacturer=Fibocom,Incorporated S: Product=Fibocom Mobile Boardband S: SerialNumber=1234567890ABCDEF C:* #Ifs= 7 Cfg#= 1 Atr=e0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=03 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=ff Driver=rndis_host E: Ad=87(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Victor Fragoso Reviewed-by: Lars Melin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index b76cb9a096f7..8f12e0a0384a 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2251,6 +2251,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { USB_DEVICE(0x1782, 0x4d10) }, /* Fibocom L610 (AT mode) */ { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) }, /* Fibocom L610 (ECM/RNDIS mode) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0001, 0xff, 0xff, 0xff) }, /* Fibocom L716-EU (ECM/RNDIS mode) */ { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ From 7f52c70c0ce80f07ead668e840951b0f50fb941b Mon Sep 17 00:00:00 2001 From: Puliang Lu Date: Thu, 26 Oct 2023 20:35:06 +0800 Subject: [PATCH 0630/1397] USB: serial: option: fix FM101R-GL defines commit a1092619dd28ac0fcf23016160a2fdccd98ef935 upstream. Modify the definition of the two Fibocom FM101R-GL PID macros, which had their PIDs switched. The correct PIDs are: - VID:PID 413C:8213, FM101R-GL ESIM are laptop M.2 cards (with MBIM interfaces for Linux) - VID:PID 413C:8215, FM101R-GL are laptop M.2 cards (with MBIM interface for Linux) 0x8213: mbim, tty 0x8215: mbim, tty Signed-off-by: Puliang Lu Fixes: 52480e1f1a25 ("USB: serial: option: add Fibocom to DELL custom modem FM101R-GL") Link: https://lore.kernel.org/lkml/TYZPR02MB508845BAD7936A62A105CE5D89DFA@TYZPR02MB5088.apcprd02.prod.outlook.com/ Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 8f12e0a0384a..4d9b7ec5ac77 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -203,8 +203,8 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5829E_ESIM 0x81e4 #define DELL_PRODUCT_5829E 0x81e6 -#define DELL_PRODUCT_FM101R 0x8213 -#define DELL_PRODUCT_FM101R_ESIM 0x8215 +#define DELL_PRODUCT_FM101R_ESIM 0x8213 +#define DELL_PRODUCT_FM101R 0x8215 #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da From 306f780bf6b819e6f940fbe9d7be7fb279e5d304 Mon Sep 17 00:00:00 2001 From: Lech Perczak Date: Sat, 18 Nov 2023 00:19:17 +0100 Subject: [PATCH 0631/1397] USB: serial: option: don't claim interface 4 for ZTE MF290 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8771127e25d6c20d458ad27cf32f7fcfc1755e05 upstream. Interface 4 is used by for QMI interface in stock firmware of MF28D, the router which uses MF290 modem. Free the interface up, to rebind it to qmi_wwan driver. The proper configuration is: Interface mapping is: 0: QCDM, 1: (unknown), 2: AT (PCUI), 2: AT (Modem), 4: QMI T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=19d2 ProdID=0189 Rev= 0.00 S: Manufacturer=ZTE, Incorporated S: Product=ZTE LTE Technologies MSM C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms Cc: Bjørn Mork Signed-off-by: Lech Perczak Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 4d9b7ec5ac77..4dffcfefd62d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1548,7 +1548,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0165, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0167, 0xff, 0xff, 0xff), .driver_info = RSVD(4) }, - { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0189, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0189, 0xff, 0xff, 0xff), + .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0191, 0xff, 0xff, 0xff), /* ZTE EuFi890 */ .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0196, 0xff, 0xff, 0xff) }, From a45e933d855eb22aa5bcad2a3ea6e260f4a24148 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 1 Nov 2023 01:28:45 +0000 Subject: [PATCH 0632/1397] usb: typec: tcpm: Fix sink caps op current check commit 187fb003c57c964ea61ac9fbfe41abf3ca9973eb upstream. TCPM checks for sink caps operational current even when PD is disabled. This incorrectly sets tcpm_set_charge() when PD is disabled. Check for sink caps only when PD is enabled. [ 97.572342] Start toggling [ 97.578949] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [ 99.571648] CC1: 0 -> 0, CC2: 0 -> 4 [state TOGGLING, polarity 0, connected] [ 99.571658] state change TOGGLING -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 99.571673] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS] [ 99.741778] state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED [delayed 170 ms] [ 99.789283] CC1: 0 -> 0, CC2: 4 -> 5 [state SNK_DEBOUNCED, polarity 0, connected] [ 99.789306] state change SNK_DEBOUNCED -> SNK_DEBOUNCED [rev3 NONE_AMS] [ 99.903584] VBUS on [ 99.903591] state change SNK_DEBOUNCED -> SNK_ATTACHED [rev3 NONE_AMS] [ 99.903600] polarity 1 [ 99.910155] enable vbus discharge ret:0 [ 99.910160] Requesting mux state 1, usb-role 2, orientation 2 [ 99.946791] state change SNK_ATTACHED -> SNK_STARTUP [rev3 NONE_AMS] [ 99.946798] state change SNK_STARTUP -> SNK_DISCOVERY [rev3 NONE_AMS] [ 99.946800] Setting voltage/current limit 5000 mV 500 mA [ 99.946803] vbus=0 charge:=1 [ 100.027139] state change SNK_DISCOVERY -> SNK_READY [rev3 NONE_AMS] [ 100.027145] Setting voltage/current limit 5000 mV 3000 mA [ 100.466830] VBUS on Cc: stable@vger.kernel.org Fixes: 803b1c8a0cea ("usb: typec: tcpm: not sink vbus if operational current is 0mA") Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Tested-by: Will McVicker Link: https://lore.kernel.org/r/20231101012845.2701348-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 25e7e14da2a7..a549b65362dc 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4273,7 +4273,8 @@ static void run_state_machine(struct tcpm_port *port) current_lim = PD_P_SNK_STDBY_MW / 5; tcpm_set_current_limit(port, current_lim, 5000); /* Not sink vbus if operational current is 0mA */ - tcpm_set_charge(port, !!pdo_max_current(port->snk_pdo[0])); + tcpm_set_charge(port, !port->pd_supported || + pdo_max_current(port->snk_pdo[0])); if (!port->pd_supported) tcpm_set_state(port, SNK_READY, 0); From 07f2b6e81dfa21e0a662a71a1157315a7aeb4894 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 1 Nov 2023 02:19:09 +0000 Subject: [PATCH 0633/1397] usb: typec: tcpm: Skip hard reset when in error recovery commit a6fe37f428c19dd164c2111157d4a1029bd853aa upstream. Hard reset queued prior to error recovery (or) received during error recovery will make TCPM to prematurely exit error recovery sequence. Ignore hard resets received during error recovery (or) port reset sequence. ``` [46505.459688] state change SNK_READY -> ERROR_RECOVERY [rev3 NONE_AMS] [46505.459706] state change ERROR_RECOVERY -> PORT_RESET [rev3 NONE_AMS] [46505.460433] disable vbus discharge ret:0 [46505.461226] Setting usb_comm capable false [46505.467244] Setting voltage/current limit 0 mV 0 mA [46505.467262] polarity 0 [46505.470695] Requesting mux state 0, usb-role 0, orientation 0 [46505.475621] cc:=0 [46505.476012] pending state change PORT_RESET -> PORT_RESET_WAIT_OFF @ 100 ms [rev3 NONE_AMS] [46505.476020] Received hard reset [46505.476024] state change PORT_RESET -> HARD_RESET_START [rev3 HARD_RESET] ``` Cc: stable@vger.kernel.org Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)") Signed-off-by: Badhri Jagan Sridharan Acked-by: Heikki Krogeus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20231101021909.2962679-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index a549b65362dc..6d455ca76125 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5392,6 +5392,15 @@ static void _tcpm_pd_hard_reset(struct tcpm_port *port) if (port->bist_request == BDO_MODE_TESTDATA && port->tcpc->set_bist_data) port->tcpc->set_bist_data(port->tcpc, false); + switch (port->state) { + case ERROR_RECOVERY: + case PORT_RESET: + case PORT_RESET_WAIT_OFF: + return; + default: + break; + } + if (port->ams != NONE_AMS) port->ams = NONE_AMS; if (port->hard_reset_count < PD_N_HARD_RESET_COUNT) From 92e44bdbec7d6e276b3a2fa8917a9b10dba9845d Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 15 Nov 2023 15:45:07 +0100 Subject: [PATCH 0634/1397] USB: dwc2: write HCINT with INTMASK applied commit 0583bc776ca5b5a3f5752869fc31cf7322df2b35 upstream. dwc2_hc_n_intr() writes back INTMASK as read but evaluates it with intmask applied. In stress testing this causes spurious interrupts like this: [Mon Aug 14 10:51:07 2023] dwc2 3f980000.usb: dwc2_hc_chhltd_intr_dma: Channel 7 - ChHltd set, but reason is unknown [Mon Aug 14 10:51:07 2023] dwc2 3f980000.usb: hcint 0x00000002, intsts 0x04600001 [Mon Aug 14 10:51:08 2023] dwc2 3f980000.usb: dwc2_hc_chhltd_intr_dma: Channel 0 - ChHltd set, but reason is unknown [Mon Aug 14 10:51:08 2023] dwc2 3f980000.usb: hcint 0x00000002, intsts 0x04600001 [Mon Aug 14 10:51:08 2023] dwc2 3f980000.usb: dwc2_hc_chhltd_intr_dma: Channel 4 - ChHltd set, but reason is unknown [Mon Aug 14 10:51:08 2023] dwc2 3f980000.usb: hcint 0x00000002, intsts 0x04600001 [Mon Aug 14 10:51:08 2023] dwc2 3f980000.usb: dwc2_update_urb_state_abn(): trimming xfer length Applying INTMASK prevents this. The issue exists in all versions of the driver. Signed-off-by: Oliver Neukum Tested-by: Ivan Ivanov Tested-by: Andrea della Porta Link: https://lore.kernel.org/r/20231115144514.15248-1-oneukum@suse.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/hcd_intr.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c index 0144ca8350c3..5c7538d498dd 100644 --- a/drivers/usb/dwc2/hcd_intr.c +++ b/drivers/usb/dwc2/hcd_intr.c @@ -2015,15 +2015,17 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum) { struct dwc2_qtd *qtd; struct dwc2_host_chan *chan; - u32 hcint, hcintmsk; + u32 hcint, hcintraw, hcintmsk; chan = hsotg->hc_ptr_array[chnum]; - hcint = dwc2_readl(hsotg, HCINT(chnum)); + hcintraw = dwc2_readl(hsotg, HCINT(chnum)); hcintmsk = dwc2_readl(hsotg, HCINTMSK(chnum)); + hcint = hcintraw & hcintmsk; + dwc2_writel(hsotg, hcint, HCINT(chnum)); + if (!chan) { dev_err(hsotg->dev, "## hc_ptr_array for channel is NULL ##\n"); - dwc2_writel(hsotg, hcint, HCINT(chnum)); return; } @@ -2032,11 +2034,9 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum) chnum); dev_vdbg(hsotg->dev, " hcint 0x%08x, hcintmsk 0x%08x, hcint&hcintmsk 0x%08x\n", - hcint, hcintmsk, hcint & hcintmsk); + hcintraw, hcintmsk, hcint); } - dwc2_writel(hsotg, hcint, HCINT(chnum)); - /* * If we got an interrupt after someone called * dwc2_hcd_endpoint_disable() we don't want to crash below @@ -2046,8 +2046,7 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum) return; } - chan->hcint = hcint; - hcint &= hcintmsk; + chan->hcint = hcintraw; /* * If the channel was halted due to a dequeue, the qtd list might From 7985c3d7e879980127fb05620fb04fd4c9830ee2 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 25 Oct 2023 11:51:10 +0200 Subject: [PATCH 0635/1397] usb: dwc3: Fix default mode initialization commit 10d510abd096d620b9fda2dd3e0047c5efc4ad2b upstream. The default mode, configurable by DT, shall be set before usb role switch driver is registered. Otherwise there is a race between default mode and mode set by usb role switch driver. Fixes: 98ed256a4dbad ("usb: dwc3: Add support for role-switch-default-mode binding") Cc: stable Signed-off-by: Alexander Stein Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20231025095110.2405281-1-alexander.stein@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/drd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c index 039bf241769a..57ddd2e43022 100644 --- a/drivers/usb/dwc3/drd.c +++ b/drivers/usb/dwc3/drd.c @@ -505,6 +505,7 @@ static int dwc3_setup_role_switch(struct dwc3 *dwc) dwc->role_switch_default_mode = USB_DR_MODE_PERIPHERAL; mode = DWC3_GCTL_PRTCAP_DEVICE; } + dwc3_set_mode(dwc, mode); dwc3_role_switch.fwnode = dev_fwnode(dwc->dev); dwc3_role_switch.set = dwc3_usb_role_switch_set; @@ -526,7 +527,6 @@ static int dwc3_setup_role_switch(struct dwc3 *dwc) } } - dwc3_set_mode(dwc, mode); return 0; } #else From 6506e0228d3712881b3c405245c29d43d94356f1 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Fri, 27 Oct 2023 11:28:20 +0000 Subject: [PATCH 0636/1397] usb: dwc3: set the dma max_seg_size commit 8bbae288a85abed6a1cf7d185d8b9dc2f5dcb12c upstream. Allow devices to have dma operations beyond 4K, and avoid warnings such as: DMA-API: dwc3 a600000.usb: mapping sg segment longer than device claims to support [len=86016] [max=65536] Cc: stable@vger.kernel.org Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Reported-by: Zubin Mithra Signed-off-by: Ricardo Ribalda Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20231026-dwc3-v2-1-1d4fd5c3e067@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index d25490965b27..8d5af9ccb602 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -2006,6 +2006,8 @@ static int dwc3_probe(struct platform_device *pdev) pm_runtime_put(dev); + dma_set_max_seg_size(dev, UINT_MAX); + return 0; err_exit_debugfs: From 9f7e5ca18381e8e3e96e054c98fa11491f588776 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Nov 2023 18:36:49 +0100 Subject: [PATCH 0637/1397] USB: dwc3: qcom: fix software node leak on probe errors commit 9feefbf57d92e8ee293dad67585d351c7d0b6e37 upstream. Make sure to remove the software node also on (ACPI) probe errors to avoid leaking the underlying resources. Note that the software node is only used for ACPI probe so the driver unbind tear down is updated to match probe. Fixes: 8dc6e6dd1bee ("usb: dwc3: qcom: Constify the software node") Cc: stable@vger.kernel.org # 5.12 Cc: Heikki Krogerus Signed-off-by: Johan Hovold Acked-by: Heikki Krogerus Acked-by: Andrew Halaney Link: https://lore.kernel.org/r/20231117173650.21161-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index d6c81aa298b9..10fb481d943b 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -949,10 +949,12 @@ static int dwc3_qcom_probe(struct platform_device *pdev) interconnect_exit: dwc3_qcom_interconnect_exit(qcom); depopulate: - if (np) + if (np) { of_platform_depopulate(&pdev->dev); - else + } else { + device_remove_software_node(&qcom->dwc3->dev); platform_device_del(qcom->dwc3); + } platform_device_put(qcom->dwc3); free_urs: if (qcom->urs_usb) @@ -975,11 +977,12 @@ static void dwc3_qcom_remove(struct platform_device *pdev) struct device *dev = &pdev->dev; int i; - device_remove_software_node(&qcom->dwc3->dev); - if (np) + if (np) { of_platform_depopulate(&pdev->dev); - else + } else { + device_remove_software_node(&qcom->dwc3->dev); platform_device_del(qcom->dwc3); + } platform_device_put(qcom->dwc3); if (qcom->urs_usb) From 85e56a922b0795c5f47f978ae6bce0ef4489c1da Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 20 Nov 2023 17:16:06 +0100 Subject: [PATCH 0638/1397] USB: dwc3: qcom: fix wakeup after probe deferral commit 41f5a0973259db9e4e3c9963d36505f80107d1a0 upstream. The Qualcomm glue driver is overriding the interrupt trigger types defined by firmware when requesting the wakeup interrupts during probe. This can lead to a failure to map the DP/DM wakeup interrupts after a probe deferral as the firmware defined trigger types do not match the type used for the initial mapping: irq: type mismatch, failed to map hwirq-14 for interrupt-controller@b220000! irq: type mismatch, failed to map hwirq-15 for interrupt-controller@b220000! Fix this by not overriding the firmware provided trigger types when requesting the wakeup interrupts. Fixes: a4333c3a6ba9 ("usb: dwc3: Add Qualcomm DWC3 glue driver") Cc: stable@vger.kernel.org # 4.18 Signed-off-by: Johan Hovold Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/20231120161607.7405-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 10fb481d943b..82544374110b 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -549,7 +549,7 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + IRQF_ONESHOT, "qcom_dwc3 HS", qcom); if (ret) { dev_err(qcom->dev, "hs_phy_irq failed: %d\n", ret); @@ -564,7 +564,7 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + IRQF_ONESHOT, "qcom_dwc3 DP_HS", qcom); if (ret) { dev_err(qcom->dev, "dp_hs_phy_irq failed: %d\n", ret); @@ -579,7 +579,7 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + IRQF_ONESHOT, "qcom_dwc3 DM_HS", qcom); if (ret) { dev_err(qcom->dev, "dm_hs_phy_irq failed: %d\n", ret); @@ -594,7 +594,7 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev) irq_set_status_flags(irq, IRQ_NOAUTOEN); ret = devm_request_threaded_irq(qcom->dev, irq, NULL, qcom_dwc3_resume_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + IRQF_ONESHOT, "qcom_dwc3 SS", qcom); if (ret) { dev_err(qcom->dev, "ss_phy_irq failed: %d\n", ret); From 8f34f6b7b6b3260eb6312a19ececcc97908d15b7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 3 Dec 2023 07:33:10 +0100 Subject: [PATCH 0639/1397] Linux 6.6.4 Link: https://lore.kernel.org/r/20231130162140.298098091@linuxfoundation.org Tested-by: Ronald Warsow Tested-by: Florian Fainelli Tested-by: Takeshi Ogasawara Tested-by: Shuah Khan Tested-by: Harshit Mogalapalli Tested-by: Ron Economos Tested-by: Bagas Sanjaya Tested-by: Jon Hunter Tested-by: Frank Scheiner Tested-by: Linux Kernel Functional Testing Tested-by: Rudi Heitbaum Tested-by: Conor Dooley Tested-by: Justin M. Forbes Tested-by: Guenter Roeck Tested-by: SeongJae Park Tested-by: Ricardo B. Marliere Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8ecebeb5642c..cbe63ba9126e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth From 97bfff8c5f2340ad98059708bea36ca51c7ed925 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Nov 2023 17:23:59 +0100 Subject: [PATCH 0640/1397] leds: class: Don't expose color sysfs entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8f2244c9af245ff72185c0473827125ee6b2d1a5 upstream. The commit c7d80059b086 ("leds: class: Store the color index in struct led_classdev") introduced a new sysfs entry "color" that is commonly created for the led classdev. Unfortunately, this conflicts with the "color" sysfs entry of already existing drivers such as Logitech HID or System76 ACPI drivers. The driver probe fails due to the conflict, hence it leads to a severe regression with the missing keyboard, for example. This patch reverts partially the change in the commit above for removing the led class color sysfs entries again for addressing the regressions. The newly introduced led_classdev.color field is kept as it's already used by other driver. Fixes: c7d80059b086 ("leds: class: Store the color index in struct led_classdev") Reported-by: Johannes Penßel Closes: https://lore.kernel.org/r/b5646db3-acff-45aa-baef-df3f660486fb@gmail.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=218045 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218155 Link: https://bugzilla.suse.com/show_bug.cgi?id=1217172 Signed-off-by: Takashi Iwai Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20231121162359.9332-1-tiwai@suse.de Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-led | 9 --------- drivers/leds/led-class.c | 14 -------------- 2 files changed, 23 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-led b/Documentation/ABI/testing/sysfs-class-led index b2ff0012c0f2..2e24ac3bd7ef 100644 --- a/Documentation/ABI/testing/sysfs-class-led +++ b/Documentation/ABI/testing/sysfs-class-led @@ -59,15 +59,6 @@ Description: brightness. Reading this file when no hw brightness change event has happened will return an ENODATA error. -What: /sys/class/leds//color -Date: June 2023 -KernelVersion: 6.5 -Description: - Color of the LED. - - This is a read-only file. Reading this file returns the color - of the LED as a string (e.g: "red", "green", "multicolor"). - What: /sys/class/leds//trigger Date: March 2006 KernelVersion: 2.6.17 diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 974b84f6bd6a..ba1be15cfd8e 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -75,19 +75,6 @@ static ssize_t max_brightness_show(struct device *dev, } static DEVICE_ATTR_RO(max_brightness); -static ssize_t color_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - const char *color_text = "invalid"; - struct led_classdev *led_cdev = dev_get_drvdata(dev); - - if (led_cdev->color < LED_COLOR_ID_MAX) - color_text = led_colors[led_cdev->color]; - - return sysfs_emit(buf, "%s\n", color_text); -} -static DEVICE_ATTR_RO(color); - #ifdef CONFIG_LEDS_TRIGGERS static BIN_ATTR(trigger, 0644, led_trigger_read, led_trigger_write, 0); static struct bin_attribute *led_trigger_bin_attrs[] = { @@ -102,7 +89,6 @@ static const struct attribute_group led_trigger_group = { static struct attribute *led_class_attrs[] = { &dev_attr_brightness.attr, &dev_attr_max_brightness.attr, - &dev_attr_color.attr, NULL, }; From 9b7e8fa648143515df0dd6fd82f0a7f98c9c7815 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Nov 2023 16:56:17 +0000 Subject: [PATCH 0641/1397] cifs: Fix FALLOC_FL_ZERO_RANGE by setting i_size if EOF moved commit 83d5518b124dfd605f10a68128482c839a239f9d upstream. Fix the cifs filesystem implementations of FALLOC_FL_ZERO_RANGE, in smb3_zero_range(), to set i_size after extending the file on the server. Fixes: 72c419d9b073 ("cifs: fix smb3_zero_range so it can expand the file-size when required") Cc: stable@vger.kernel.org Signed-off-by: David Howells Acked-by: Paulo Alcantara cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2ops.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 0f8fa78cd47b..16512f404926 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3305,6 +3305,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, struct inode *inode = file_inode(file); struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsFileInfo *cfile = file->private_data; + unsigned long long new_size; long rc; unsigned int xid; __le64 eof; @@ -3335,10 +3336,15 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, /* * do we also need to change the size of the file? */ - if (keep_size == false && i_size_read(inode) < offset + len) { - eof = cpu_to_le64(offset + len); + new_size = offset + len; + if (keep_size == false && (unsigned long long)i_size_read(inode) < new_size) { + eof = cpu_to_le64(new_size); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof); + if (rc >= 0) { + truncate_setsize(inode, new_size); + fscache_resize_cookie(cifs_inode_cookie(inode), new_size); + } } zero_range_exit: From 514f89a35904e113a637b6a62b53e39185f9228d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Nov 2023 16:56:18 +0000 Subject: [PATCH 0642/1397] cifs: Fix FALLOC_FL_INSERT_RANGE by setting i_size after EOF moved commit 88010155f02b2c3b03c71609ba6ceeb457ece095 upstream. Fix the cifs filesystem implementations of FALLOC_FL_INSERT_RANGE, in smb3_insert_range(), to set i_size after extending the file on the server and before we do the copy to open the gap (as we don't clean up the EOF marker if the copy fails). Fixes: 7fe6fe95b936 ("cifs: add FALLOC_FL_INSERT_RANGE support") Cc: stable@vger.kernel.org Signed-off-by: David Howells Acked-by: Paulo Alcantara cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2ops.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 16512f404926..b2a60aa6564f 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3739,6 +3739,9 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, if (rc < 0) goto out_2; + truncate_setsize(inode, old_eof + len); + fscache_resize_cookie(cifs_inode_cookie(inode), i_size_read(inode)); + rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len); if (rc < 0) goto out_2; From b6abe3380977e054a25f6fac82db2b0f18d3dc94 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sat, 25 Nov 2023 23:55:10 -0300 Subject: [PATCH 0643/1397] smb: client: fix missing mode bits for SMB symlinks commit ef22bb800d967616c7638d204bc1b425beac7f5f upstream. When instantiating inodes for SMB symlinks, add the mode bits from @cifs_sb->ctx->file_mode as we already do for the other special files. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index c03a286ed418..2fc3690d8e93 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -748,7 +748,7 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, case 0: /* SMB1 symlink */ case IO_REPARSE_TAG_SYMLINK: case IO_REPARSE_TAG_NFS: - fattr->cf_mode = S_IFLNK; + fattr->cf_mode = S_IFLNK | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_LNK; break; default: From 1de3dbd9a0213a946151bac7017812822bd4b7ca Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 28 Nov 2023 16:37:19 -0300 Subject: [PATCH 0644/1397] smb: client: report correct st_size for SMB and NFS symlinks commit 9d63509547a940225d06d7eba1dc412befae255d upstream. We can't rely on FILE_STANDARD_INFORMATION::EndOfFile for reparse points as they will be always zero. Set it to symlink target's length as specified by POSIX. This will make stat() family of syscalls return the correct st_size for such files. Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 2fc3690d8e93..d6aa5e474d5e 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -823,6 +823,8 @@ static void cifs_open_info_to_fattr(struct cifs_fattr *fattr, out_reparse: if (S_ISLNK(fattr->cf_mode)) { + if (likely(data->symlink_target)) + fattr->cf_eof = strnlen(data->symlink_target, PATH_MAX); fattr->cf_symlink_target = data->symlink_target; data->symlink_target = NULL; } From 9ab2842cb7e1526e7fa5e465668e11b58c62ada2 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 20 Nov 2023 09:23:09 +0900 Subject: [PATCH 0645/1397] ksmbd: fix possible deadlock in smb2_open commit 864fb5d3716303a045c3ffb397f651bfd37bfb36 upstream. [ 8743.393379] ====================================================== [ 8743.393385] WARNING: possible circular locking dependency detected [ 8743.393391] 6.4.0-rc1+ #11 Tainted: G OE [ 8743.393397] ------------------------------------------------------ [ 8743.393402] kworker/0:2/12921 is trying to acquire lock: [ 8743.393408] ffff888127a14460 (sb_writers#8){.+.+}-{0:0}, at: ksmbd_vfs_setxattr+0x3d/0xd0 [ksmbd] [ 8743.393510] but task is already holding lock: [ 8743.393515] ffff8880360d97f0 (&type->i_mutex_dir_key#6/1){+.+.}-{3:3}, at: ksmbd_vfs_kern_path_locked+0x181/0x670 [ksmbd] [ 8743.393618] which lock already depends on the new lock. [ 8743.393623] the existing dependency chain (in reverse order) is: [ 8743.393628] -> #1 (&type->i_mutex_dir_key#6/1){+.+.}-{3:3}: [ 8743.393648] down_write_nested+0x9a/0x1b0 [ 8743.393660] filename_create+0x128/0x270 [ 8743.393670] do_mkdirat+0xab/0x1f0 [ 8743.393680] __x64_sys_mkdir+0x47/0x60 [ 8743.393690] do_syscall_64+0x5d/0x90 [ 8743.393701] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 8743.393711] -> #0 (sb_writers#8){.+.+}-{0:0}: [ 8743.393728] __lock_acquire+0x2201/0x3b80 [ 8743.393737] lock_acquire+0x18f/0x440 [ 8743.393746] mnt_want_write+0x5f/0x240 [ 8743.393755] ksmbd_vfs_setxattr+0x3d/0xd0 [ksmbd] [ 8743.393839] ksmbd_vfs_set_dos_attrib_xattr+0xcc/0x110 [ksmbd] [ 8743.393924] compat_ksmbd_vfs_set_dos_attrib_xattr+0x39/0x50 [ksmbd] [ 8743.394010] smb2_open+0x3432/0x3cc0 [ksmbd] [ 8743.394099] handle_ksmbd_work+0x2c9/0x7b0 [ksmbd] [ 8743.394187] process_one_work+0x65a/0xb30 [ 8743.394198] worker_thread+0x2cf/0x700 [ 8743.394209] kthread+0x1ad/0x1f0 [ 8743.394218] ret_from_fork+0x29/0x50 This patch add mnt_want_write() above parent inode lock and remove nested mnt_want_write calls in smb2_open(). Fixes: 40b268d384a2 ("ksmbd: add mnt_want_write to ksmbd vfs functions") Cc: stable@vger.kernel.org Reported-by: Marios Makassikis Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 47 +++++++++++++--------------- fs/smb/server/smbacl.c | 7 +++-- fs/smb/server/smbacl.h | 2 +- fs/smb/server/vfs.c | 68 +++++++++++++++++++++++++---------------- fs/smb/server/vfs.h | 10 ++++-- 5 files changed, 75 insertions(+), 59 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 93262ca3f58a..269fbfb3cd67 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2380,7 +2380,8 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, rc = 0; } else { rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value, - le16_to_cpu(eabuf->EaValueLength), 0); + le16_to_cpu(eabuf->EaValueLength), + 0, true); if (rc < 0) { ksmbd_debug(SMB, "ksmbd_vfs_setxattr is failed(%d)\n", @@ -2443,7 +2444,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path, return -EBADF; } - rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0); + rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0, false); if (rc < 0) pr_err("Failed to store XATTR stream name :%d\n", rc); return 0; @@ -2518,7 +2519,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path * da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da); + rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da, false); if (rc) ksmbd_debug(SMB, "failed to store file attribute into xattr\n"); } @@ -2608,7 +2609,7 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work, sizeof(struct create_sd_buf_req)) return -EINVAL; return set_info_sec(work->conn, work->tcon, path, &sd_buf->ntsd, - le32_to_cpu(sd_buf->ccontext.DataLength), true); + le32_to_cpu(sd_buf->ccontext.DataLength), true, false); } static void ksmbd_acls_fattr(struct smb_fattr *fattr, @@ -3152,7 +3153,8 @@ int smb2_open(struct ksmbd_work *work) idmap, &path, pntsd, - pntsd_size); + pntsd_size, + false); kfree(pntsd); if (rc) pr_err("failed to store ntacl in xattr : %d\n", @@ -3228,12 +3230,6 @@ int smb2_open(struct ksmbd_work *work) if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE) ksmbd_fd_set_delete_on_close(fp, file_info); - if (need_truncate) { - rc = smb2_create_truncate(&path); - if (rc) - goto err_out; - } - if (req->CreateContextsOffset) { struct create_alloc_size_req *az_req; @@ -3398,11 +3394,12 @@ int smb2_open(struct ksmbd_work *work) } err_out: - if (file_present || created) { - inode_unlock(d_inode(parent_path.dentry)); - path_put(&path); - path_put(&parent_path); - } + if (file_present || created) + ksmbd_vfs_kern_path_unlock(&parent_path, &path); + + if (fp && need_truncate) + rc = smb2_create_truncate(&fp->filp->f_path); + ksmbd_revert_fsids(work); err_out1: if (!rc) { @@ -5537,7 +5534,7 @@ static int smb2_rename(struct ksmbd_work *work, rc = ksmbd_vfs_setxattr(file_mnt_idmap(fp->filp), &fp->filp->f_path, xattr_stream_name, - NULL, 0, 0); + NULL, 0, 0, true); if (rc < 0) { pr_err("failed to store stream name in xattr: %d\n", rc); @@ -5630,11 +5627,9 @@ static int smb2_create_link(struct ksmbd_work *work, if (rc) rc = -EINVAL; out: - if (file_present) { - inode_unlock(d_inode(parent_path.dentry)); - path_put(&path); - path_put(&parent_path); - } + if (file_present) + ksmbd_vfs_kern_path_unlock(&parent_path, &path); + if (!IS_ERR(link_name)) kfree(link_name); kfree(pathname); @@ -5701,7 +5696,8 @@ static int set_file_basic_info(struct ksmbd_file *fp, da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da); + rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da, + true); if (rc) ksmbd_debug(SMB, "failed to restore file attribute in EA\n"); @@ -6013,7 +6009,7 @@ static int smb2_set_info_sec(struct ksmbd_file *fp, int addition_info, fp->saccess |= FILE_SHARE_DELETE_LE; return set_info_sec(fp->conn, fp->tcon, &fp->filp->f_path, pntsd, - buf_len, false); + buf_len, false, true); } /** @@ -7582,7 +7578,8 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id, da.attr = le32_to_cpu(fp->f_ci->m_fattr); ret = ksmbd_vfs_set_dos_attrib_xattr(idmap, - &fp->filp->f_path, &da); + &fp->filp->f_path, + &da, true); if (ret) fp->f_ci->m_fattr = old_fattr; } diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 51b8bfab7481..1164365533f0 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1185,7 +1185,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, pntsd_size += sizeof(struct smb_acl) + nt_size; } - ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size); + ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size, false); kfree(pntsd); } @@ -1377,7 +1377,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, const struct path *path, struct smb_ntsd *pntsd, int ntsd_len, - bool type_check) + bool type_check, bool get_write) { int rc; struct smb_fattr fattr = {{0}}; @@ -1437,7 +1437,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) { /* Update WinACL in xattr */ ksmbd_vfs_remove_sd_xattrs(idmap, path); - ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len); + ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len, + get_write); } out: diff --git a/fs/smb/server/smbacl.h b/fs/smb/server/smbacl.h index 49a8c292bd2e..2b52861707d8 100644 --- a/fs/smb/server/smbacl.h +++ b/fs/smb/server/smbacl.h @@ -207,7 +207,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, __le32 *pdaccess, int uid); int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, const struct path *path, struct smb_ntsd *pntsd, int ntsd_len, - bool type_check); + bool type_check, bool get_write); void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid); void ksmbd_init_domain(u32 *sub_auth); diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 9919c07035d8..5a41c0b4e933 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -97,6 +97,13 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, return -ENOENT; } + err = mnt_want_write(parent_path->mnt); + if (err) { + path_put(parent_path); + putname(filename); + return -ENOENT; + } + inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT); d = lookup_one_qstr_excl(&last, parent_path->dentry, 0); if (IS_ERR(d)) @@ -123,6 +130,7 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, err_out: inode_unlock(d_inode(parent_path->dentry)); + mnt_drop_write(parent_path->mnt); path_put(parent_path); putname(filename); return -ENOENT; @@ -451,7 +459,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, fp->stream.name, (void *)stream_buf, size, - 0); + 0, + true); if (err < 0) goto out; @@ -593,10 +602,6 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path) goto out_err; } - err = mnt_want_write(path->mnt); - if (err) - goto out_err; - idmap = mnt_idmap(path->mnt); if (S_ISDIR(d_inode(path->dentry)->i_mode)) { err = vfs_rmdir(idmap, d_inode(parent), path->dentry); @@ -607,7 +612,6 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path) if (err) ksmbd_debug(VFS, "unlink failed, err %d\n", err); } - mnt_drop_write(path->mnt); out_err: ksmbd_revert_fsids(work); @@ -907,18 +911,22 @@ ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap, * @attr_value: xattr value to set * @attr_size: size of xattr value * @flags: destination buffer length + * @get_write: get write access to a mount * * Return: 0 on success, otherwise error */ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, const struct path *path, const char *attr_name, - void *attr_value, size_t attr_size, int flags) + void *attr_value, size_t attr_size, int flags, + bool get_write) { int err; - err = mnt_want_write(path->mnt); - if (err) - return err; + if (get_write == true) { + err = mnt_want_write(path->mnt); + if (err) + return err; + } err = vfs_setxattr(idmap, path->dentry, @@ -928,7 +936,8 @@ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, flags); if (err) ksmbd_debug(VFS, "setxattr failed, err %d\n", err); - mnt_drop_write(path->mnt); + if (get_write == true) + mnt_drop_write(path->mnt); return err; } @@ -1251,6 +1260,13 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, } if (!err) { + err = mnt_want_write(parent_path->mnt); + if (err) { + path_put(path); + path_put(parent_path); + return err; + } + err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry); if (err) { path_put(path); @@ -1260,6 +1276,14 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, return err; } +void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path) +{ + inode_unlock(d_inode(parent_path->dentry)); + mnt_drop_write(parent_path->mnt); + path_put(path); + path_put(parent_path); +} + struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, const char *name, unsigned int flags, @@ -1414,7 +1438,8 @@ static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct mnt_idmap *id int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, const struct path *path, - struct smb_ntsd *pntsd, int len) + struct smb_ntsd *pntsd, int len, + bool get_write) { int rc; struct ndr sd_ndr = {0}, acl_ndr = {0}; @@ -1474,7 +1499,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, rc = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_SD, sd_ndr.data, - sd_ndr.offset, 0); + sd_ndr.offset, 0, get_write); if (rc < 0) pr_err("Failed to store XATTR ntacl :%d\n", rc); @@ -1563,7 +1588,8 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, const struct path *path, - struct xattr_dos_attrib *da) + struct xattr_dos_attrib *da, + bool get_write) { struct ndr n; int err; @@ -1573,7 +1599,7 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, return err; err = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_DOS_ATTRIBUTE, - (void *)n.data, n.offset, 0); + (void *)n.data, n.offset, 0, get_write); if (err) ksmbd_debug(SMB, "failed to store dos attribute in xattr\n"); kfree(n.data); @@ -1845,10 +1871,6 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, } posix_state_to_acl(&acl_state, acls->a_entries); - rc = mnt_want_write(path->mnt); - if (rc) - goto out_err; - rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", @@ -1860,9 +1882,7 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", rc); } - mnt_drop_write(path->mnt); -out_err: free_acl_state(&acl_state); posix_acl_release(acls); return rc; @@ -1892,10 +1912,6 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, } } - rc = mnt_want_write(path->mnt); - if (rc) - goto out_err; - rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", @@ -1907,9 +1923,7 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", rc); } - mnt_drop_write(path->mnt); -out_err: posix_acl_release(acls); return rc; } diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h index 00968081856e..cfe1c8092f23 100644 --- a/fs/smb/server/vfs.h +++ b/fs/smb/server/vfs.h @@ -109,7 +109,8 @@ ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap, int attr_name_len); int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, const struct path *path, const char *attr_name, - void *attr_value, size_t attr_size, int flags); + void *attr_value, size_t attr_size, int flags, + bool get_write); int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, size_t *xattr_stream_name_size, int s_type); int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, @@ -117,6 +118,7 @@ int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, unsigned int flags, struct path *parent_path, struct path *path, bool caseless); +void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path); struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, const char *name, unsigned int flags, @@ -144,14 +146,16 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path) int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, const struct path *path, - struct smb_ntsd *pntsd, int len); + struct smb_ntsd *pntsd, int len, + bool get_write); int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, struct dentry *dentry, struct smb_ntsd **pntsd); int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, const struct path *path, - struct xattr_dos_attrib *da); + struct xattr_dos_attrib *da, + bool get_write); int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_dos_attrib *da); From 106d555614ce17677240c9c0ecb481da6d389f3e Mon Sep 17 00:00:00 2001 From: Maria Yu Date: Wed, 15 Nov 2023 18:28:24 +0800 Subject: [PATCH 0646/1397] pinctrl: avoid reload of p state in list iteration commit 4198a9b571065978632276264e01d71d68000ac5 upstream. When in the list_for_each_entry iteration, reload of p->state->settings with a local setting from old_state will turn the list iteration into an infinite loop. The typical symptom when the issue happens, will be a printk message like: "not freeing pin xx (xxx) as part of deactivating group xxx - it is already used for some other setting". This is a compiler-dependent problem, one instance occurred using Clang version 10.0 on the arm64 architecture with linux version 4.19. Fixes: 6e5e959dde0d ("pinctrl: API changes to support multiple states per device") Signed-off-by: Maria Yu Cc: Link: https://lore.kernel.org/r/20231115102824.23727-1-quic_aiquny@quicinc.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index e9dc9638120a..184ec92241ca 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1253,17 +1253,17 @@ static void pinctrl_link_add(struct pinctrl_dev *pctldev, static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state) { struct pinctrl_setting *setting, *setting2; - struct pinctrl_state *old_state = p->state; + struct pinctrl_state *old_state = READ_ONCE(p->state); int ret; - if (p->state) { + if (old_state) { /* * For each pinmux setting in the old state, forget SW's record * of mux owner for that pingroup. Any pingroups which are * still owned by the new state will be re-acquired by the call * to pinmux_enable_setting() in the loop below. */ - list_for_each_entry(setting, &p->state->settings, node) { + list_for_each_entry(setting, &old_state->settings, node) { if (setting->type != PIN_MAP_TYPE_MUX_GROUP) continue; pinmux_disable_setting(setting); From 94725787ba91f8bc86935039feb6c481d4a19294 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 29 Nov 2023 17:34:08 +0800 Subject: [PATCH 0647/1397] firewire: core: fix possible memory leak in create_units() commit 891e0eab32a57fca4d36c5162628eb0bcb1f0edf upstream. If device_register() fails, the refcount of device is not 0, the name allocated in dev_set_name() is leaked. To fix this by calling put_device(), so that it will be freed in callback function kobject_cleanup(). unreferenced object 0xffff9d99035c7a90 (size 8): comm "systemd-udevd", pid 168, jiffies 4294672386 (age 152.089s) hex dump (first 8 bytes): 66 77 30 2e 30 00 ff ff fw0.0... backtrace: [<00000000e1d62bac>] __kmem_cache_alloc_node+0x1e9/0x360 [<00000000bbeaff31>] __kmalloc_node_track_caller+0x44/0x1a0 [<00000000491f2fb4>] kvasprintf+0x67/0xd0 [<000000005b960ddc>] kobject_set_name_vargs+0x1e/0x90 [<00000000427ac591>] dev_set_name+0x4e/0x70 [<000000003b4e447d>] create_units+0xc5/0x110 fw_unit_release() will be called in the error path, move fw_device_get() before calling device_register() to keep balanced with fw_device_put() in fw_unit_release(). Cc: stable@vger.kernel.org Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Fixes: a1f64819fe9f ("firewire: struct device - replace bus_id with dev_name(), dev_set_name()") Signed-off-by: Yang Yingliang Signed-off-by: Takashi Sakamoto Signed-off-by: Greg Kroah-Hartman --- drivers/firewire/core-device.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index aa597cda0d88..2828e9573e90 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -717,14 +717,11 @@ static void create_units(struct fw_device *device) fw_unit_attributes, &unit->attribute_group); - if (device_register(&unit->device) < 0) - goto skip_unit; - fw_device_get(device); - continue; - - skip_unit: - kfree(unit); + if (device_register(&unit->device) < 0) { + put_device(&unit->device); + continue; + } } } From a488e3e5b11178a18b431e2defea4808332aee70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kornel=20Dul=C4=99ba?= Date: Tue, 14 Nov 2023 11:54:49 +0000 Subject: [PATCH 0648/1397] mmc: sdhci-pci-gli: Disable LPM during initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d9ed644f58670865cf067351deb71010bd87a52f upstream. To address IO performance commit f9e5b33934ce ("mmc: host: Improve I/O read/write performance for GL9763E") limited LPM negotiation to runtime suspend state. The problem is that it only flips the switch in the runtime PM resume/suspend logic. Disable LPM negotiation in gl9763e_add_host. This helps in two ways: 1. It was found that the LPM switch stays in the same position after warm reboot. Having it set in init helps with consistency. 2. Disabling LPM during the first runtime resume leaves us susceptible to the performance issue in the time window between boot and the first runtime suspend. Fixes: f9e5b33934ce ("mmc: host: Improve I/O read/write performance for GL9763E") Cc: stable@vger.kernel.org Signed-off-by: Kornel Dulęba Reviewed-by: Sven van Ashbrook Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20231114115516.1585361-1-korneld@chromium.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-gli.c | 54 +++++++++++++++++--------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index d8a991b349a8..77911a57b12c 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -1189,6 +1189,32 @@ static void gl9763e_hs400_enhanced_strobe(struct mmc_host *mmc, sdhci_writel(host, val, SDHCI_GLI_9763E_HS400_ES_REG); } +static void gl9763e_set_low_power_negotiation(struct sdhci_pci_slot *slot, + bool enable) +{ + struct pci_dev *pdev = slot->chip->pdev; + u32 value; + + pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value); + value &= ~GLI_9763E_VHS_REV; + value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_W); + pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value); + + pci_read_config_dword(pdev, PCIE_GLI_9763E_CFG, &value); + + if (enable) + value &= ~GLI_9763E_CFG_LPSN_DIS; + else + value |= GLI_9763E_CFG_LPSN_DIS; + + pci_write_config_dword(pdev, PCIE_GLI_9763E_CFG, value); + + pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value); + value &= ~GLI_9763E_VHS_REV; + value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_R); + pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value); +} + static void sdhci_set_gl9763e_signaling(struct sdhci_host *host, unsigned int timing) { @@ -1297,6 +1323,9 @@ static int gl9763e_add_host(struct sdhci_pci_slot *slot) if (ret) goto cleanup; + /* Disable LPM negotiation to avoid entering L1 state. */ + gl9763e_set_low_power_negotiation(slot, false); + return 0; cleanup: @@ -1340,31 +1369,6 @@ static void gli_set_gl9763e(struct sdhci_pci_slot *slot) } #ifdef CONFIG_PM -static void gl9763e_set_low_power_negotiation(struct sdhci_pci_slot *slot, bool enable) -{ - struct pci_dev *pdev = slot->chip->pdev; - u32 value; - - pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value); - value &= ~GLI_9763E_VHS_REV; - value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_W); - pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value); - - pci_read_config_dword(pdev, PCIE_GLI_9763E_CFG, &value); - - if (enable) - value &= ~GLI_9763E_CFG_LPSN_DIS; - else - value |= GLI_9763E_CFG_LPSN_DIS; - - pci_write_config_dword(pdev, PCIE_GLI_9763E_CFG, value); - - pci_read_config_dword(pdev, PCIE_GLI_9763E_VHS, &value); - value &= ~GLI_9763E_VHS_REV; - value |= FIELD_PREP(GLI_9763E_VHS_REV, GLI_9763E_VHS_REV_R); - pci_write_config_dword(pdev, PCIE_GLI_9763E_VHS, value); -} - static int gl9763e_runtime_suspend(struct sdhci_pci_chip *chip) { struct sdhci_pci_slot *slot = chip->slots[0]; From cb42d0dc74da670a1b77c5e797f30618b95bee5d Mon Sep 17 00:00:00 2001 From: Wenchao Chen Date: Wed, 15 Nov 2023 16:34:06 +0800 Subject: [PATCH 0649/1397] mmc: sdhci-sprd: Fix vqmmc not shutting down after the card was pulled commit 477865af60b2117ceaa1d558e03559108c15c78c upstream. With cat regulator_summary, we found that vqmmc was not shutting down after the card was pulled. cat /sys/kernel/debug/regulator/regulator_summary 1.before fix 1)Insert SD card vddsdio 1 1 0 unknown 3500mV 0mA 1200mV 3750mV 71100000.mmc-vqmmc 1 0mA 3500mV 3600mV 2)Pull out the SD card vddsdio 1 1 0 unknown 3500mV 0mA 1200mV 3750mV 71100000.mmc-vqmmc 1 0mA 3500mV 3600mV 2.after fix 1)Insert SD cardt vddsdio 1 1 0 unknown 3500mV 0mA 1200mV 3750mV 71100000.mmc-vqmmc 1 0mA 3500mV 3600mV 2)Pull out the SD card vddsdio 0 1 0 unknown 3500mV 0mA 1200mV 3750mV 71100000.mmc-vqmmc 0 0mA 3500mV 3600mV Fixes: fb8bd90f83c4 ("mmc: sdhci-sprd: Add Spreadtrum's initial host controller") Signed-off-by: Wenchao Chen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231115083406.7368-1-wenchao.chen@unisoc.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-sprd.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 6b84ba27e6ab..6b8a57e2d20f 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -416,12 +416,33 @@ static void sdhci_sprd_request_done(struct sdhci_host *host, mmc_request_done(host->mmc, mrq); } +static void sdhci_sprd_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ + struct mmc_host *mmc = host->mmc; + + switch (mode) { + case MMC_POWER_OFF: + mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, 0); + + mmc_regulator_disable_vqmmc(mmc); + break; + case MMC_POWER_ON: + mmc_regulator_enable_vqmmc(mmc); + break; + case MMC_POWER_UP: + mmc_regulator_set_ocr(host->mmc, mmc->supply.vmmc, vdd); + break; + } +} + static struct sdhci_ops sdhci_sprd_ops = { .read_l = sdhci_sprd_readl, .write_l = sdhci_sprd_writel, .write_w = sdhci_sprd_writew, .write_b = sdhci_sprd_writeb, .set_clock = sdhci_sprd_set_clock, + .set_power = sdhci_sprd_set_power, .get_max_clock = sdhci_sprd_get_max_clock, .get_min_clock = sdhci_sprd_get_min_clock, .set_bus_width = sdhci_set_bus_width, @@ -823,6 +844,10 @@ static int sdhci_sprd_probe(struct platform_device *pdev) host->caps1 &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_DDR50); + ret = mmc_regulator_get_supply(host->mmc); + if (ret) + goto pm_runtime_disable; + ret = sdhci_setup_host(host); if (ret) goto pm_runtime_disable; From a2e069094d9e95c9590cec11be0eb21ec27e4b9d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 3 Nov 2023 10:47:16 +0200 Subject: [PATCH 0650/1397] mmc: cqhci: Increase recovery halt timeout commit b578d5d18e929aa7c007a98cce32657145dde219 upstream. Failing to halt complicates the recovery. Additionally, unless the card or controller are stuck, which is expected to be very rare, then the halt should succeed, so it is better to wait. Set a large timeout. Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Reviewed-by: Avri Altman Link: https://lore.kernel.org/r/20231103084720.6886-3-adrian.hunter@intel.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/cqhci-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c index b3d7d6d8d654..15f5a069af1f 100644 --- a/drivers/mmc/host/cqhci-core.c +++ b/drivers/mmc/host/cqhci-core.c @@ -984,10 +984,10 @@ static bool cqhci_halt(struct mmc_host *mmc, unsigned int timeout) /* * After halting we expect to be able to use the command line. We interpret the * failure to halt to mean the data lines might still be in use (and the upper - * layers will need to send a STOP command), so we set the timeout based on a - * generous command timeout. + * layers will need to send a STOP command), however failing to halt complicates + * the recovery, so set a timeout that would reasonably allow I/O to complete. */ -#define CQHCI_START_HALT_TIMEOUT 5 +#define CQHCI_START_HALT_TIMEOUT 500 static void cqhci_recovery_start(struct mmc_host *mmc) { From 25b14a7eecfbc57b953825bdd67f88aa902dd15e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 3 Nov 2023 10:47:19 +0200 Subject: [PATCH 0651/1397] mmc: cqhci: Warn of halt or task clear failure commit 35597bdb04ec27ef3b1cea007dc69f8ff5df75a5 upstream. A correctly operating controller should successfully halt and clear tasks. Failure may result in errors elsewhere, so promote messages from debug to warnings. Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Reviewed-by: Avri Altman Reviewed-by: Avri Altman Link: https://lore.kernel.org/r/20231103084720.6886-6-adrian.hunter@intel.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/cqhci-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c index 15f5a069af1f..948799a0980c 100644 --- a/drivers/mmc/host/cqhci-core.c +++ b/drivers/mmc/host/cqhci-core.c @@ -942,8 +942,8 @@ static bool cqhci_clear_all_tasks(struct mmc_host *mmc, unsigned int timeout) ret = cqhci_tasks_cleared(cq_host); if (!ret) - pr_debug("%s: cqhci: Failed to clear tasks\n", - mmc_hostname(mmc)); + pr_warn("%s: cqhci: Failed to clear tasks\n", + mmc_hostname(mmc)); return ret; } @@ -976,7 +976,7 @@ static bool cqhci_halt(struct mmc_host *mmc, unsigned int timeout) ret = cqhci_halted(cq_host); if (!ret) - pr_debug("%s: cqhci: Failed to halt\n", mmc_hostname(mmc)); + pr_warn("%s: cqhci: Failed to halt\n", mmc_hostname(mmc)); return ret; } From c7cf5f0bfd9ed73971ac4b5463af2d3f1d78e231 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 3 Nov 2023 10:47:20 +0200 Subject: [PATCH 0652/1397] mmc: cqhci: Fix task clearing in CQE error recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1de1b77982e1a1df9707cb11f9b1789e6b8919d4 upstream. If a task completion notification (TCN) is received when there is no outstanding task, the cqhci driver issues a "spurious TCN" warning. This was observed to happen right after CQE error recovery. When an error interrupt is received the driver runs recovery logic. It halts the controller, clears all pending tasks, and then re-enables it. On some platforms, like Intel Jasper Lake, a stale task completion event was observed, regardless of the CQHCI_CLEAR_ALL_TASKS bit being set. This results in either: a) Spurious TC completion event for an empty slot. b) Corrupted data being passed up the stack, as a result of premature completion for a newly added task. Rather than add a quirk for affected controllers, ensure tasks are cleared by toggling CQHCI_ENABLE, which would happen anyway if cqhci_clear_all_tasks() timed out. This is simpler and should be safe and effective for all controllers. Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host") Cc: stable@vger.kernel.org Reported-by: Kornel Dulęba Tested-by: Kornel Dulęba Co-developed-by: Kornel Dulęba Signed-off-by: Kornel Dulęba Signed-off-by: Adrian Hunter Reviewed-by: Avri Altman Link: https://lore.kernel.org/r/20231103084720.6886-7-adrian.hunter@intel.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/cqhci-core.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c index 948799a0980c..41e94cd14109 100644 --- a/drivers/mmc/host/cqhci-core.c +++ b/drivers/mmc/host/cqhci-core.c @@ -1075,28 +1075,28 @@ static void cqhci_recovery_finish(struct mmc_host *mmc) ok = cqhci_halt(mmc, CQHCI_FINISH_HALT_TIMEOUT); - if (!cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT)) - ok = false; - /* * The specification contradicts itself, by saying that tasks cannot be * cleared if CQHCI does not halt, but if CQHCI does not halt, it should * be disabled/re-enabled, but not to disable before clearing tasks. * Have a go anyway. */ - if (!ok) { - pr_debug("%s: cqhci: disable / re-enable\n", mmc_hostname(mmc)); - cqcfg = cqhci_readl(cq_host, CQHCI_CFG); - cqcfg &= ~CQHCI_ENABLE; - cqhci_writel(cq_host, cqcfg, CQHCI_CFG); - cqcfg |= CQHCI_ENABLE; - cqhci_writel(cq_host, cqcfg, CQHCI_CFG); - /* Be sure that there are no tasks */ - ok = cqhci_halt(mmc, CQHCI_FINISH_HALT_TIMEOUT); - if (!cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT)) - ok = false; - WARN_ON(!ok); - } + if (!cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT)) + ok = false; + + /* Disable to make sure tasks really are cleared */ + cqcfg = cqhci_readl(cq_host, CQHCI_CFG); + cqcfg &= ~CQHCI_ENABLE; + cqhci_writel(cq_host, cqcfg, CQHCI_CFG); + + cqcfg = cqhci_readl(cq_host, CQHCI_CFG); + cqcfg |= CQHCI_ENABLE; + cqhci_writel(cq_host, cqcfg, CQHCI_CFG); + + cqhci_halt(mmc, CQHCI_FINISH_HALT_TIMEOUT); + + if (!ok) + cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT); cqhci_recover_mrqs(cq_host); From 3e04ce4b2572e6288456757a1a4b077be0efc87b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 3 Nov 2023 10:47:18 +0200 Subject: [PATCH 0653/1397] mmc: block: Retry commands in CQE error recovery commit 8155d1fa3a747baad5caff5f8303321d68ddd48c upstream. It is important that MMC_CMDQ_TASK_MGMT command to discard the queue is successful because otherwise a subsequent reset might fail to flush the cache first. Retry it and the previous STOP command. Fixes: 72a5af554df8 ("mmc: core: Add support for handling CQE requests") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Reviewed-by: Avri Altman Link: https://lore.kernel.org/r/20231103084720.6886-5-adrian.hunter@intel.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 3d3e0ca52614..66b4198d0e81 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -551,7 +551,7 @@ int mmc_cqe_recovery(struct mmc_host *host) cmd.flags = MMC_RSP_R1B | MMC_CMD_AC; cmd.flags &= ~MMC_RSP_CRC; /* Ignore CRC */ cmd.busy_timeout = MMC_CQE_RECOVERY_TIMEOUT; - mmc_wait_for_cmd(host, &cmd, 0); + mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES); memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_CMDQ_TASK_MGMT; @@ -559,10 +559,13 @@ int mmc_cqe_recovery(struct mmc_host *host) cmd.flags = MMC_RSP_R1B | MMC_CMD_AC; cmd.flags &= ~MMC_RSP_CRC; /* Ignore CRC */ cmd.busy_timeout = MMC_CQE_RECOVERY_TIMEOUT; - err = mmc_wait_for_cmd(host, &cmd, 0); + err = mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES); host->cqe_ops->cqe_recovery_finish(host); + if (err) + err = mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES); + mmc_retune_release(host); return err; From c77590974d9633d612252ac7be7d514d80f83a21 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 3 Nov 2023 10:47:15 +0200 Subject: [PATCH 0654/1397] mmc: block: Do not lose cache flush during CQE error recovery commit 174925d340aac55296318e43fd96c0e1d196e105 upstream. During CQE error recovery, error-free data commands get requeued if there is any data left to transfer, but non-data commands are completed even though they have not been processed. Requeue them instead. Note the only non-data command is cache flush, which would have resulted in a cache flush being lost if it was queued at the time of CQE recovery. Fixes: 1e8e55b67030 ("mmc: block: Add CQE support") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Reviewed-by: Avri Altman Link: https://lore.kernel.org/r/20231103084720.6886-2-adrian.hunter@intel.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 152dfe593c43..f9a5cffa64b1 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1482,6 +1482,8 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req) blk_mq_requeue_request(req, true); else __blk_mq_end_request(req, BLK_STS_OK); + } else if (mq->in_recovery) { + blk_mq_requeue_request(req, true); } else { blk_mq_end_request(req, BLK_STS_OK); } From 1e77b3a0e6872aff9118b8b99ead7ca70449ec69 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 3 Nov 2023 10:47:17 +0200 Subject: [PATCH 0655/1397] mmc: block: Be sure to wait while busy in CQE error recovery commit c616696a902987352426fdaeec1b0b3240949e6b upstream. STOP command does not guarantee to wait while busy, but subsequent command MMC_CMDQ_TASK_MGMT to discard the queue will fail if the card is busy, so be sure to wait by employing mmc_poll_for_busy(). Fixes: 72a5af554df8 ("mmc: core: Add support for handling CQE requests") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Reviewed-by: Avri Altman Reviewed-by: Christian Loehle Link: https://lore.kernel.org/r/20231103084720.6886-4-adrian.hunter@intel.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 66b4198d0e81..a8c17b4cd737 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -553,6 +553,8 @@ int mmc_cqe_recovery(struct mmc_host *host) cmd.busy_timeout = MMC_CQE_RECOVERY_TIMEOUT; mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES); + mmc_poll_for_busy(host->card, MMC_CQE_RECOVERY_TIMEOUT, true, MMC_BUSY_IO); + memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_CMDQ_TASK_MGMT; cmd.arg = 1; /* Discard entire queue */ From 517e1355bbbe4382a0d1798b0561802c4c9be906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 14 Nov 2023 16:23:33 +0200 Subject: [PATCH 0656/1397] drm/i915: Also check for VGA converter in eDP probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f76f83a83c8fdbb62acbf8bd945f10821768145b upstream. Unfortunately even the HPD based detection added in commit cfe5bdfb27fa ("drm/i915: Check HPD live state during eDP probe") fails to detect that the VBT's eDP/DDI-A is a ghost on Asus B360M-A (CFL+CNP). On that board eDP/DDI-A has its HPD asserted despite nothing being actually connected there :( The straps/fuses also indicate that the eDP port is present. So if one boots with a VGA monitor connected the eDP probe will mistake the DP->VGA converter hooked to DDI-E for an eDP panel on DDI-A. As a last resort check what kind of DP device we've detected, and if it looks like a DP->VGA converter then conclude that the eDP port should be ignored. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9636 Fixes: cfe5bdfb27fa ("drm/i915: Check HPD live state during eDP probe") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231114142333.15799-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit fcd479a79120bf0cd507d85f898297a3b868dda6) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_dp.c | 28 +++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index b4583d1f9666..119a4de7fe6f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5517,8 +5517,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, * (eg. Acer Chromebook C710), so we'll check it only if multiple * ports are attempting to use the same AUX CH, according to VBT. */ - if (intel_bios_dp_has_shared_aux_ch(encoder->devdata) && - !intel_digital_port_connected(encoder)) { + if (intel_bios_dp_has_shared_aux_ch(encoder->devdata)) { /* * If this fails, presume the DPCD answer came * from some other port using the same AUX CH. @@ -5526,10 +5525,27 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, * FIXME maybe cleaner to check this before the * DPCD read? Would need sort out the VDD handling... */ - drm_info(&dev_priv->drm, - "[ENCODER:%d:%s] HPD is down, disabling eDP\n", - encoder->base.base.id, encoder->base.name); - goto out_vdd_off; + if (!intel_digital_port_connected(encoder)) { + drm_info(&dev_priv->drm, + "[ENCODER:%d:%s] HPD is down, disabling eDP\n", + encoder->base.base.id, encoder->base.name); + goto out_vdd_off; + } + + /* + * Unfortunately even the HPD based detection fails on + * eg. Asus B360M-A (CFL+CNP), so as a last resort fall + * back to checking for a VGA branch device. Only do this + * on known affected platforms to minimize false positives. + */ + if (DISPLAY_VER(dev_priv) == 9 && drm_dp_is_branch(intel_dp->dpcd) && + (intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_TYPE_MASK) == + DP_DWN_STRM_PORT_TYPE_ANALOG) { + drm_info(&dev_priv->drm, + "[ENCODER:%d:%s] VGA converter detected, disabling eDP\n", + encoder->base.base.id, encoder->base.name); + goto out_vdd_off; + } } mutex_lock(&dev_priv->drm.mode_config.mutex); From 95c06d0d9e9022c42823a9d1e832d647a1d30634 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 30 Nov 2023 16:13:21 +0100 Subject: [PATCH 0657/1397] ALSA: hda: Disable power-save on KONTRON SinglePC commit a337c355719c42a6c5b67e985ad753590ed844fb upstream. It's been reported that the runtime PM on KONTRON SinglePC (PCI SSID 1734:1232) caused a stall of playback after a bunch of invocations. (FWIW, this looks like an timing issue, and the stall happens rather on the controller side.) As a workaround, disable the default power-save on this platform. Cc: Link: https://lore.kernel.org/r/20231130151321.9813-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index ca765ac4765f..75148485b755 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2218,6 +2218,8 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0), /* https://bugs.launchpad.net/bugs/1821663 */ SND_PCI_QUIRK(0x1631, 0xe017, "Packard Bell NEC IMEDIA 5204", 0), + /* KONTRON SinglePC may cause a stall at runtime resume */ + SND_PCI_QUIRK(0x1734, 0x1232, "KONTRON SinglePC", 0), {} }; #endif /* CONFIG_PM */ From f18a35aaedf5331c224e49901acaa997d41a7165 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 25 Oct 2023 15:24:06 +0800 Subject: [PATCH 0658/1397] ALSA: hda/realtek: Headset Mic VREF to 100% commit baaacbff64d9f34b64f294431966d035aeadb81c upstream. This platform need to set Mic VREF to 100%. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/0916af40f08a4348a3298a9a59e6967e@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 87bc1d2f8a43..bd276a83993c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1986,6 +1986,7 @@ enum { ALC887_FIXUP_ASUS_AUDIO, ALC887_FIXUP_ASUS_HMIC, ALCS1200A_FIXUP_MIC_VREF, + ALC888VD_FIXUP_MIC_100VREF, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -2539,6 +2540,13 @@ static const struct hda_fixup alc882_fixups[] = { {} } }, + [ALC888VD_FIXUP_MIC_100VREF] = { + .type = HDA_FIXUP_PINCTLS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, PIN_VREF100 }, /* headset mic */ + {} + } + }, }; static const struct snd_pci_quirk alc882_fixup_tbl[] = { @@ -2608,6 +2616,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF), SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), + SND_PCI_QUIRK(0x10ec, 0x12d8, "iBase Elo Touch", ALC888VD_FIXUP_MIC_100VREF), SND_PCI_QUIRK(0x13fe, 0x1009, "Advantech MIT-W101", ALC886_FIXUP_EAPD), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), From 2d5eb4e5afadd0eb678589b2d675af72e04d96e5 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 29 Nov 2023 15:38:40 +0800 Subject: [PATCH 0659/1397] ALSA: hda/realtek: Add supported ALC257 for ChromeOS commit cae2bdb579ecc9d4219c58a7d3fde1958118dc1d upstream. ChromeOS want to support ALC257. Add codec ID to some relation function. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/99a88a7dbdb045fd9d934abeb6cec15f@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bd276a83993c..758abe9dffd6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3264,6 +3264,7 @@ static void alc_disable_headset_jack_key(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x10ec0257: case 0x19e58326: alc_write_coef_idx(codec, 0x48, 0x0); alc_update_coef_idx(codec, 0x49, 0x0045, 0x0); @@ -3293,6 +3294,7 @@ static void alc_enable_headset_jack_key(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x10ec0257: case 0x19e58326: alc_write_coef_idx(codec, 0x48, 0xd011); alc_update_coef_idx(codec, 0x49, 0x007f, 0x0045); @@ -6504,6 +6506,7 @@ static void alc_combo_jack_hp_jd_restart(struct hda_codec *codec) case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: + case 0x10ec0257: case 0x19e58326: alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); From 5368a515a37fdbc65f85d3c08374e667a40a7981 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 28 Nov 2023 17:59:28 +0800 Subject: [PATCH 0660/1397] net: libwx: fix memory leak on msix entry commit 91fdb30ddfdb651509914d3ed0a0302712540fed upstream. Since pci_free_irq_vectors() set pdev->msix_enabled as 0 in the calling of pci_msix_shutdown(), wx->msix_entries is never freed. Reordering the lines to fix the memory leak. Cc: stable@vger.kernel.org Fixes: 3f703186113f ("net: libwx: Add irq flow functions") Signed-off-by: Jiawen Wu Reviewed-by: Kalesh AP Link: https://lore.kernel.org/r/20231128095928.1083292-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index e04d4a5eed7b..21505920136c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1965,11 +1965,11 @@ void wx_reset_interrupt_capability(struct wx *wx) if (!pdev->msi_enabled && !pdev->msix_enabled) return; - pci_free_irq_vectors(wx->pdev); if (pdev->msix_enabled) { kfree(wx->msix_entries); wx->msix_entries = NULL; } + pci_free_irq_vectors(wx->pdev); } EXPORT_SYMBOL(wx_reset_interrupt_capability); From 99277dd29051e2c28d865c5100e355d32803a997 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 28 Nov 2023 14:50:23 +0100 Subject: [PATCH 0661/1397] dm-verity: align struct dm_verity_fec_io properly commit 38bc1ab135db87577695816b190e7d6d8ec75879 upstream. dm_verity_fec_io is placed after the end of two hash digests. If the hash digest has unaligned length, struct dm_verity_fec_io could be unaligned. This commit fixes the placement of struct dm_verity_fec_io, so that it's aligned. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Fixes: a739ff3f543a ("dm verity: add support for forward error correction") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-fec.c | 3 ++- drivers/md/dm-verity.h | 6 ------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 2099c755119e..b475200d8586 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -24,7 +24,8 @@ bool verity_fec_is_enabled(struct dm_verity *v) */ static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io) { - return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io); + return (struct dm_verity_fec_io *) + ((char *)io + io->v->ti->per_io_data_size - sizeof(struct dm_verity_fec_io)); } /* diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f96f4e281ee4..f9d522c870e6 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -115,12 +115,6 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v, return (u8 *)(io + 1) + v->ahash_reqsize + v->digest_size; } -static inline u8 *verity_io_digest_end(struct dm_verity *v, - struct dm_verity_io *io) -{ - return verity_io_want_digest(v, io) + v->digest_size; -} - extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, struct bvec_iter *iter, int (*process)(struct dm_verity *v, From 80fb13782beceb5f1cada5f77901d45db1d6f6a5 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 21 Nov 2023 07:56:30 +0900 Subject: [PATCH 0662/1397] scsi: Change SCSI device boolean fields to single bit flags commit 6371be7aeb986905bb60ec73d002fc02343393b4 upstream. Commit 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management") changed the single bit manage_start_stop flag into 2 boolean fields of the SCSI device structure. Commit 24eca2dce0f8 ("scsi: sd: Introduce manage_shutdown device flag") introduced the manage_shutdown boolean field for the same structure. Together, these 2 commits increase the size of struct scsi_device by 8 bytes by using booleans instead of defining the manage_xxx fields as single bit flags, similarly to other flags of this structure. Avoid this unnecessary structure size increase and be consistent with the definition of other flags by reverting the definitions of the manage_xxx fields as single bit flags. Fixes: 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management") Fixes: 24eca2dce0f8 ("scsi: sd: Introduce manage_shutdown device flag") Cc: Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20231120225631.37938-2-dlemoal@kernel.org Reviewed-by: Niklas Cassel Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 4 ++-- drivers/firewire/sbp2.c | 6 +++--- include/scsi/scsi_device.h | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 3a957c4da409..87fe8a85a2a6 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1056,8 +1056,8 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) * and resume and shutdown only. For system level suspend/resume, * devices power state is handled directly by libata EH. */ - sdev->manage_runtime_start_stop = true; - sdev->manage_shutdown = true; + sdev->manage_runtime_start_stop = 1; + sdev->manage_shutdown = 1; } /* diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 7edf2c95282f..e779d866022b 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1519,9 +1519,9 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev) sdev->use_10_for_rw = 1; if (sbp2_param_exclusive_login) { - sdev->manage_system_start_stop = true; - sdev->manage_runtime_start_stop = true; - sdev->manage_shutdown = true; + sdev->manage_system_start_stop = 1; + sdev->manage_runtime_start_stop = 1; + sdev->manage_shutdown = 1; } if (sdev->type == TYPE_ROM) diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 65e49fae8da7..b919ed24220d 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -167,19 +167,19 @@ struct scsi_device { * power state for system suspend/resume (suspend to RAM and * hibernation) operations. */ - bool manage_system_start_stop; + unsigned manage_system_start_stop:1; /* * If true, let the high-level device driver (sd) manage the device * power state for runtime device suspand and resume operations. */ - bool manage_runtime_start_stop; + unsigned manage_runtime_start_stop:1; /* * If true, let the high-level device driver (sd) manage the device * power state for system shutdown (power off) operations. */ - bool manage_shutdown; + unsigned manage_shutdown:1; unsigned removable:1; unsigned changed:1; /* Data invalid due to media change */ From b32c0a7cb93f9c5e726b45faf61311d90a7b5319 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 21 Nov 2023 07:56:31 +0900 Subject: [PATCH 0663/1397] scsi: sd: Fix system start for ATA devices commit b09d7f8fd50f6e93cbadd8d27fde178f745b42a1 upstream. It is not always possible to keep a device in the runtime suspended state when a system level suspend/resume cycle is executed. E.g. for ATA devices connected to AHCI adapters, system resume resets the ATA ports, which causes connected devices to spin up. In such case, a runtime suspended disk will incorrectly be seen with a suspended runtime state because the device is not resumed by sd_resume_system(). The power state seen by the user is different than the actual device physical power state. Fix this issue by introducing the struct scsi_device flag force_runtime_start_on_system_start. When set, this flag causes sd_resume_system() to request a runtime resume operation for runtime suspended devices. This results in the user seeing the device runtime_state as active after a system resume, thus correctly reflecting the device physical power state. Fixes: 9131bff6a9f1 ("scsi: core: pm: Only runtime resume if necessary") Cc: Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20231120225631.37938-3-dlemoal@kernel.org Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 5 +++++ drivers/scsi/sd.c | 9 ++++++++- include/scsi/scsi_device.h | 6 ++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 87fe8a85a2a6..4209fb39f644 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1055,9 +1055,14 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) * Ask the sd driver to issue START STOP UNIT on runtime suspend * and resume and shutdown only. For system level suspend/resume, * devices power state is handled directly by libata EH. + * Given that disks are always spun up on system resume, also + * make sure that the sd driver forces runtime suspended disks + * to be resumed to correctly reflect the power state of the + * device. */ sdev->manage_runtime_start_stop = 1; sdev->manage_shutdown = 1; + sdev->force_runtime_start_on_system_start = 1; } /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 6effa13039f3..e17509f0b3fa 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3953,8 +3953,15 @@ static int sd_resume(struct device *dev, bool runtime) static int sd_resume_system(struct device *dev) { - if (pm_runtime_suspended(dev)) + if (pm_runtime_suspended(dev)) { + struct scsi_disk *sdkp = dev_get_drvdata(dev); + struct scsi_device *sdp = sdkp ? sdkp->device : NULL; + + if (sdp && sdp->force_runtime_start_on_system_start) + pm_request_resume(dev); + return 0; + } return sd_resume(dev, false); } diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index b919ed24220d..8fa1153f37cb 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -181,6 +181,12 @@ struct scsi_device { */ unsigned manage_shutdown:1; + /* + * If set and if the device is runtime suspended, ask the high-level + * device driver (sd) to force a runtime resume of the device. + */ + unsigned force_runtime_start_on_system_start:1; + unsigned removable:1; unsigned changed:1; /* Data invalid due to media change */ unsigned busy:1; /* Used to prevent races */ From ff7699d3620763b0dfe2ff93df4528880bf903a8 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Wed, 15 Nov 2023 21:10:24 +0800 Subject: [PATCH 0664/1397] scsi: ufs: core: Clear cmd if abort succeeds in MCQ mode commit 93e6c0e19d5bb12b49534a411c85e21d333731fa upstream. In MCQ mode, if cmd is pending in device and abort succeeds, response will not be returned by device. So we need clear the cmd, otherwise timeout will happen and next time we use same tag we will get a WARN_ON(lrbp->cmd). Below is error log: <3>[ 2277.447611][T21376] ufshcd-mtk 112b0000.ufshci: ufshcd_try_to_abort_task: cmd pending in the device. tag = 7 <3>[ 2277.476954][T21376] ufshcd-mtk 112b0000.ufshci: Aborting tag 7 / CDB 0x2a succeeded <6>[ 2307.551263][T30974] ufshcd-mtk 112b0000.ufshci: ufshcd_abort: Device abort task at tag 7 <4>[ 2307.623264][ T327] WARNING: CPU: 5 PID: 327 at source/drivers/ufs/core/ufshcd.c:3021 ufshcd_queuecommand+0x66c/0xe34 Fixes: ab248643d3d6 ("scsi: ufs: core: Add error handling for MCQ mode") Cc: Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20231115131024.15829-1-peter.wang@mediatek.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/ufs/core/ufshcd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index a4b483a393c9..170fbd5715b2 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6347,11 +6347,24 @@ static bool ufshcd_abort_one(struct request *rq, void *priv) struct scsi_device *sdev = cmd->device; struct Scsi_Host *shost = sdev->host; struct ufs_hba *hba = shost_priv(shost); + struct ufshcd_lrb *lrbp = &hba->lrb[tag]; + struct ufs_hw_queue *hwq; + unsigned long flags; *ret = ufshcd_try_to_abort_task(hba, tag); dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag, hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1, *ret ? "failed" : "succeeded"); + + /* Release cmd in MCQ mode if abort succeeds */ + if (is_mcq_enabled(hba) && (*ret == 0)) { + hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd)); + spin_lock_irqsave(&hwq->cq_lock, flags); + if (ufshcd_cmd_inflight(lrbp->cmd)) + ufshcd_release_scsi_cmd(hba, lrbp); + spin_unlock_irqrestore(&hwq->cq_lock, flags); + } + return *ret == 0; } From 49227bea27ebcd260f0c94a3055b14bbd8605c5e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 24 Nov 2023 09:56:32 -0600 Subject: [PATCH 0665/1397] drm/amd: Enable PCIe PME from D3 commit 6967741d26c87300a51b5e50d4acd104bc1a9759 upstream. When dGPU is put into BOCO it may be in D3cold but still able send PME on display hotplug event. For this to work it must be enabled as wake source from D3. When runpm is enabled use pci_wake_from_d3() to mark wakeup as enabled by default. Cc: stable@vger.kernel.org # 6.1+ Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 81edf66dbea8..2c35036e4ba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2195,6 +2195,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); + pci_wake_from_d3(pdev, TRUE); + /* * For runpm implemented via BACO, PMFW will handle the * timing for BACO in and out: From f6f4be2c39dd841f5fb88fb1248283d37237a6c9 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 8 Nov 2023 14:38:29 +0800 Subject: [PATCH 0666/1397] drm/amdgpu: correct the amdgpu runtime dereference usage count commit c6df7f313794c3ad41a49b9a7c95da369db607f3 upstream. Fix the amdgpu runpm dereference usage count. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 363e6a2cad8c..578aeba49ea8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -340,14 +340,11 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, adev->have_disp_power_ref = true; return ret; } - /* if we have no active crtcs, then drop the power ref - * we got before + /* if we have no active crtcs, then go to + * drop the power ref we got before */ - if (!active && adev->have_disp_power_ref) { - pm_runtime_put_autosuspend(dev->dev); + if (!active && adev->have_disp_power_ref) adev->have_disp_power_ref = false; - } - out: /* drop the power reference we got coming in here */ pm_runtime_put_autosuspend(dev->dev); From 5ffd8c73d1d0f38706951c4ab8710cb6c6cdde45 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Mon, 20 Nov 2023 11:31:32 -0600 Subject: [PATCH 0667/1397] drm/amdgpu: Force order between a read and write to the same address commit 4b27a33c3b173bef1d19ba89e0b9b812b4fddd25 upstream. Setting register to force ordering to prevent read/write or write/read hazards for un-cached modes. Signed-off-by: Alex Sierra Acked-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 ++++++++ .../gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h | 2 ++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 43d066bc5245..82e01bf407fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -83,6 +83,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); +static const struct soc15_reg_golden golden_settings_gc_11_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) +}; + static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010), @@ -275,6 +279,10 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) default: break; } + soc15_program_register_sequence(adev, + golden_settings_gc_11_0, + (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); + } static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h index c92c4b83253f..4bff1ef8a9a6 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h @@ -6369,6 +6369,8 @@ #define regTCP_INVALIDATE_BASE_IDX 1 #define regTCP_STATUS 0x19a1 #define regTCP_STATUS_BASE_IDX 1 +#define regTCP_CNTL 0x19a2 +#define regTCP_CNTL_BASE_IDX 1 #define regTCP_CNTL2 0x19a3 #define regTCP_CNTL2_BASE_IDX 1 #define regTCP_DEBUG_INDEX 0x19a5 From 6dfccebfecc4b99f64d25351bb6ac69f1a6926e2 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Tue, 21 Nov 2023 11:06:51 +0800 Subject: [PATCH 0668/1397] drm/amdgpu: fix memory overflow in the IB test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6b0b7789a7a5f3e69185449f891beea58e563f9b upstream. Fix a memory overflow issue in the gfx IB test for some ASICs. At least 20 bytes are needed for the IB test packet. v2: correct code indentation errors. (Christian) Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 82e01bf407fc..b346eb0a0db1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -398,7 +398,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 885ebd703260..1943beb135c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -883,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index fd61574a737c..2e23d08b45f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1039,8 +1039,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 18ce5fe45f6f..e481ef73af6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -296,8 +296,8 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; From c4307207dc42c80e34eb9c496ac8dbd2bfff1778 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Fri, 24 Nov 2023 09:33:47 +0800 Subject: [PATCH 0669/1397] drm/amdgpu: Update EEPROM I2C address for smu v13_0_0 commit e0409021e34af50e7b6f31635c8d21583d7c43dd upstream. Check smu v13_0_0 SKU type to select EEPROM I2C address. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 595d5e535aca..9d82701d365b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -214,6 +214,12 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, control->i2c_address = EEPROM_I2C_MADDR_0; return true; case IP_VERSION(13, 0, 0): + if (strnstr(atom_ctx->vbios_pn, "D707", + sizeof(atom_ctx->vbios_pn))) + control->i2c_address = EEPROM_I2C_MADDR_0; + else + control->i2c_address = EEPROM_I2C_MADDR_4; + return true; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): control->i2c_address = EEPROM_I2C_MADDR_4; From 8ffa40285eb7561a2ce12ff31c7f59e0ca395d87 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Mon, 6 Nov 2023 11:20:15 -0500 Subject: [PATCH 0670/1397] drm/amd/display: Include udelay when waiting for INBOX0 ACK commit 3c9ea68cb61bd7e5bd312c06a12adada74ff5805 upstream. When waiting for the ACK for INBOX0 message, we have to ensure to include the udelay for proper wait time Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Samson Tam Acked-by: Hamza Mahfooz Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index d1922dde5b8b..6c45e216c709 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -997,6 +997,7 @@ enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t ti ack = dmub->hw_funcs.read_inbox0_ack_register(dmub); if (ack) return DMUB_STATUS_OK; + udelay(1); } return DMUB_STATUS_TIMEOUT; } From 7fa19a72cc79c1fa62af0818a73f47e68d57612f Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 8 Nov 2023 10:55:53 -0500 Subject: [PATCH 0671/1397] drm/amd/display: Remove min_dst_y_next_start check for Z8 commit 08448812acb2ab701cd5ff7e1a1dc97f7f10260c upstream. [Why] Flickering occurs on DRR supported panels when engaged in DRR due to min_dst_y_next becoming larger than the frame size itself. [How] In general, we should be able to enter Z8 when this is engaged but it might be a net power loss even if the calculation wasn't bugged. Don't support enabling Z8 during the DRR region. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Syed Hassan Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 5805fb02af14..f2de0c758494 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -948,10 +948,8 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc { int plane_count; int i; - unsigned int min_dst_y_next_start_us; plane_count = 0; - min_dst_y_next_start_us = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; @@ -973,26 +971,15 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { struct dc_link *link = context->streams[0]->sink->link; struct dc_stream_status *stream_status = &context->stream_status[0]; - struct dc_stream_state *current_stream = context->streams[0]; int minmum_z8_residency = dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; bool is_pwrseq0 = link->link_index == 0; - bool isFreesyncVideo; - - isFreesyncVideo = current_stream->adjust.v_total_min == current_stream->adjust.v_total_max; - isFreesyncVideo = isFreesyncVideo && current_stream->timing.v_total < current_stream->adjust.v_total_min; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream == current_stream && isFreesyncVideo) { - min_dst_y_next_start_us = context->res_ctx.pipe_ctx[i].dlg_regs.min_dst_y_next_start_us; - break; - } - } /* Don't support multi-plane configurations */ if (stream_status->plane_count > 1) return DCN_ZSTATE_SUPPORT_DISALLOW; - if (is_pwrseq0 && (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || min_dst_y_next_start_us > 5000)) + if (is_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0) return DCN_ZSTATE_SUPPORT_ALLOW; else if (is_pwrseq0 && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr) return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; From 1191c0f6e938aa16f01edd72f5735cfb5a732180 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 7 Nov 2023 17:01:49 -0500 Subject: [PATCH 0672/1397] drm/amd/display: Use DRAM speed from validation for dummy p-state commit 9be601135ba8ac69880c01606c82140f2dde105e upstream. [Description] When choosing which dummy p-state latency to use, we need to use the DRAM speed from validation. The DRAMSpeed DML variable can change because we use different input params to DML when populating watermarks set B. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Samson Tam Acked-by: Hamza Mahfooz Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 711d4085b33b..cf3b400c8619 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1964,6 +1964,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, int i, pipe_idx, vlevel_temp = 0; double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + double dram_speed_from_validation = context->bw_ctx.dml.vba.DRAMSpeed; double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation; bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_unsupported; @@ -2151,7 +2152,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, } if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + min_dram_speed_mts = dram_speed_from_validation; min_dram_speed_mts_margin = 160; context->bw_ctx.dml.soc.dram_clock_change_latency_us = From 11831e89aae642e8991adfda46c20286ff629348 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 8 Nov 2023 10:59:00 -0500 Subject: [PATCH 0673/1397] drm/amd/display: Update min Z8 residency time to 2100 for DCN314 commit 4636a211980052ca0df90265c8a3ed2d46099091 upstream. [Why] Some panels with residency period of 2054 exhibit flickering with Z8 at the end of the frame. [How] As a workaround, increase the limit to block these panels. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Syed Hassan Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 004beed9bd44..3e65e683db0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -869,7 +869,7 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = false, .enable_z9_disable_interface = true, - .minimum_z8_residency_time = 2000, + .minimum_z8_residency_time = 2100, .psr_skip_crtc_disable = true, .replay_skip_crtc_disabled = true, .disable_dmcu = true, From 316d6072bcc07aad792718054092f5eb542293ff Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Wed, 22 Nov 2023 14:50:34 -0500 Subject: [PATCH 0674/1397] drm/amd/display: fix ABM disablement commit b9f46f0b98784e40288ee393f863f553fde062fa upstream. On recent versions of DMUB firmware, if we want to completely disable ABM we have to pass ABM_LEVEL_IMMEDIATE_DISABLE as the requested ABM level to DMUB. Otherwise, LCD eDP displays are unable to reach their maximum brightness levels. So, to fix this whenever the user requests an ABM level of 0 pass ABM_LEVEL_IMMEDIATE_DISABLE to DMUB instead. Also, to keep the user's experience consistent map ABM_LEVEL_IMMEDIATE_DISABLE to 0 when a user tries to read the requested ABM level. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Harry Wentland Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index deedcd997845..f5fdb61c821d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6236,7 +6236,7 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector, dm_new_state->underscan_enable = val; ret = 0; } else if (property == adev->mode_info.abm_level_property) { - dm_new_state->abm_level = val; + dm_new_state->abm_level = val ?: ABM_LEVEL_IMMEDIATE_DISABLE; ret = 0; } @@ -6281,7 +6281,8 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector, *val = dm_state->underscan_enable; ret = 0; } else if (property == adev->mode_info.abm_level_property) { - *val = dm_state->abm_level; + *val = (dm_state->abm_level != ABM_LEVEL_IMMEDIATE_DISABLE) ? + dm_state->abm_level : 0; ret = 0; } @@ -6354,7 +6355,8 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->pbn = 0; if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) - state->abm_level = amdgpu_dm_abm_level; + state->abm_level = amdgpu_dm_abm_level ?: + ABM_LEVEL_IMMEDIATE_DISABLE; __drm_atomic_helper_connector_reset(connector, &state->base); } From a29a8e06dc8e7f95f97de112ef43c53cdc1a72cd Mon Sep 17 00:00:00 2001 From: Zhongwei Date: Wed, 8 Nov 2023 16:34:36 +0800 Subject: [PATCH 0675/1397] drm/amd/display: force toggle rate wa for first link training for a retimer commit eb28018943fed7639dfea1c9ec9c756ec692b99a upstream. [WHY] Handover from DMUB to driver does not perform link rate toggle. It might cause link training failure for boot up. [HOW] Force toggle rate wa for first link train. link->vendor_specific_lttpr_link_rate_wa should be zero then. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Michael Strauss Acked-by: Hamza Mahfooz Signed-off-by: Zhongwei Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../link/protocols/link_dp_training_fixed_vs_pe_retimer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c index fd8f6f198146..68096d12f52f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c @@ -115,7 +115,7 @@ static enum link_training_result perform_fixed_vs_pe_nontransparent_training_seq lt_settings->cr_pattern_time = 16000; /* Fixed VS/PE specific: Toggle link rate */ - apply_toggle_rate_wa = (link->vendor_specific_lttpr_link_rate_wa == target_rate); + apply_toggle_rate_wa = ((link->vendor_specific_lttpr_link_rate_wa == target_rate) || (link->vendor_specific_lttpr_link_rate_wa == 0)); target_rate = get_dpcd_link_rate(<_settings->link_settings); toggle_rate = (target_rate == 0x6) ? 0xA : 0x6; @@ -271,7 +271,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( /* Vendor specific: Toggle link rate */ toggle_rate = (rate == 0x6) ? 0xA : 0x6; - if (link->vendor_specific_lttpr_link_rate_wa == rate) { + if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) { core_link_write_dpcd( link, DP_LINK_BW_SET, @@ -617,7 +617,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence( /* Vendor specific: Toggle link rate */ toggle_rate = (rate == 0x6) ? 0xA : 0x6; - if (link->vendor_specific_lttpr_link_rate_wa == rate) { + if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) { core_link_write_dpcd( link, DP_LINK_BW_SET, From 590df5e4e86551263702cf65f609918a5e485449 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Tue, 21 Nov 2023 20:51:49 -0700 Subject: [PATCH 0676/1397] dm verity: initialize fec io before freeing it commit 7be05bdfb4efc1396f7692562c7161e2b9f595f1 upstream. If BIO error, verity_end_io() can call verity_finish_io() before verity_fec_init_io(). Therefore, fec_io->rs is not initialized and may crash when doing memory freeing in verity_fec_finish_io(). Crash call stack: die+0x90/0x2b8 __do_kernel_fault+0x260/0x298 do_bad_area+0x2c/0xdc do_translation_fault+0x3c/0x54 do_mem_abort+0x54/0x118 el1_abort+0x38/0x5c el1h_64_sync_handler+0x50/0x90 el1h_64_sync+0x64/0x6c free_rs+0x18/0xac fec_rs_free+0x10/0x24 mempool_free+0x58/0x148 verity_fec_finish_io+0x4c/0xb0 verity_end_io+0xb8/0x150 Cc: stable@vger.kernel.org # v6.0+ Fixes: 5721d4e5a9cd ("dm verity: Add optional "try_verify_in_tasklet" feature") Signed-off-by: Wu Bo Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-target.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index e115fcfe723c..beec14b6b044 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -642,7 +642,6 @@ static void verity_work(struct work_struct *w) io->in_tasklet = false; - verity_fec_init_io(io); verity_finish_io(io, errno_to_blk_status(verity_verify_io(io))); } @@ -792,6 +791,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio) bio->bi_private = io; io->iter = bio->bi_iter; + verity_fec_init_io(io); + verity_submit_prefetch(v, io); submit_bio_noacct(bio); From 7ec7652671502d99e9774af581b2a99c25fc8ab2 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Tue, 21 Nov 2023 20:51:50 -0700 Subject: [PATCH 0677/1397] dm verity: don't perform FEC for failed readahead IO commit 0193e3966ceeeef69e235975918b287ab093082b upstream. We found an issue under Android OTA scenario that many BIOs have to do FEC where the data under dm-verity is 100% complete and no corruption. Android OTA has many dm-block layers, from upper to lower: dm-verity dm-snapshot dm-origin & dm-cow dm-linear ufs DM tables have to change 2 times during Android OTA merging process. When doing table change, the dm-snapshot will be suspended for a while. During this interval, many readahead IOs are submitted to dm_verity from filesystem. Then the kverity works are busy doing FEC process which cost too much time to finish dm-verity IO. This causes needless delay which feels like system is hung. After adding debugging it was found that each readahead IO needed around 10s to finish when this situation occurred. This is due to IO amplification: dm-snapshot suspend erofs_readahead // 300+ io is submitted dm_submit_bio (dm_verity) dm_submit_bio (dm_snapshot) bio return EIO bio got nothing, it's empty verity_end_io verity_verify_io forloop range(0, io->n_blocks) // each io->nblocks ~= 20 verity_fec_decode fec_decode_rsb fec_read_bufs forloop range(0, v->fec->rsn) // v->fec->rsn = 253 new_read submit_bio (dm_snapshot) end loop end loop dm-snapshot resume Readahead BIOs get nothing while dm-snapshot is suspended, so all of them will cause verity's FEC. Each readahead BIO needs to verify ~20 (io->nblocks) blocks. Each block needs to do FEC, and every block needs to do 253 (v->fec->rsn) reads. So during the suspend interval(~200ms), 300 readahead BIOs trigger ~1518000 (300*20*253) IOs to dm-snapshot. As readahead IO is not required by userspace, and to fix this issue, it is best to pass readahead errors to upper layer to handle it. Cc: stable@vger.kernel.org Fixes: a739ff3f543a ("dm verity: add support for forward error correction") Signed-off-by: Wu Bo Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-target.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index beec14b6b044..14e58ae70521 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -667,7 +667,9 @@ static void verity_end_io(struct bio *bio) struct dm_verity_io *io = bio->bi_private; if (bio->bi_status && - (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) { + (!verity_fec_is_enabled(io->v) || + verity_is_system_shutting_down() || + (bio->bi_opf & REQ_RAHEAD))) { verity_finish_io(io, bio->bi_status); return; } From 2bda53caf1eda7f0a54fd446df66bb251522bd09 Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Mon, 27 Nov 2023 15:56:57 -0500 Subject: [PATCH 0678/1397] nvme: check for valid nvme_identify_ns() before using it commit d8b90d600aff181936457f032d116dbd8534db06 upstream. When scanning namespaces, it is possible to get valid data from the first call to nvme_identify_ns() in nvme_alloc_ns(), but not from the second call in nvme_update_ns_info_block(). In particular, if the NSID becomes inactive between the two commands, a storage device may return a buffer filled with zero as per 4.1.5.1. In this case, we can get a kernel crash due to a divide-by-zero in blk_stack_limits() because ns->lba_shift will be set to zero. PID: 326 TASK: ffff95fec3cd8000 CPU: 29 COMMAND: "kworker/u98:10" #0 [ffffad8f8702f9e0] machine_kexec at ffffffff91c76ec7 #1 [ffffad8f8702fa38] __crash_kexec at ffffffff91dea4fa #2 [ffffad8f8702faf8] crash_kexec at ffffffff91deb788 #3 [ffffad8f8702fb00] oops_end at ffffffff91c2e4bb #4 [ffffad8f8702fb20] do_trap at ffffffff91c2a4ce #5 [ffffad8f8702fb70] do_error_trap at ffffffff91c2a595 #6 [ffffad8f8702fbb0] exc_divide_error at ffffffff928506e6 #7 [ffffad8f8702fbd0] asm_exc_divide_error at ffffffff92a00926 [exception RIP: blk_stack_limits+434] RIP: ffffffff92191872 RSP: ffffad8f8702fc80 RFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff95efa0c91800 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000001 RBP: 00000000ffffffff R8: ffff95fec7df35a8 R9: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff95fed33c09a8 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #8 [ffffad8f8702fce0] nvme_update_ns_info_block at ffffffffc06d3533 [nvme_core] #9 [ffffad8f8702fd18] nvme_scan_ns at ffffffffc06d6fa7 [nvme_core] This happened when the check for valid data was moved out of nvme_identify_ns() into one of the callers. Fix this by checking in both callers. Link: https://bugzilla.kernel.org/show_bug.cgi?id=218186 Fixes: 0dd6fff2aad4 ("nvme: bring back auto-removal of deleted namespaces during sequential scan") Cc: stable@vger.kernel.org Signed-off-by: Ewan D. Milne Signed-off-by: Keith Busch Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 21783aa2ee8e..c09048984a27 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2026,6 +2026,13 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (ret) return ret; + if (id->ncap == 0) { + /* namespace not allocated or attached */ + info->is_removed = true; + ret = -ENODEV; + goto error; + } + blk_mq_freeze_queue(ns->disk->queue); lbaf = nvme_lbaf_index(id->flbas); ns->lba_shift = id->lbaf[lbaf].ds; @@ -2083,6 +2090,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, set_bit(NVME_NS_READY, &ns->flags); ret = 0; } + +error: kfree(id); return ret; } From fbe43b7f02a2b578d547cec93dbca81e94c0dbb5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 26 Nov 2023 19:36:46 +0100 Subject: [PATCH 0679/1397] r8169: fix deadlock on RTL8125 in jumbo mtu mode commit 59d395ed606d8df14615712b0cdcdadb2d962175 upstream. The original change results in a deadlock if jumbo mtu mode is used. Reason is that the phydev lock is held when rtl_reset_work() is called here, and rtl_jumbo_config() calls phy_start_aneg() which also tries to acquire the phydev lock. Fix this by calling rtl_reset_work() asynchronously. Fixes: 621735f59064 ("r8169: fix rare issue with broken rx after link-down on RTL8125") Reported-by: Ian Chen Tested-by: Ian Chen Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/caf6a487-ef8c-4570-88f9-f47a659faf33@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 295366a85c63..a43e33e4b25e 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -579,6 +579,7 @@ struct rtl8169_tc_offsets { enum rtl_flag { RTL_FLAG_TASK_ENABLED = 0, RTL_FLAG_TASK_RESET_PENDING, + RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, RTL_FLAG_TASK_TX_TIMEOUT, RTL_FLAG_MAX }; @@ -4582,6 +4583,8 @@ static void rtl_task(struct work_struct *work) reset: rtl_reset_work(tp); netif_wake_queue(tp->dev); + } else if (test_and_clear_bit(RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, tp->wk.flags)) { + rtl_reset_work(tp); } out_unlock: rtnl_unlock(); @@ -4615,7 +4618,7 @@ static void r8169_phylink_handler(struct net_device *ndev) } else { /* In few cases rx is broken after link-down otherwise */ if (rtl_is_8125(tp)) - rtl_reset_work(tp); + rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE); pm_runtime_idle(d); } From 0d82c163e909b7fd7cef8be44451597aaf4e94a7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 27 Nov 2023 15:37:41 +0100 Subject: [PATCH 0680/1397] ACPI: video: Use acpi_video_device for cooling-dev driver data commit 172c48caed91a978bca078042222d09baea13717 upstream. The acpi_video code was storing the acpi_video_device as driver_data in the acpi_device children of the acpi_video_bus acpi_device. But the acpi_video driver only binds to the bus acpi_device. It uses, but does not bind to, the children. Since it is not the driver it should not be using the driver_data of the children's acpi_device-s. Since commit 0d16710146a1 ("ACPI: bus: Set driver_data to NULL every time .add() fails") the childen's driver_data ends up getting set to NULL after a driver fails to bind to the children leading to a NULL pointer deref in video_get_max_state when registering the cooling-dev: [ 3.148958] BUG: kernel NULL pointer dereference, address: 0000000000000090 [ 3.149015] Hardware name: Sony Corporation VPCSB2X9R/VAIO, BIOS R2087H4 06/15/2012 [ 3.149021] RIP: 0010:video_get_max_state+0x17/0x30 [video] [ 3.149105] Call Trace: [ 3.149110] [ 3.149114] ? __die+0x23/0x70 [ 3.149126] ? page_fault_oops+0x171/0x4e0 [ 3.149137] ? exc_page_fault+0x7f/0x180 [ 3.149147] ? asm_exc_page_fault+0x26/0x30 [ 3.149158] ? video_get_max_state+0x17/0x30 [video 9b6f3f0d19d7b4a0e2df17a2d8b43bc19c2ed71f] [ 3.149176] ? __pfx_video_get_max_state+0x10/0x10 [video 9b6f3f0d19d7b4a0e2df17a2d8b43bc19c2ed71f] [ 3.149192] __thermal_cooling_device_register.part.0+0xf2/0x2f0 [ 3.149205] acpi_video_bus_register_backlight.part.0.isra.0+0x414/0x570 [video 9b6f3f0d19d7b4a0e2df17a2d8b43bc19c2ed71f] [ 3.149227] acpi_video_register_backlight+0x57/0x80 [video 9b6f3f0d19d7b4a0e2df17a2d8b43bc19c2ed71f] [ 3.149245] intel_acpi_video_register+0x68/0x90 [i915 1f3a758130b32ef13d301d4f8f78c7d766d57f2a] [ 3.149669] intel_display_driver_register+0x28/0x50 [i915 1f3a758130b32ef13d301d4f8f78c7d766d57f2a] [ 3.150064] i915_driver_probe+0x790/0xb90 [i915 1f3a758130b32ef13d301d4f8f78c7d766d57f2a] [ 3.150402] local_pci_probe+0x45/0xa0 [ 3.150412] pci_device_probe+0xc1/0x260 Fix this by directly using the acpi_video_device as devdata for the cooling-device, which avoids the need to set driver-data on the children at all. Fixes: 0d16710146a1 ("ACPI: bus: Set driver_data to NULL every time .add() fails") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9718 Cc: 6.6+ # 6.6+ Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_video.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 4e868454b38d..35f071ad9532 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -253,8 +253,7 @@ static const struct backlight_ops acpi_backlight_ops = { static int video_get_max_state(struct thermal_cooling_device *cooling_dev, unsigned long *state) { - struct acpi_device *device = cooling_dev->devdata; - struct acpi_video_device *video = acpi_driver_data(device); + struct acpi_video_device *video = cooling_dev->devdata; *state = video->brightness->count - ACPI_VIDEO_FIRST_LEVEL - 1; return 0; @@ -263,8 +262,7 @@ static int video_get_max_state(struct thermal_cooling_device *cooling_dev, static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long *state) { - struct acpi_device *device = cooling_dev->devdata; - struct acpi_video_device *video = acpi_driver_data(device); + struct acpi_video_device *video = cooling_dev->devdata; unsigned long long level; int offset; @@ -283,8 +281,7 @@ static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, static int video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long state) { - struct acpi_device *device = cooling_dev->devdata; - struct acpi_video_device *video = acpi_driver_data(device); + struct acpi_video_device *video = cooling_dev->devdata; int level; if (state >= video->brightness->count - ACPI_VIDEO_FIRST_LEVEL) @@ -1125,7 +1122,6 @@ static int acpi_video_bus_get_one_device(struct acpi_device *device, void *arg) strcpy(acpi_device_name(device), ACPI_VIDEO_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_VIDEO_CLASS); - device->driver_data = data; data->device_id = device_id; data->video = video; @@ -1747,8 +1743,8 @@ static void acpi_video_dev_register_backlight(struct acpi_video_device *device) device->backlight->props.brightness = acpi_video_get_brightness(device->backlight); - device->cooling_dev = thermal_cooling_device_register("LCD", - device->dev, &video_cooling_ops); + device->cooling_dev = thermal_cooling_device_register("LCD", device, + &video_cooling_ops); if (IS_ERR(device->cooling_dev)) { /* * Set cooling_dev to NULL so we don't crash trying to free it. From 4be625ba36bcfdfb9c72b712acd4ef943a269ebb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Nov 2023 21:02:01 -0700 Subject: [PATCH 0681/1397] io_uring: don't allow discontig pages for IORING_SETUP_NO_MMAP commit 820d070feb668aab5bc9413c285a1dda2a70e076 upstream. io_sqes_map() is used rather than io_mem_alloc(), if the application passes in memory for mapping rather than have the kernel allocate it and then mmap(2) the ranges. This then calls __io_uaddr_map() to perform the page mapping and pinning, which checks if we end up with the same pages, if more than one page is mapped. But this check is incorrect and only checks if the first and last pages are the same, where it really should be checking if the mapped pages are contigous. This allows mapping a single normal page, or a huge page range. Down the line we can add support for remapping pages to be virtually contigous, which is really all that io_uring cares about. Cc: stable@vger.kernel.org Fixes: 03d89a2de25b ("io_uring: support for user allocated memory for rings/sqes") Reported-by: Jann Horn Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 8d1bc6cdfe71..44c8512a0355 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2690,6 +2690,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages, { struct page **page_array; unsigned int nr_pages; + void *page_addr; int ret, i; *npages = 0; @@ -2711,27 +2712,29 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages, io_pages_free(&page_array, ret > 0 ? ret : 0); return ret < 0 ? ERR_PTR(ret) : ERR_PTR(-EFAULT); } - /* - * Should be a single page. If the ring is small enough that we can - * use a normal page, that is fine. If we need multiple pages, then - * userspace should use a huge page. That's the only way to guarantee - * that we get contigious memory, outside of just being lucky or - * (currently) having low memory fragmentation. - */ - if (page_array[0] != page_array[ret - 1]) - goto err; - /* - * Can't support mapping user allocated ring memory on 32-bit archs - * where it could potentially reside in highmem. Just fail those with - * -EINVAL, just like we did on kernels that didn't support this - * feature. - */ + page_addr = page_address(page_array[0]); for (i = 0; i < nr_pages; i++) { - if (PageHighMem(page_array[i])) { - ret = -EINVAL; + ret = -EINVAL; + + /* + * Can't support mapping user allocated ring memory on 32-bit + * archs where it could potentially reside in highmem. Just + * fail those with -EINVAL, just like we did on kernels that + * didn't support this feature. + */ + if (PageHighMem(page_array[i])) goto err; - } + + /* + * No support for discontig pages for now, should either be a + * single normal page, or a huge page. Later on we can add + * support for remapping discontig pages, for now we will + * just fail them with EINVAL. + */ + if (page_address(page_array[i]) != page_addr) + goto err; + page_addr += PAGE_SIZE; } *pages = page_array; From d964a58c62cf86c9ab63c15fb9601838ce48c835 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 22 Nov 2023 11:26:07 +0800 Subject: [PATCH 0682/1397] iommu/vt-d: Fix incorrect cache invalidation for mm notification commit e7ad6c2a4b1aa710db94060b716f53c812cef565 upstream. Commit 6bbd42e2df8f ("mmu_notifiers: call invalidate_range() when invalidating TLBs") moved the secondary TLB invalidations into the TLB invalidation functions to ensure that all secondary TLB invalidations happen at the same time as the CPU invalidation and added a flush-all type of secondary TLB invalidation for the batched mode, where a range of [0, -1UL) is used to indicates that the range extends to the end of the address space. However, using an end address of -1UL caused an overflow in the Intel IOMMU driver, where the end address was rounded up to the next page. As a result, both the IOTLB and device ATC were not invalidated correctly. Add a flush all helper function and call it when the invalidation range is from 0 to -1UL, ensuring that the entire caches are invalidated correctly. Fixes: 6bbd42e2df8f ("mmu_notifiers: call invalidate_range() when invalidating TLBs") Cc: stable@vger.kernel.org Cc: Huang Ying Cc: Alistair Popple Tested-by: Luo Yuzhang # QAT Tested-by: Tony Zhu # DSA Reviewed-by: Jason Gunthorpe Reviewed-by: Alistair Popple Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20231117090933.75267-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel/svm.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 50a481c895b8..ac12f76c1212 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -216,6 +216,27 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, rcu_read_unlock(); } +static void intel_flush_svm_all(struct intel_svm *svm) +{ + struct device_domain_info *info; + struct intel_svm_dev *sdev; + + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) { + info = dev_iommu_priv_get(sdev->dev); + + qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0); + if (info->ats_enabled) { + qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, + svm->pasid, sdev->qdep, + 0, 64 - VTD_PAGE_SHIFT); + quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT, + svm->pasid, sdev->qdep); + } + } + rcu_read_unlock(); +} + /* Pages have been freed at this point */ static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, struct mm_struct *mm, @@ -223,6 +244,11 @@ static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, { struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + if (start == 0 && end == -1UL) { + intel_flush_svm_all(svm); + return; + } + intel_flush_svm_range(svm, start, (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0); } From 09f7520048eaaee9709091cd2787966f807da7c5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Nov 2023 17:54:40 -0700 Subject: [PATCH 0683/1397] io_uring: free io_buffer_list entries via RCU commit 5cf4f52e6d8aa2d3b7728f568abbf9d42a3af252 upstream. mmap_lock nests under uring_lock out of necessity, as we may be doing user copies with uring_lock held. However, for mmap of provided buffer rings, we attempt to grab uring_lock with mmap_lock already held from do_mmap(). This makes lockdep, rightfully, complain: WARNING: possible circular locking dependency detected 6.7.0-rc1-00009-gff3337ebaf94-dirty #4438 Not tainted ------------------------------------------------------ buf-ring.t/442 is trying to acquire lock: ffff00020e1480a8 (&ctx->uring_lock){+.+.}-{3:3}, at: io_uring_validate_mmap_request.isra.0+0x4c/0x140 but task is already holding lock: ffff0000dc226190 (&mm->mmap_lock){++++}-{3:3}, at: vm_mmap_pgoff+0x124/0x264 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_lock){++++}-{3:3}: __might_fault+0x90/0xbc io_register_pbuf_ring+0x94/0x488 __arm64_sys_io_uring_register+0x8dc/0x1318 invoke_syscall+0x5c/0x17c el0_svc_common.constprop.0+0x108/0x130 do_el0_svc+0x2c/0x38 el0_svc+0x4c/0x94 el0t_64_sync_handler+0x118/0x124 el0t_64_sync+0x168/0x16c -> #0 (&ctx->uring_lock){+.+.}-{3:3}: __lock_acquire+0x19a0/0x2d14 lock_acquire+0x2e0/0x44c __mutex_lock+0x118/0x564 mutex_lock_nested+0x20/0x28 io_uring_validate_mmap_request.isra.0+0x4c/0x140 io_uring_mmu_get_unmapped_area+0x3c/0x98 get_unmapped_area+0xa4/0x158 do_mmap+0xec/0x5b4 vm_mmap_pgoff+0x158/0x264 ksys_mmap_pgoff+0x1d4/0x254 __arm64_sys_mmap+0x80/0x9c invoke_syscall+0x5c/0x17c el0_svc_common.constprop.0+0x108/0x130 do_el0_svc+0x2c/0x38 el0_svc+0x4c/0x94 el0t_64_sync_handler+0x118/0x124 el0t_64_sync+0x168/0x16c From that mmap(2) path, we really just need to ensure that the buffer list doesn't go away from underneath us. For the lower indexed entries, they never go away until the ring is freed and we can always sanely reference those as long as the caller has a file reference. For the higher indexed ones in our xarray, we just need to ensure that the buffer list remains valid while we return the address of it. Free the higher indexed io_buffer_list entries via RCU. With that we can avoid needing ->uring_lock inside mmap(2), and simply hold the RCU read lock around the buffer list lookup and address check. To ensure that the arrayed lookup either returns a valid fully formulated entry via RCU lookup, add an 'is_ready' flag that we access with store and release memory ordering. This isn't needed for the xarray lookups, but doesn't hurt either. Since this isn't a fast path, retain it across both types. Similarly, for the allocated array inside the ctx, ensure we use the proper load/acquire as setup could in theory be running in parallel with mmap. While in there, add a few lockdep checks for documentation purposes. Cc: stable@vger.kernel.org Fixes: c56e022c0a27 ("io_uring: add support for user mapped provided buffer ring") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 4 +-- io_uring/kbuf.c | 64 ++++++++++++++++++++++++++++++++++++--------- io_uring/kbuf.h | 3 +++ 3 files changed, 56 insertions(+), 15 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 44c8512a0355..efd53316be6b 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3452,9 +3452,9 @@ static void *io_uring_validate_mmap_request(struct file *file, unsigned int bgid; bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; - mutex_lock(&ctx->uring_lock); + rcu_read_lock(); ptr = io_pbuf_get_address(ctx, bgid); - mutex_unlock(&ctx->uring_lock); + rcu_read_unlock(); if (!ptr) return ERR_PTR(-EINVAL); break; diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index f6e5ae026e4b..cd8c688c1a97 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -31,19 +31,35 @@ struct io_provide_buf { __u16 bid; }; +static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, + struct io_buffer_list *bl, + unsigned int bgid) +{ + if (bl && bgid < BGID_ARRAY) + return &bl[bgid]; + + return xa_load(&ctx->io_bl_xa, bgid); +} + static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, unsigned int bgid) { - if (ctx->io_bl && bgid < BGID_ARRAY) - return &ctx->io_bl[bgid]; + lockdep_assert_held(&ctx->uring_lock); - return xa_load(&ctx->io_bl_xa, bgid); + return __io_buffer_get_list(ctx, ctx->io_bl, bgid); } static int io_buffer_add_list(struct io_ring_ctx *ctx, struct io_buffer_list *bl, unsigned int bgid) { + /* + * Store buffer group ID and finally mark the list as visible. + * The normal lookup doesn't care about the visibility as we're + * always under the ->uring_lock, but the RCU lookup from mmap does. + */ bl->bgid = bgid; + smp_store_release(&bl->is_ready, 1); + if (bgid < BGID_ARRAY) return 0; @@ -194,18 +210,19 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, static __cold int io_init_bl_list(struct io_ring_ctx *ctx) { + struct io_buffer_list *bl; int i; - ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), - GFP_KERNEL); - if (!ctx->io_bl) + bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL); + if (!bl) return -ENOMEM; for (i = 0; i < BGID_ARRAY; i++) { - INIT_LIST_HEAD(&ctx->io_bl[i].buf_list); - ctx->io_bl[i].bgid = i; + INIT_LIST_HEAD(&bl[i].buf_list); + bl[i].bgid = i; } + smp_store_release(&ctx->io_bl, bl); return 0; } @@ -270,7 +287,7 @@ void io_destroy_buffers(struct io_ring_ctx *ctx) xa_for_each(&ctx->io_bl_xa, index, bl) { xa_erase(&ctx->io_bl_xa, bl->bgid); __io_remove_buffers(ctx, bl, -1U); - kfree(bl); + kfree_rcu(bl, rcu); } while (!list_empty(&ctx->io_buffers_pages)) { @@ -455,7 +472,16 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) INIT_LIST_HEAD(&bl->buf_list); ret = io_buffer_add_list(ctx, bl, p->bgid); if (ret) { - kfree(bl); + /* + * Doesn't need rcu free as it was never visible, but + * let's keep it consistent throughout. Also can't + * be a lower indexed array group, as adding one + * where lookup failed cannot happen. + */ + if (p->bgid >= BGID_ARRAY) + kfree_rcu(bl, rcu); + else + WARN_ON_ONCE(1); goto err; } } @@ -550,6 +576,8 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) struct io_buffer_list *bl, *free_bl = NULL; int ret; + lockdep_assert_held(&ctx->uring_lock); + if (copy_from_user(®, arg, sizeof(reg))) return -EFAULT; @@ -604,7 +632,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return 0; } - kfree(free_bl); + kfree_rcu(free_bl, rcu); return ret; } @@ -613,6 +641,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) struct io_uring_buf_reg reg; struct io_buffer_list *bl; + lockdep_assert_held(&ctx->uring_lock); + if (copy_from_user(®, arg, sizeof(reg))) return -EFAULT; if (reg.resv[0] || reg.resv[1] || reg.resv[2]) @@ -629,7 +659,7 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) __io_remove_buffers(ctx, bl, -1U); if (bl->bgid >= BGID_ARRAY) { xa_erase(&ctx->io_bl_xa, bl->bgid); - kfree(bl); + kfree_rcu(bl, rcu); } return 0; } @@ -638,7 +668,15 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) { struct io_buffer_list *bl; - bl = io_buffer_get_list(ctx, bgid); + bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); + + /* + * Ensure the list is fully setup. Only strictly needed for RCU lookup + * via mmap, and in that case only for the array indexed groups. For + * the xarray lookups, it's either visible and ready, or not at all. + */ + if (!smp_load_acquire(&bl->is_ready)) + return NULL; if (!bl || !bl->is_mmap) return NULL; diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index d14345ef61fc..274f8b7fb8ab 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -15,6 +15,7 @@ struct io_buffer_list { struct page **buf_pages; struct io_uring_buf_ring *buf_ring; }; + struct rcu_head rcu; }; __u16 bgid; @@ -28,6 +29,8 @@ struct io_buffer_list { __u8 is_mapped; /* ring mapped provided buffers, but mmap'ed by application */ __u8 is_mmap; + /* bl is visible from an RCU point of view for lookup */ + __u8 is_ready; }; struct io_buffer { From 59ae50ebf09e55672d7fae31ee7ed414916116ec Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 11 Aug 2023 13:15:20 +1000 Subject: [PATCH 0684/1397] nouveau: find the smallest page allocation to cover a buffer alloc. commit e9ba37d9f9a6872b069dd893bd86a7d77ba8c153 upstream. With the new uapi we don't have the comp flags on the allocation, so we shouldn't be using the first size that works, we should be iterating until we get the correct one. This reduces allocations from 2MB to 64k in lots of places. Fixes dEQP-VK.memory.allocation.basic.size_8KiB.forward.count_4000 on my ampere/gsp system. Cc: stable@vger.kernel.org # v6.6 Signed-off-by: Dave Airlie Reviewed-by: Faith Ekstrand Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20230811031520.248341-1-airlied@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_bo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 0f3bd187ede6..280d1d9a559b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -318,8 +318,9 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) continue; - if (pi < 0) - pi = i; + /* pick the last one as it will be smallest. */ + pi = i; + /* Stop once the buffer is larger than the current page size. */ if (*size >= 1ULL << vmm->page[i].shift) break; From 0376fd5fce9a22e88e445b636d3df82bb02a0760 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Mon, 27 Nov 2023 09:28:19 +0000 Subject: [PATCH 0685/1397] powercap: DTPM: Fix unneeded conversions to micro-Watts commit b817f1488fca548fe50e2654d84a1956a16a1a8a upstream. The power values coming from the Energy Model are already in uW. The PowerCap and DTPM frameworks operate on uW, so all places should just use the values from the EM. Fix the code by removing all of the conversion to uW still present in it. Fixes: ae6ccaa65038 (PM: EM: convert power field to micro-Watts precision and align drivers) Cc: 5.19+ # v5.19+ Signed-off-by: Lukasz Luba [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/powercap/dtpm_cpu.c | 6 +----- drivers/powercap/dtpm_devfreq.c | 11 +++-------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 2ff7717530bf..8a2f18fa3faf 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -24,7 +24,6 @@ #include #include #include -#include struct dtpm_cpu { struct dtpm dtpm; @@ -104,8 +103,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) if (pd->table[i].frequency < freq) continue; - return scale_pd_power_uw(pd_mask, pd->table[i].power * - MICROWATT_PER_MILLIWATT); + return scale_pd_power_uw(pd_mask, pd->table[i].power); } return 0; @@ -122,11 +120,9 @@ static int update_pd_power_uw(struct dtpm *dtpm) nr_cpus = cpumask_weight(&cpus); dtpm->power_min = em->table[0].power; - dtpm->power_min *= MICROWATT_PER_MILLIWATT; dtpm->power_min *= nr_cpus; dtpm->power_max = em->table[em->nr_perf_states - 1].power; - dtpm->power_max *= MICROWATT_PER_MILLIWATT; dtpm->power_max *= nr_cpus; return 0; diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c index 91276761a31d..612c3b59dd5b 100644 --- a/drivers/powercap/dtpm_devfreq.c +++ b/drivers/powercap/dtpm_devfreq.c @@ -39,10 +39,8 @@ static int update_pd_power_uw(struct dtpm *dtpm) struct em_perf_domain *pd = em_pd_get(dev); dtpm->power_min = pd->table[0].power; - dtpm->power_min *= MICROWATT_PER_MILLIWATT; dtpm->power_max = pd->table[pd->nr_perf_states - 1].power; - dtpm->power_max *= MICROWATT_PER_MILLIWATT; return 0; } @@ -54,13 +52,10 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) struct device *dev = devfreq->dev.parent; struct em_perf_domain *pd = em_pd_get(dev); unsigned long freq; - u64 power; int i; for (i = 0; i < pd->nr_perf_states; i++) { - - power = pd->table[i].power * MICROWATT_PER_MILLIWATT; - if (power > power_limit) + if (pd->table[i].power > power_limit) break; } @@ -68,7 +63,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq); - power_limit = pd->table[i - 1].power * MICROWATT_PER_MILLIWATT; + power_limit = pd->table[i - 1].power; return power_limit; } @@ -110,7 +105,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) if (pd->table[i].frequency < freq) continue; - power = pd->table[i].power * MICROWATT_PER_MILLIWATT; + power = pd->table[i].power; power *= status.busy_time; power >>= 10; From 8ebebfc3121cb51a602be07c08a3de188e69ff58 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Mon, 27 Nov 2023 16:41:21 +0530 Subject: [PATCH 0686/1397] cpufreq/amd-pstate: Fix the return value of amd_pstate_fast_switch() commit bb87be267b8ee9b40917fb5bf51be5ddb33c37c2 upstream. cpufreq_driver->fast_switch() callback expects a frequency as a return value. amd_pstate_fast_switch() was returning the return value of amd_pstate_update_freq(), which only indicates a success or failure. Fix this by making amd_pstate_fast_switch() return the target_freq when the call to amd_pstate_update_freq() is successful, and return the current frequency from policy->cur when the call to amd_pstate_update_freq() is unsuccessful. Fixes: 4badf2eb1e98 ("cpufreq: amd-pstate: Add ->fast_switch() callback") Acked-by: Huang Rui Reviewed-by: Wyes Karny Reviewed-by: Perry Yuan Cc: 6.4+ # v6.4+ Signed-off-by: Gautham R. Shenoy Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/amd-pstate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9a1e194d5cf8..300f81d36291 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -518,7 +518,9 @@ static int amd_pstate_target(struct cpufreq_policy *policy, static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { - return amd_pstate_update_freq(policy, target_freq, true); + if (!amd_pstate_update_freq(policy, target_freq, true)) + return target_freq; + return policy->cur; } static void amd_pstate_adjust_perf(unsigned int cpu, From 5f1a233efee8f4c98985210e54f0746d7d6ade17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 14 Nov 2023 13:37:09 +0100 Subject: [PATCH 0687/1397] dma-buf: fix check in dma_resv_add_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 95ba893c9f4feb836ddce627efd0bb6af6667031 upstream. It's valid to add the same fence multiple times to a dma-resv object and we shouldn't need one extra slot for each. Signed-off-by: Christian König Reviewed-by: Thomas Hellström Fixes: a3f7c10a269d5 ("dma-buf/dma-resv: check if the new fence is really later") Cc: stable@vger.kernel.org # v5.19+ Link: https://patchwork.freedesktop.org/patch/msgid/20231115093035.1889-1-christian.koenig@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/dma-resv.c | 2 +- include/linux/dma-fence.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 38b4110378de..eb8b733065b2 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -301,7 +301,7 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, dma_resv_list_entry(fobj, i, obj, &old, &old_usage); if ((old->context == fence->context && old_usage >= usage && - dma_fence_is_later(fence, old)) || + dma_fence_is_later_or_same(fence, old)) || dma_fence_is_signaled(old)) { dma_resv_list_set(fobj, i, fence, usage); dma_fence_put(old); diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index ebe78bd3d121..b3772edca2e6 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -498,6 +498,21 @@ static inline bool dma_fence_is_later(struct dma_fence *f1, return __dma_fence_is_later(f1->seqno, f2->seqno, f1->ops); } +/** + * dma_fence_is_later_or_same - return true if f1 is later or same as f2 + * @f1: the first fence from the same context + * @f2: the second fence from the same context + * + * Returns true if f1 is chronologically later than f2 or the same fence. Both + * fences must be from the same context, since a seqno is not re-used across + * contexts. + */ +static inline bool dma_fence_is_later_or_same(struct dma_fence *f1, + struct dma_fence *f2) +{ + return f1 == f2 || dma_fence_is_later(f1, f2); +} + /** * dma_fence_later - return the chronologically later fence * @f1: the first fence from the same context From b797b7f83e537875cf22b89f673baee492407589 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Nov 2023 17:08:19 -0700 Subject: [PATCH 0688/1397] io_uring: don't guard IORING_OFF_PBUF_RING with SETUP_NO_MMAP commit 6f007b1406637d3d73d42e41d7e8d9b245185e69 upstream. This flag only applies to the SQ and CQ rings, it's perfectly valid to use a mmap approach for the provided ring buffers. Move the check into where it belongs. Cc: stable@vger.kernel.org Fixes: 03d89a2de25b ("io_uring: support for user allocated memory for rings/sqes") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index efd53316be6b..5570c155128f 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3436,16 +3436,18 @@ static void *io_uring_validate_mmap_request(struct file *file, struct page *page; void *ptr; - /* Don't allow mmap if the ring was setup without it */ - if (ctx->flags & IORING_SETUP_NO_MMAP) - return ERR_PTR(-EINVAL); - switch (offset & IORING_OFF_MMAP_MASK) { case IORING_OFF_SQ_RING: case IORING_OFF_CQ_RING: + /* Don't allow mmap if the ring was setup without it */ + if (ctx->flags & IORING_SETUP_NO_MMAP) + return ERR_PTR(-EINVAL); ptr = ctx->rings; break; case IORING_OFF_SQES: + /* Don't allow mmap if the ring was setup without it */ + if (ctx->flags & IORING_SETUP_NO_MMAP) + return ERR_PTR(-EINVAL); ptr = ctx->sq_sqes; break; case IORING_OFF_PBUF_RING: { From c1114090cebd788989edfc786a7632891b4f7efb Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 15 Nov 2023 18:25:44 +0000 Subject: [PATCH 0689/1397] iommu: Avoid more races around device probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a2e7e59a94269484a83386972ca07c22fd188854 upstream. It turns out there are more subtle races beyond just the main part of __iommu_probe_device() itself running in parallel - the dev_iommu_free() on the way out of an unsuccessful probe can still manage to trip up concurrent accesses to a device's fwspec. Thus, extend the scope of iommu_probe_device_lock() to also serialise fwspec creation and initial retrieval. Reported-by: Zhenhua Huang Link: https://lore.kernel.org/linux-iommu/e2e20e1c-6450-4ac5-9804-b0000acdf7de@quicinc.com/ Fixes: 01657bc14a39 ("iommu: Avoid races around device probe") Signed-off-by: Robin Murphy Acked-by: Greg Kroah-Hartman Reviewed-by: André Draszik Tested-by: André Draszik Link: https://lore.kernel.org/r/16f433658661d7cadfea51e7c65da95826112a2b.1700071477.git.robin.murphy@arm.com Cc: stable@vger.kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/scan.c | 7 ++++++- drivers/iommu/iommu.c | 20 ++++++++++---------- drivers/iommu/of_iommu.c | 12 +++++++++--- include/linux/iommu.h | 1 + 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 691d4b7686ee..1d249d0f61ae 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1568,17 +1568,22 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, int err; const struct iommu_ops *ops; + /* Serialise to make dev->iommu stable under our potential fwspec */ + mutex_lock(&iommu_probe_device_lock); /* * If we already translated the fwspec there is nothing left to do, * return the iommu_ops. */ ops = acpi_iommu_fwspec_ops(dev); - if (ops) + if (ops) { + mutex_unlock(&iommu_probe_device_lock); return ops; + } err = iort_iommu_configure_id(dev, id_in); if (err && err != -EPROBE_DEFER) err = viot_iommu_configure(dev); + mutex_unlock(&iommu_probe_device_lock); /* * If we have reason to believe the IOMMU driver missed the initial diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c146378c7d03..3a67e636287a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -479,11 +479,12 @@ static void iommu_deinit_device(struct device *dev) dev_iommu_free(dev); } +DEFINE_MUTEX(iommu_probe_device_lock); + static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; - static DEFINE_MUTEX(iommu_probe_device_lock); struct group_device *gdev; int ret; @@ -496,17 +497,15 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list * probably be able to use device_lock() here to minimise the scope, * but for now enforcing a simple global ordering is fine. */ - mutex_lock(&iommu_probe_device_lock); + lockdep_assert_held(&iommu_probe_device_lock); /* Device is probed already if in a group */ - if (dev->iommu_group) { - ret = 0; - goto out_unlock; - } + if (dev->iommu_group) + return 0; ret = iommu_init_device(dev, ops); if (ret) - goto out_unlock; + return ret; group = dev->iommu_group; gdev = iommu_group_alloc_device(group, dev); @@ -542,7 +541,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list list_add_tail(&group->entry, group_list); } mutex_unlock(&group->mutex); - mutex_unlock(&iommu_probe_device_lock); if (dev_is_pci(dev)) iommu_dma_set_pci_32bit_workaround(dev); @@ -556,8 +554,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list iommu_deinit_device(dev); mutex_unlock(&group->mutex); iommu_group_put(group); -out_unlock: - mutex_unlock(&iommu_probe_device_lock); return ret; } @@ -567,7 +563,9 @@ int iommu_probe_device(struct device *dev) const struct iommu_ops *ops; int ret; + mutex_lock(&iommu_probe_device_lock); ret = __iommu_probe_device(dev, NULL); + mutex_unlock(&iommu_probe_device_lock); if (ret) return ret; @@ -1783,7 +1781,9 @@ static int probe_iommu_group(struct device *dev, void *data) struct list_head *group_list = data; int ret; + mutex_lock(&iommu_probe_device_lock); ret = __iommu_probe_device(dev, group_list); + mutex_unlock(&iommu_probe_device_lock); if (ret == -ENODEV) ret = 0; diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 157b286e36bf..c25b4ae6aeee 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -112,16 +112,20 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, const u32 *id) { const struct iommu_ops *ops = NULL; - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct iommu_fwspec *fwspec; int err = NO_IOMMU; if (!master_np) return NULL; + /* Serialise to make dev->iommu stable under our potential fwspec */ + mutex_lock(&iommu_probe_device_lock); + fwspec = dev_iommu_fwspec_get(dev); if (fwspec) { - if (fwspec->ops) + if (fwspec->ops) { + mutex_unlock(&iommu_probe_device_lock); return fwspec->ops; - + } /* In the deferred case, start again from scratch */ iommu_fwspec_free(dev); } @@ -155,6 +159,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, fwspec = dev_iommu_fwspec_get(dev); ops = fwspec->ops; } + mutex_unlock(&iommu_probe_device_lock); + /* * If we have reason to believe the IOMMU driver missed the initial * probe for dev, replay it to get things in order. diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c50a769d569a..0225cf7445de 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -703,6 +703,7 @@ static inline void dev_iommu_priv_set(struct device *dev, void *priv) dev->iommu->priv = priv; } +extern struct mutex iommu_probe_device_lock; int iommu_probe_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); From 0daf3b0fe4a731f17635d703e0befdbfe9d494de Mon Sep 17 00:00:00 2001 From: Markus Weippert Date: Fri, 24 Nov 2023 16:14:37 +0100 Subject: [PATCH 0690/1397] bcache: revert replacing IS_ERR_OR_NULL with IS_ERR commit bb6cc253861bd5a7cf8439e2118659696df9619f upstream. Commit 028ddcac477b ("bcache: Remove unnecessary NULL point check in node allocations") replaced IS_ERR_OR_NULL by IS_ERR. This leads to a NULL pointer dereference. BUG: kernel NULL pointer dereference, address: 0000000000000080 Call Trace: ? __die_body.cold+0x1a/0x1f ? page_fault_oops+0xd2/0x2b0 ? exc_page_fault+0x70/0x170 ? asm_exc_page_fault+0x22/0x30 ? btree_node_free+0xf/0x160 [bcache] ? up_write+0x32/0x60 btree_gc_coalesce+0x2aa/0x890 [bcache] ? bch_extent_bad+0x70/0x170 [bcache] btree_gc_recurse+0x130/0x390 [bcache] ? btree_gc_mark_node+0x72/0x230 [bcache] bch_btree_gc+0x5da/0x600 [bcache] ? cpuusage_read+0x10/0x10 ? bch_btree_gc+0x600/0x600 [bcache] bch_gc_thread+0x135/0x180 [bcache] The relevant code starts with: new_nodes[0] = NULL; for (i = 0; i < nodes; i++) { if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key))) goto out_nocoalesce; // ... out_nocoalesce: // ... for (i = 0; i < nodes; i++) if (!IS_ERR(new_nodes[i])) { // IS_ERR_OR_NULL before 028ddcac477b btree_node_free(new_nodes[i]); // new_nodes[0] is NULL rw_unlock(true, new_nodes[i]); } This patch replaces IS_ERR() by IS_ERR_OR_NULL() to fix this. Fixes: 028ddcac477b ("bcache: Remove unnecessary NULL point check in node allocations") Link: https://lore.kernel.org/all/3DF4A87A-2AC1-4893-AE5F-E921478419A9@suse.de/ Cc: stable@vger.kernel.org Cc: Zheng Wang Cc: Coly Li Signed-off-by: Markus Weippert Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 418f85f1114f..3084c57248f6 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1510,7 +1510,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, bch_keylist_free(&keylist); for (i = 0; i < nodes; i++) - if (!IS_ERR(new_nodes[i])) { + if (!IS_ERR_OR_NULL(new_nodes[i])) { btree_node_free(new_nodes[i]); rw_unlock(true, new_nodes[i]); } From c5b09a65b4bac4e1c31ce94d3cdf5ea547ea1ff5 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Wed, 22 Nov 2023 14:32:15 +0530 Subject: [PATCH 0691/1397] ext2: Fix ki_pos update for DIO buffered-io fallback case commit 8abc712ea4867a81c860853048f24e511bbc20f2 upstream. Commit "filemap: update ki_pos in generic_perform_write", made updating of ki_pos into common code in generic_perform_write() function. This also causes generic/091 to fail. This happened due to an in-flight collision with: fb5de4358e1a ("ext2: Move direct-io to use iomap"). I have chosen fixes tag based on which commit got landed later to upstream kernel. Fixes: 182c25e9c157 ("filemap: update ki_pos in generic_perform_write") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Jan Kara Message-Id: Signed-off-by: Greg Kroah-Hartman --- fs/ext2/file.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 1039e5bf90af..4ddc36f4dbd4 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -258,7 +258,6 @@ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out_unlock; } - iocb->ki_pos += status; ret += status; endbyte = pos + status - 1; ret2 = filemap_write_and_wait_range(inode->i_mapping, pos, From 38b3542305d05aa70bc2b5e2dfc3b66ea0349bbe Mon Sep 17 00:00:00 2001 From: "Abdul Halim, Mohd Syazwan" Date: Wed, 22 Nov 2023 11:26:06 +0800 Subject: [PATCH 0692/1397] iommu/vt-d: Add MTL to quirk list to skip TE disabling commit 85b80fdffa867d75dfb9084a839e7949e29064e8 upstream. The VT-d spec requires (10.4.4 Global Command Register, TE field) that: Hardware implementations supporting DMA draining must drain any in-flight DMA read/write requests queued within the Root-Complex before switching address translation on or off and reflecting the status of the command through the TES field in the Global Status register. Unfortunately, some integrated graphic devices fail to do so after some kind of power state transition. As the result, the system might stuck in iommu_disable_translation(), waiting for the completion of TE transition. Add MTL to the quirk list for those devices and skips TE disabling if the qurik hits. Fixes: b1012ca8dc4f ("iommu/vt-d: Skip TE disabling on quirky gfx dedicated iommu") Cc: stable@vger.kernel.org Signed-off-by: Abdul Halim, Mohd Syazwan Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20231116022324.30120-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 3685ba90ec88..8e878c3beec5 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4928,7 +4928,7 @@ static void quirk_igfx_skip_te_disable(struct pci_dev *dev) ver = (dev->device >> 8) & 0xff; if (ver != 0x45 && ver != 0x46 && ver != 0x4c && ver != 0x4e && ver != 0x8a && ver != 0x98 && - ver != 0x9a && ver != 0xa7) + ver != 0x9a && ver != 0xa7 && ver != 0x7d) return; if (risky_device(dev)) From 10098448a51c1c1f27df9b9b2b4a2860855c3d53 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 22 Nov 2023 12:58:11 +1000 Subject: [PATCH 0693/1397] KVM: PPC: Book3S HV: Fix KVM_RUN clobbering FP/VEC user registers commit dc158d23b33df9033bcc8e7117e8591dd2f9d125 upstream. Before running a guest, the host process (e.g., QEMU) FP/VEC registers are saved if they were being used, similarly to when the kernel uses FP registers. The guest values are then loaded into regs, and the host process registers will be restored lazily when it uses FP/VEC. KVM HV has a bug here: the host process registers do get saved, but the user MSR bits remain enabled, which indicates the registers are valid for the process. After they are clobbered by running the guest, this valid indication causes the host process to take on the FP/VEC register values of the guest. Fixes: 34e119c96b2b ("KVM: PPC: Book3S HV P9: Reduce mtmsrd instructions required to save host SPRs") Cc: stable@vger.kernel.org # v5.17+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/20231122025811.2973-1-npiggin@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 392404688cec..9452a54d356c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1198,11 +1198,11 @@ void kvmppc_save_user_regs(void) usermsr = current->thread.regs->msr; + /* Caller has enabled FP/VEC/VSX/TM in MSR */ if (usermsr & MSR_FP) - save_fpu(current); - + __giveup_fpu(current); if (usermsr & MSR_VEC) - save_altivec(current); + __giveup_altivec(current); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (usermsr & MSR_TM) { From 76aeadbd535d88d9450be7e5b2968520dc4e83d6 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Sun, 19 Nov 2023 09:18:02 -0600 Subject: [PATCH 0694/1397] powerpc: Don't clobber f0/vs0 during fp|altivec register save commit 5e1d824f9a283cbf90f25241b66d1f69adb3835b upstream. During floating point and vector save to thread data f0/vs0 are clobbered by the FPSCR/VSCR store routine. This has been obvserved to lead to userspace register corruption and application data corruption with io-uring. Fix it by restoring f0/vs0 after FPSCR/VSCR store has completed for all the FP, altivec, VMX register save paths. Tested under QEMU in kvm mode, running on a Talos II workstation with dual POWER9 DD2.2 CPUs. Additional detail (mpe): Typically save_fpu() is called from __giveup_fpu() which saves the FP regs and also *turns off FP* in the tasks MSR, meaning the kernel will reload the FP regs from the thread struct before letting the task use FP again. So in that case save_fpu() is free to clobber f0 because the FP regs no longer hold live values for the task. There is another case though, which is the path via: sys_clone() ... copy_process() dup_task_struct() arch_dup_task_struct() flush_all_to_thread() save_all() That path saves the FP regs but leaves them live. That's meant as an optimisation for a process that's using FP/VSX and then calls fork(), leaving the regs live means the parent process doesn't have to take a fault after the fork to get its FP regs back. The optimisation was added in commit 8792468da5e1 ("powerpc: Add the ability to save FPU without giving it up"). That path does clobber f0, but f0 is volatile across function calls, and typically programs reach copy_process() from userspace via a syscall wrapper function. So in normal usage f0 being clobbered across a syscall doesn't cause visible data corruption. But there is now a new path, because io-uring can call copy_process() via create_io_thread() from the signal handling path. That's OK if the signal is handled as part of syscall return, but it's not OK if the signal is handled due to some other interrupt. That path is: interrupt_return_srr_user() interrupt_exit_user_prepare() interrupt_exit_user_prepare_main() do_notify_resume() get_signal() task_work_run() create_worker_cb() create_io_worker() copy_process() dup_task_struct() arch_dup_task_struct() flush_all_to_thread() save_all() if (tsk->thread.regs->msr & MSR_FP) save_fpu() # f0 is clobbered and potentially live in userspace Note the above discussion applies equally to save_altivec(). Fixes: 8792468da5e1 ("powerpc: Add the ability to save FPU without giving it up") Cc: stable@vger.kernel.org # v4.6+ Closes: https://lore.kernel.org/all/480932026.45576726.1699374859845.JavaMail.zimbra@raptorengineeringinc.com/ Closes: https://lore.kernel.org/linuxppc-dev/480221078.47953493.1700206777956.JavaMail.zimbra@raptorengineeringinc.com/ Tested-by: Timothy Pearson Tested-by: Jens Axboe Signed-off-by: Timothy Pearson [mpe: Reword change log to describe exact path of corruption & other minor tweaks] Signed-off-by: Michael Ellerman Link: https://msgid.link/1921539696.48534988.1700407082933.JavaMail.zimbra@raptorengineeringinc.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/fpu.S | 13 +++++++++++++ arch/powerpc/kernel/vector.S | 2 ++ 2 files changed, 15 insertions(+) diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 6a9acfb690c9..2f8f3f93cbb6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -23,6 +23,15 @@ #include #ifdef CONFIG_VSX +#define __REST_1FPVSR(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_FPR(n,base); \ + b 3f; \ +2: REST_VSR(n,c,base); \ +3: + #define __REST_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: SAVE_32VSRS(n,c,base); \ 3: #else +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) SAVE_32FPVSRS(0, R4, R3) mffs fr0 stfd fr0,FPSTATE_FPSCR(r3) + REST_1FPVSR(0, R4, R3) blr EXPORT_SYMBOL(store_fp_state) @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) 2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) + REST_1FPVSR(0, R4, R6) blr diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 4094e4c4c77a..80b3f6e476b6 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -33,6 +33,7 @@ _GLOBAL(store_vr_state) mfvscr v0 li r4, VRSTATE_VSCR stvx v0, r4, r3 + lvx v0, 0, r3 blr EXPORT_SYMBOL(store_vr_state) @@ -109,6 +110,7 @@ _GLOBAL(save_altivec) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 + lvx v0,0,r7 blr #ifdef CONFIG_VSX From bf2040129110724ca59cb552f02a78b1ac12c60b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 15:37:50 +0100 Subject: [PATCH 0695/1397] parisc: Mark ex_table entries 32-bit aligned in assembly.h commit e11d4cccd094a7cd4696c8c42e672c76c092dad5 upstream. Add an align statement to tell the linker that all ex_table entries and as such the whole ex_table section should be 32-bit aligned in vmlinux and modules. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/assembly.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 75677b526b2b..74d17d7e759d 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -574,6 +574,7 @@ */ #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr) \ .section __ex_table,"aw" ! \ + .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ .previous From b8ee167df9e7d3f510908bfaf85333c1f8b34102 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 15:39:03 +0100 Subject: [PATCH 0696/1397] parisc: Mark ex_table entries 32-bit aligned in uaccess.h commit a80aeb86542a50aa8521729ea4cc731ee7174f03 upstream. Add an align statement to tell the linker that all ex_table entries and as such the whole ex_table section should be 32-bit aligned in vmlinux and modules. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/uaccess.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 2bf660eabe42..4165079898d9 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -41,6 +41,7 @@ struct exception_table_entry { #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ ".previous\n" From d68cafc2357d8c229150246984b521b901e17384 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:30:49 +0100 Subject: [PATCH 0697/1397] parisc: Use natural CPU alignment for bug_table commit fe76a1349f235969381832c83d703bc911021eb6 upstream. Make sure that the __bug_table section gets 32- or 64-bit aligned, depending if a 32- or 64-bit kernel is being built. Mark it non-writeable and use .blockz instead of the .org assembler directive to pad the struct. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/bug.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 4b6d60b94124..b9cad0bb4461 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -28,13 +28,15 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %4\n" \ "2:\t" ASM_WORD_INSN "1b, %c0\n" \ - "\t.short %c1, %c2\n" \ - "\t.org 2b+%c3\n" \ + "\t.short %1, %2\n" \ + "\t.blockz %3-2*%4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (0), "i" (sizeof(struct bug_entry)) ); \ + "i" (0), "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ unreachable(); \ } while(0) @@ -51,27 +53,31 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %4\n" \ "2:\t" ASM_WORD_INSN "1b, %c0\n" \ - "\t.short %c1, %c2\n" \ - "\t.org 2b+%c3\n" \ + "\t.short %1, %2\n" \ + "\t.blockz %3-2*%4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)) ); \ + "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ } while(0) #else #define __WARN_FLAGS(flags) \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %2\n" \ "2:\t" ASM_WORD_INSN "1b\n" \ - "\t.short %c0\n" \ - "\t.org 2b+%c1\n" \ + "\t.short %0\n" \ + "\t.blockz %1-%2-2\n" \ "\t.popsection" \ : : "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)) ); \ + "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ } while(0) #endif From fb997f16c20bd310d9fa9c6c71ea7205bb394be2 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Nov 2023 09:11:56 +0100 Subject: [PATCH 0698/1397] parisc: Mark lock_aligned variables 16-byte aligned on SMP commit b28fc0d8739c03e7b6c44914a9d00d4c6dddc0ea upstream. On parisc we need 16-byte alignment for variables which are used for locking. Mark the __lock_aligned attribute acordingly so that the .data..lock_aligned section will get that alignment in the generated object files. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/ldcw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index ee9e071859b2..47ebc4c91eaf 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -55,7 +55,7 @@ }) #ifdef CONFIG_SMP -# define __lock_aligned __section(".data..lock_aligned") +# define __lock_aligned __section(".data..lock_aligned") __aligned(16) #endif #endif /* __PARISC_LDCW_H */ From e0837630f3c0eaa628cb50b2e834b68bcd861e5a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Nov 2023 20:28:27 +0100 Subject: [PATCH 0699/1397] parisc: Drop the HP-UX ENOSYM and EREMOTERELEASE error codes commit e5f3e299a2b1e9c3ece24a38adfc089aef307e8a upstream. Those return codes are only defined for the parisc architecture and are leftovers from when we wanted to be HP-UX compatible. They are not returned by any Linux kernel syscall but do trigger problems with the glibc strerrorname_np() and strerror() functions as reported in glibc issue #31080. There is no need to keep them, so simply remove them. Signed-off-by: Helge Deller Reported-by: Bruno Haible Closes: https://sourceware.org/bugzilla/show_bug.cgi?id=31080 Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/uapi/asm/errno.h | 2 -- lib/errname.c | 6 ------ tools/arch/parisc/include/uapi/asm/errno.h | 2 -- 3 files changed, 10 deletions(-) diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h index 87245c584784..8d94739d75c6 100644 --- a/arch/parisc/include/uapi/asm/errno.h +++ b/arch/parisc/include/uapi/asm/errno.h @@ -75,7 +75,6 @@ /* We now return you to your regularly scheduled HPUX. */ -#define ENOSYM 215 /* symbol does not exist in executable */ #define ENOTSOCK 216 /* Socket operation on non-socket */ #define EDESTADDRREQ 217 /* Destination address required */ #define EMSGSIZE 218 /* Message too long */ @@ -101,7 +100,6 @@ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ #define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ diff --git a/lib/errname.c b/lib/errname.c index 67739b174a8c..0c336b0f12f6 100644 --- a/lib/errname.c +++ b/lib/errname.c @@ -111,9 +111,6 @@ static const char *names_0[] = { E(ENOSPC), E(ENOSR), E(ENOSTR), -#ifdef ENOSYM - E(ENOSYM), -#endif E(ENOSYS), E(ENOTBLK), E(ENOTCONN), @@ -144,9 +141,6 @@ static const char *names_0[] = { #endif E(EREMOTE), E(EREMOTEIO), -#ifdef EREMOTERELEASE - E(EREMOTERELEASE), -#endif E(ERESTART), E(ERFKILL), E(EROFS), diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h index 87245c584784..8d94739d75c6 100644 --- a/tools/arch/parisc/include/uapi/asm/errno.h +++ b/tools/arch/parisc/include/uapi/asm/errno.h @@ -75,7 +75,6 @@ /* We now return you to your regularly scheduled HPUX. */ -#define ENOSYM 215 /* symbol does not exist in executable */ #define ENOTSOCK 216 /* Socket operation on non-socket */ #define EDESTADDRREQ 217 /* Destination address required */ #define EMSGSIZE 218 /* Message too long */ @@ -101,7 +100,6 @@ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ #define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ From 405870b8a861810a5686828dfa05f9f8c6423847 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:14:39 +0100 Subject: [PATCH 0700/1397] parisc: Mark jump_table naturally aligned commit 07eecff8ae78df7f28800484d31337e1f9bfca3a upstream. The jump_table stores two 32-bit words and one 32- (on 32-bit kernel) or one 64-bit word (on 64-bit kernel). Ensure that the last word is always 64-bit aligned on a 64-bit kernel by aligning the whole structure on sizeof(long). Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/jump_label.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index af2a598bc0f8..94428798b6aa 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -15,10 +15,12 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" + ".align %1\n\t" ".word 1b - ., %l[l_yes] - .\n\t" __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" ".popsection\n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) + : : l_yes); return false; l_yes: @@ -30,10 +32,12 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool asm_volatile_goto("1:\n\t" "b,n %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" + ".align %1\n\t" ".word 1b - ., %l[l_yes] - .\n\t" __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" ".popsection\n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) + : : l_yes); return false; l_yes: From 3f01d7bb6ee262b89b614515f706d1968e184767 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Nov 2023 09:16:02 +0100 Subject: [PATCH 0701/1397] parisc: Ensure 32-bit alignment on parisc unwind section commit c9fcb2b65c2849e8ff3be23fd8828312fb68dc19 upstream. Make sure the .PARISC.unwind section will be 32-bit aligned. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 58694d1989c2..548051b0b4af 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -130,6 +130,7 @@ SECTIONS RO_DATA(8) /* unwind info */ + . = ALIGN(4); .PARISC.unwind : { __start___unwind = .; *(.PARISC.unwind) From 3b6ff6bb7e877ad4c424a62872132ac08c35fafb Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:10:20 +0100 Subject: [PATCH 0702/1397] parisc: Mark altinstructions read-only and 32-bit aligned commit 33f806da2df68606f77d7b892cd1298ba3d463e8 upstream. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/alternative.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/parisc/include/asm/alternative.h b/arch/parisc/include/asm/alternative.h index 1ed45fd085d3..1eb488f25b83 100644 --- a/arch/parisc/include/asm/alternative.h +++ b/arch/parisc/include/asm/alternative.h @@ -34,7 +34,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* Alternative SMP implementation. */ #define ALTERNATIVE(cond, replacement) "!0:" \ - ".section .altinstructions, \"aw\" !" \ + ".section .altinstructions, \"a\" !" \ + ".align 4 !" \ ".word (0b-4-.) !" \ ".hword 1, " __stringify(cond) " !" \ ".word " __stringify(replacement) " !" \ @@ -44,7 +45,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* to replace one single instructions by a new instruction */ #define ALTERNATIVE(from, to, cond, replacement)\ - .section .altinstructions, "aw" ! \ + .section .altinstructions, "a" ! \ + .align 4 ! \ .word (from - .) ! \ .hword (to - from)/4, cond ! \ .word replacement ! \ @@ -52,7 +54,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* to replace multiple instructions by new code */ #define ALTERNATIVE_CODE(from, num_instructions, cond, new_instr_ptr)\ - .section .altinstructions, "aw" ! \ + .section .altinstructions, "a" ! \ + .align 4 ! \ .word (from - .) ! \ .hword -num_instructions, cond ! \ .word (new_instr_ptr - .) ! \ From 9f7894e299140bb0e52c099ebb0813f4b3f3cfb0 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 2 Nov 2023 07:54:50 +1030 Subject: [PATCH 0703/1397] btrfs: add dmesg output for first mount and last unmount of a filesystem commit 2db313205f8b96eea467691917138d646bb50aef upstream. There is a feature request to add dmesg output when unmounting a btrfs. There are several alternative methods to do the same thing, but with their own problems: - Use eBPF to watch btrfs_put_super()/open_ctree() Not end user friendly, they have to dip their head into the source code. - Watch for directory /sys/fs// This is way more simple, but still requires some simple device -> uuid lookups. And a script needs to use inotify to watch /sys/fs/. Compared to all these, directly outputting the information into dmesg would be the most simple one, with both device and UUID included. And since we're here, also add the output when mounting a filesystem for the first time for parity. A more fine grained monitoring of subvolume mounts should be done by another layer, like audit. Now mounting a btrfs with all default mkfs options would look like this: [81.906566] BTRFS info (device dm-8): first mount of filesystem 633b5c16-afe3-4b79-b195-138fe145e4f2 [81.907494] BTRFS info (device dm-8): using crc32c (crc32c-intel) checksum algorithm [81.908258] BTRFS info (device dm-8): using free space tree [81.912644] BTRFS info (device dm-8): auto enabling async discard [81.913277] BTRFS info (device dm-8): checking UUID tree [91.668256] BTRFS info (device dm-8): last unmount of filesystem 633b5c16-afe3-4b79-b195-138fe145e4f2 CC: stable@vger.kernel.org # 5.4+ Link: https://github.com/kdave/btrfs-progs/issues/689 Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 1 + fs/btrfs/super.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6b78517b1fd5..71efb6883f30 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3197,6 +3197,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_alloc; } + btrfs_info(fs_info, "first mount of filesystem %pU", disk_super->fsid); /* * Verify the type first, if that or the checksum value are * corrupted, we'll find out diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1a093ec0f7e3..de0bfebce126 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -79,7 +79,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data); static void btrfs_put_super(struct super_block *sb) { - close_ctree(btrfs_sb(sb)); + struct btrfs_fs_info *fs_info = btrfs_sb(sb); + + btrfs_info(fs_info, "last unmount of filesystem %pU", fs_info->fs_devices->fsid); + close_ctree(fs_info); } enum { From 88287582f4d2dec54f934f3ab39cead5d880e6c2 Mon Sep 17 00:00:00 2001 From: Bragatheswaran Manickavel Date: Sat, 18 Nov 2023 14:40:12 +0530 Subject: [PATCH 0704/1397] btrfs: ref-verify: fix memory leaks in btrfs_ref_tree_mod() commit f91192cd68591c6b037da345bc9fcd5e50540358 upstream. In btrfs_ref_tree_mod(), when !parent 're' was allocated through kmalloc(). In the following code, if an error occurs, the execution will be redirected to 'out' or 'out_unlock' and the function will be exited. However, on some of the paths, 're' are not deallocated and may lead to memory leaks. For example: lookup_block_entry() for 'be' returns NULL, the out label will be invoked. During that flow ref and 'ra' are freed but not 're', which can potentially lead to a memory leak. CC: stable@vger.kernel.org # 5.10+ Reported-and-tested-by: syzbot+d66de4cbf532749df35f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d66de4cbf532749df35f Signed-off-by: Bragatheswaran Manickavel Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ref-verify.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 95d28497de7c..e646662e61c6 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -791,6 +791,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, dump_ref_action(fs_info, ra); kfree(ref); kfree(ra); + kfree(re); goto out_unlock; } else if (be->num_refs == 0) { btrfs_err(fs_info, @@ -800,6 +801,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, dump_ref_action(fs_info, ra); kfree(ref); kfree(ra); + kfree(re); goto out_unlock; } From 3952f84ed8428c4d10543ad458d68d779dcbc3c5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 21 Nov 2023 13:38:32 +0000 Subject: [PATCH 0705/1397] btrfs: fix off-by-one when checking chunk map includes logical address commit 5fba5a571858ce2d787fdaf55814e42725bfa895 upstream. At btrfs_get_chunk_map() we get the extent map for the chunk that contains the given logical address stored in the 'logical' argument. Then we do sanity checks to verify the extent map contains the logical address. One of these checks verifies if the extent map covers a range with an end offset behind the target logical address - however this check has an off-by-one error since it will consider an extent map whose start offset plus its length matches the target logical address as inclusive, while the fact is that the last byte it covers is behind the target logical address (by 1). So fix this condition by using '<=' rather than '<' when comparing the extent map's "start + length" against the target logical address. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c26d3499a289..3200794e88c8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3050,7 +3050,7 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, return ERR_PTR(-EINVAL); } - if (em->start > logical || em->start + em->len < logical) { + if (em->start > logical || em->start + em->len <= logical) { btrfs_crit(fs_info, "found a bad mapping, wanted %llu-%llu, found %llu-%llu", logical, length, em->start, em->start + em->len); From da2dbbb73e33d8326d7c0bd54810033706d4a3f2 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 24 Nov 2023 17:48:31 +0100 Subject: [PATCH 0706/1397] btrfs: send: ensure send_fd is writable commit 0ac1d13a55eb37d398b63e6ff6db4a09a2c9128c upstream. kernel_write() requires the caller to ensure that the file is writable. Let's do that directly after looking up the ->send_fd. We don't need a separate bailout path because the "out" path already does fput() if ->send_filp is non-NULL. This has no security impact for two reasons: - the ioctl requires CAP_SYS_ADMIN - __kernel_write() bails out on read-only files - but only since 5.8, see commit a01ac27be472 ("fs: check FMODE_WRITE in __kernel_write") Reported-and-tested-by: syzbot+12e098239d20385264d3@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=12e098239d20385264d3 Fixes: 31db9f7c23fb ("Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Jann Horn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 3a566150c531..db94eefda27e 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -8158,7 +8158,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) } sctx->send_filp = fget(arg->send_fd); - if (!sctx->send_filp) { + if (!sctx->send_filp || !(sctx->send_filp->f_mode & FMODE_WRITE)) { ret = -EBADF; goto out; } From 47ec6065fcd8bd0139676247689be341d15fdf6e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 21 Nov 2023 13:38:33 +0000 Subject: [PATCH 0707/1397] btrfs: make error messages more clear when getting a chunk map commit 7d410d5efe04e42a6cd959bfe6d59d559fdf8b25 upstream. When getting a chunk map, at btrfs_get_chunk_map(), we do some sanity checks to verify we found a chunk map and that map found covers the logical address the caller passed in. However the messages aren't very clear in the sense that don't mention the issue is with a chunk map and one of them prints the 'length' argument as if it were the end offset of the requested range (while the in the string format we use %llu-%llu which suggests a range, and the second %llu-%llu is actually a range for the chunk map). So improve these two details in the error messages. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3200794e88c8..722a1dde7563 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3045,15 +3045,16 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, read_unlock(&em_tree->lock); if (!em) { - btrfs_crit(fs_info, "unable to find logical %llu length %llu", + btrfs_crit(fs_info, + "unable to find chunk map for logical %llu length %llu", logical, length); return ERR_PTR(-EINVAL); } if (em->start > logical || em->start + em->len <= logical) { btrfs_crit(fs_info, - "found a bad mapping, wanted %llu-%llu, found %llu-%llu", - logical, length, em->start, em->start + em->len); + "found a bad chunk map, wanted %llu-%llu, found %llu-%llu", + logical, logical + length, em->start, em->start + em->len); free_extent_map(em); return ERR_PTR(-EINVAL); } From 33357859faf221c0cb743cce45901f322b371a9b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 24 Nov 2023 14:53:50 +1030 Subject: [PATCH 0708/1397] btrfs: free the allocated memory if btrfs_alloc_page_array() fails commit 94dbf7c0871f7ae6349ba4b0341ce8f5f98a071d upstream. [BUG] If btrfs_alloc_page_array() fail to allocate all pages but part of the slots, then the partially allocated pages would be leaked in function btrfs_submit_compressed_read(). [CAUSE] As explicitly stated, if btrfs_alloc_page_array() returned -ENOMEM, caller is responsible to free the partially allocated pages. For the existing call sites, most of them are fine: - btrfs_raid_bio::stripe_pages Handled by free_raid_bio(). - extent_buffer::pages[] Handled btrfs_release_extent_buffer_pages(). - scrub_stripe::pages[] Handled by release_scrub_stripe(). But there is one exception in btrfs_submit_compressed_read(), if btrfs_alloc_page_array() failed, we didn't cleanup the array and freed the array pointer directly. Initially there is still the error handling in commit dd137dd1f2d7 ("btrfs: factor out allocating an array of pages"), but later in commit 544fe4a903ce ("btrfs: embed a btrfs_bio into struct compressed_bio"), the error handling is removed, leading to the possible memory leak. [FIX] This patch would add back the error handling first, then to prevent such situation from happening again, also Make btrfs_alloc_page_array() to free the allocated pages as a extra safety net, then we don't need to add the error handling to btrfs_submit_compressed_read(). Fixes: 544fe4a903ce ("btrfs: embed a btrfs_bio into struct compressed_bio") CC: stable@vger.kernel.org # 6.4+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index caccd0376342..1530df88370c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -675,8 +675,8 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio) * the array will be skipped * * Return: 0 if all pages were able to be allocated; - * -ENOMEM otherwise, and the caller is responsible for freeing all - * non-null page pointers in the array. + * -ENOMEM otherwise, the partially allocated pages would be freed and + * the array slots zeroed */ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array) { @@ -695,8 +695,13 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array) * though alloc_pages_bulk_array() falls back to alloc_page() * if it could not bulk-allocate. So we must be out of memory. */ - if (allocated == last) + if (allocated == last) { + for (int i = 0; i < allocated; i++) { + __free_page(page_array[i]); + page_array[i] = NULL; + } return -ENOMEM; + } memalloc_retry_wait(GFP_NOFS); } From 4a1a4bf2261e0772d780a98fabfbed718b83ba3a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Nov 2023 17:44:11 +0100 Subject: [PATCH 0709/1397] btrfs: fix 64bit compat send ioctl arguments not initializing version member commit 5de0434bc064606d6b7467ec3e5ad22963a18c04 upstream. When the send protocol versioning was added in 5.16 e77fbf990316 ("btrfs: send: prepare for v2 protocol"), the 32/64bit compat code was not updated (added by 2351f431f727 ("btrfs: fix send ioctl on 32bit with 64bit kernel")), missing the version struct member. The compat code is probably rarely used, nobody reported any bugs. Found by tool https://github.com/jirislaby/clang-struct . Fixes: e77fbf990316 ("btrfs: send: prepare for v2 protocol") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4cb4065453dd..0b120716aeb9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4351,6 +4351,7 @@ static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat arg->clone_sources = compat_ptr(args32.clone_sources); arg->parent_root = args32.parent_root; arg->flags = args32.flags; + arg->version = args32.version; memcpy(arg->reserved, args32.reserved, sizeof(args32.reserved)); #else From 4ca5f54af7d3b60613991b201223c2dc6ebdf1c8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Nov 2023 20:53:52 -0700 Subject: [PATCH 0710/1397] io_uring: enable io_mem_alloc/free to be used in other parts commit edecf1689768452ba1a64b7aaf3a47a817da651a upstream. In preparation for using these helpers, make them non-static and add them to our internal header. Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 4 ++-- io_uring/io_uring.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5570c155128f..dd23b9459aab 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2659,7 +2659,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; } -static void io_mem_free(void *ptr) +void io_mem_free(void *ptr) { if (!ptr) return; @@ -2771,7 +2771,7 @@ static void io_rings_free(struct io_ring_ctx *ctx) } } -static void *io_mem_alloc(size_t size) +void *io_mem_alloc(size_t size) { gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; void *ret; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 0bc145614a6e..d2bad1df347d 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -86,6 +86,9 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx); bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all); +void *io_mem_alloc(size_t size); +void io_mem_free(void *ptr); + #if defined(CONFIG_PROVE_LOCKING) static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) { From 7138ebbe65caf65f52b923d4ef819c77d04ea671 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Nov 2023 16:47:04 -0700 Subject: [PATCH 0711/1397] io_uring/kbuf: defer release of mapped buffer rings commit c392cbecd8eca4c53f2bf508731257d9d0a21c2d upstream. If a provided buffer ring is setup with IOU_PBUF_RING_MMAP, then the kernel allocates the memory for it and the application is expected to mmap(2) this memory. However, io_uring uses remap_pfn_range() for this operation, so we cannot rely on normal munmap/release on freeing them for us. Stash an io_buf_free entry away for each of these, if any, and provide a helper to free them post ->release(). Cc: stable@vger.kernel.org Fixes: c56e022c0a27 ("io_uring: add support for user mapped provided buffer ring") Reported-by: Jann Horn Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- include/linux/io_uring_types.h | 3 +++ io_uring/io_uring.c | 2 ++ io_uring/kbuf.c | 44 ++++++++++++++++++++++++++++++---- io_uring/kbuf.h | 2 ++ 4 files changed, 46 insertions(+), 5 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 13d19b9be9f4..5fd664fb71c8 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -327,6 +327,9 @@ struct io_ring_ctx { struct list_head io_buffers_cache; + /* deferred free list, protected by ->uring_lock */ + struct hlist_head io_buf_list; + /* Keep this last, we don't need it for the fast path */ struct wait_queue_head poll_wq; struct io_restriction restrictions; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index dd23b9459aab..f09e3ee11229 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -323,6 +323,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->sqd_list); INIT_LIST_HEAD(&ctx->cq_overflow_list); INIT_LIST_HEAD(&ctx->io_buffers_cache); + INIT_HLIST_HEAD(&ctx->io_buf_list); io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX, sizeof(struct io_rsrc_node)); io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX, @@ -2942,6 +2943,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) ctx->mm_account = NULL; } io_rings_free(ctx); + io_kbuf_mmap_list_free(ctx); percpu_ref_exit(&ctx->refs); free_uid(ctx->user); diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index cd8c688c1a97..8da5c10ec352 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -41,6 +41,11 @@ static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, return xa_load(&ctx->io_bl_xa, bgid); } +struct io_buf_free { + struct hlist_node list; + void *mem; +}; + static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, unsigned int bgid) { @@ -238,7 +243,10 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, if (bl->is_mapped) { i = bl->buf_ring->tail - bl->head; if (bl->is_mmap) { - folio_put(virt_to_folio(bl->buf_ring)); + /* + * io_kbuf_list_free() will free the page(s) at + * ->release() time. + */ bl->buf_ring = NULL; bl->is_mmap = 0; } else if (bl->buf_nr_pages) { @@ -552,18 +560,28 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, return -EINVAL; } -static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg, +static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx, + struct io_uring_buf_reg *reg, struct io_buffer_list *bl) { - gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; + struct io_buf_free *ibf; size_t ring_size; void *ptr; ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring); - ptr = (void *) __get_free_pages(gfp, get_order(ring_size)); + ptr = io_mem_alloc(ring_size); if (!ptr) return -ENOMEM; + /* Allocate and store deferred free entry */ + ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT); + if (!ibf) { + io_mem_free(ptr); + return -ENOMEM; + } + ibf->mem = ptr; + hlist_add_head(&ibf->list, &ctx->io_buf_list); + bl->buf_ring = ptr; bl->is_mapped = 1; bl->is_mmap = 1; @@ -622,7 +640,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (!(reg.flags & IOU_PBUF_RING_MMAP)) ret = io_pin_pbuf_ring(®, bl); else - ret = io_alloc_pbuf_ring(®, bl); + ret = io_alloc_pbuf_ring(ctx, ®, bl); if (!ret) { bl->nr_entries = reg.ring_entries; @@ -682,3 +700,19 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) return bl->buf_ring; } + +/* + * Called at or after ->release(), free the mmap'ed buffers that we used + * for memory mapped provided buffer rings. + */ +void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx) +{ + struct io_buf_free *ibf; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) { + hlist_del(&ibf->list); + io_mem_free(ibf->mem); + kfree(ibf); + } +} diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 274f8b7fb8ab..3d0cb6b8c1ed 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -54,6 +54,8 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags); int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); +void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx); + unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags); void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); From 9e1152a6c2b5488b688f50f81a6af029b2537258 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 28 Nov 2023 11:17:25 -0700 Subject: [PATCH 0712/1397] io_uring/kbuf: recycle freed mapped buffer ring entries commit b10b73c102a2eab91e1cd62a03d6446f1dfecc64 upstream. Right now we stash any potentially mmap'ed provided ring buffer range for freeing at release time, regardless of when they get unregistered. Since we're keeping track of these ranges anyway, keep track of their registration state as well, and use that to recycle ranges when appropriate rather than always allocate new ones. The lookup is a basic scan of entries, checking for the best matching free entry. Fixes: c392cbecd8ec ("io_uring/kbuf: defer release of mapped buffer rings") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/kbuf.c | 77 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 11 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 8da5c10ec352..012f62203604 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -44,6 +44,8 @@ static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, struct io_buf_free { struct hlist_node list; void *mem; + size_t size; + int inuse; }; static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, @@ -231,6 +233,24 @@ static __cold int io_init_bl_list(struct io_ring_ctx *ctx) return 0; } +/* + * Mark the given mapped range as free for reuse + */ +static void io_kbuf_mark_free(struct io_ring_ctx *ctx, struct io_buffer_list *bl) +{ + struct io_buf_free *ibf; + + hlist_for_each_entry(ibf, &ctx->io_buf_list, list) { + if (bl->buf_ring == ibf->mem) { + ibf->inuse = 0; + return; + } + } + + /* can't happen... */ + WARN_ON_ONCE(1); +} + static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer_list *bl, unsigned nbufs) { @@ -247,6 +267,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, * io_kbuf_list_free() will free the page(s) at * ->release() time. */ + io_kbuf_mark_free(ctx, bl); bl->buf_ring = NULL; bl->is_mmap = 0; } else if (bl->buf_nr_pages) { @@ -560,6 +581,34 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, return -EINVAL; } +/* + * See if we have a suitable region that we can reuse, rather than allocate + * both a new io_buf_free and mem region again. We leave it on the list as + * even a reused entry will need freeing at ring release. + */ +static struct io_buf_free *io_lookup_buf_free_entry(struct io_ring_ctx *ctx, + size_t ring_size) +{ + struct io_buf_free *ibf, *best = NULL; + size_t best_dist; + + hlist_for_each_entry(ibf, &ctx->io_buf_list, list) { + size_t dist; + + if (ibf->inuse || ibf->size < ring_size) + continue; + dist = ibf->size - ring_size; + if (!best || dist < best_dist) { + best = ibf; + if (!dist) + break; + best_dist = dist; + } + } + + return best; +} + static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx, struct io_uring_buf_reg *reg, struct io_buffer_list *bl) @@ -569,20 +618,26 @@ static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx, void *ptr; ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring); - ptr = io_mem_alloc(ring_size); - if (!ptr) - return -ENOMEM; - /* Allocate and store deferred free entry */ - ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT); + /* Reuse existing entry, if we can */ + ibf = io_lookup_buf_free_entry(ctx, ring_size); if (!ibf) { - io_mem_free(ptr); - return -ENOMEM; - } - ibf->mem = ptr; - hlist_add_head(&ibf->list, &ctx->io_buf_list); + ptr = io_mem_alloc(ring_size); + if (!ptr) + return -ENOMEM; - bl->buf_ring = ptr; + /* Allocate and store deferred free entry */ + ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT); + if (!ibf) { + io_mem_free(ptr); + return -ENOMEM; + } + ibf->mem = ptr; + ibf->size = ring_size; + hlist_add_head(&ibf->list, &ctx->io_buf_list); + } + ibf->inuse = 1; + bl->buf_ring = ibf->mem; bl->is_mapped = 1; bl->is_mmap = 1; return 0; From 4a7e92551618f3737b305f62451353ee05662f57 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 Nov 2023 23:17:16 +0100 Subject: [PATCH 0713/1397] wifi: cfg80211: fix CQM for non-range use commit 7e7efdda6adb385fbdfd6f819d76bc68c923c394 upstream. My prior race fix here broke CQM when ranges aren't used, as the reporting worker now requires the cqm_config to be set in the wdev, but isn't set when there's no range configured. Rather than continuing to special-case the range version, set the cqm_config always and configure accordingly, also tracking if range was used or not to be able to clear the configuration appropriately with the same API, which was actually not right if both were implemented by a driver for some reason, as is the case with mac80211 (though there the implementations are equivalent so it doesn't matter.) Also, the original multiple-RSSI commit lost checking for the callback, so might have potentially crashed if a driver had neither implementation, and userspace tried to use it despite not being advertised as supported. Cc: stable@vger.kernel.org Fixes: 4a4b8169501b ("cfg80211: Accept multiple RSSI thresholds for CQM") Fixes: 37c20b2effe9 ("wifi: cfg80211: fix cqm_config access race") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/core.h | 1 + net/wireless/nl80211.c | 50 ++++++++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/net/wireless/core.h b/net/wireless/core.h index e536c0b615a0..f0a3a2317638 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -299,6 +299,7 @@ struct cfg80211_cqm_config { u32 rssi_hyst; s32 last_rssi_event_value; enum nl80211_cqm_rssi_threshold_event last_rssi_event_type; + bool use_range_api; int n_rssi_thresholds; s32 rssi_thresholds[] __counted_by(n_rssi_thresholds); }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 931a03f4549c..6a82dd876f27 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12824,10 +12824,6 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, int i, n, low_index; int err; - /* RSSI reporting disabled? */ - if (!cqm_config) - return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); - /* * Obtain current RSSI value if possible, if not and no RSSI threshold * event has been received yet, we should receive an event after a @@ -12902,18 +12898,6 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) { - if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */ - return rdev_set_cqm_rssi_config(rdev, dev, 0, 0); - - return rdev_set_cqm_rssi_config(rdev, dev, - thresholds[0], hysteresis); - } - - if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_CQM_RSSI_LIST)) - return -EOPNOTSUPP; - if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */ n_thresholds = 0; @@ -12921,6 +12905,20 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, old = rcu_dereference_protected(wdev->cqm_config, lockdep_is_held(&wdev->mtx)); + /* if already disabled just succeed */ + if (!n_thresholds && !old) + return 0; + + if (n_thresholds > 1) { + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_CQM_RSSI_LIST) || + !rdev->ops->set_cqm_rssi_range_config) + return -EOPNOTSUPP; + } else { + if (!rdev->ops->set_cqm_rssi_config) + return -EOPNOTSUPP; + } + if (n_thresholds) { cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds, n_thresholds), @@ -12935,13 +12933,26 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, memcpy(cqm_config->rssi_thresholds, thresholds, flex_array_size(cqm_config, rssi_thresholds, n_thresholds)); + cqm_config->use_range_api = n_thresholds > 1 || + !rdev->ops->set_cqm_rssi_config; rcu_assign_pointer(wdev->cqm_config, cqm_config); + + if (cqm_config->use_range_api) + err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); + else + err = rdev_set_cqm_rssi_config(rdev, dev, + thresholds[0], + hysteresis); } else { RCU_INIT_POINTER(wdev->cqm_config, NULL); + /* if enabled as range also disable via range */ + if (old->use_range_api) + err = rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); + else + err = rdev_set_cqm_rssi_config(rdev, dev, 0, 0); } - err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); if (err) { rcu_assign_pointer(wdev->cqm_config, old); kfree_rcu(cqm_config, rcu_head); @@ -19131,10 +19142,11 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work) wdev_lock(wdev); cqm_config = rcu_dereference_protected(wdev->cqm_config, lockdep_is_held(&wdev->mtx)); - if (!wdev->cqm_config) + if (!cqm_config) goto unlock; - cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); + if (cqm_config->use_range_api) + cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); rssi_level = cqm_config->last_rssi_event_value; rssi_event = cqm_config->last_rssi_event_type; From 3e3fa8056fd259f34e6f0c02b54d17893114c909 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Tue, 31 Oct 2023 08:08:07 +0000 Subject: [PATCH 0714/1397] pinctrl: stm32: Add check for devm_kcalloc [ Upstream commit b0eeba527e704d6023a6cd9103f929226e326b03 ] Add check for the return value of devm_kcalloc() and return the error if it fails in order to avoid NULL pointer dereference. Fixes: 32c170ff15b0 ("pinctrl: stm32: set default gpio line names using pin names") Signed-off-by: Chen Ni Acked-by: Valentin Caron Link: https://lore.kernel.org/r/20231031080807.3600656-1-nichen@iscas.ac.cn Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/stm32/pinctrl-stm32.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index a73385a431de..419eca49ccec 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1378,6 +1378,11 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, struct fwnode } names = devm_kcalloc(dev, npins, sizeof(char *), GFP_KERNEL); + if (!names) { + err = -ENOMEM; + goto err_clk; + } + for (i = 0; i < npins; i++) { stm32_pin = stm32_pctrl_get_desc_pin_from_gpio(pctl, bank, i); if (stm32_pin && stm32_pin->pin.name) From a9659016f9ebc0868b74d1982e38d3af4511f948 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Tue, 7 Nov 2023 12:05:20 +0100 Subject: [PATCH 0715/1397] pinctrl: stm32: fix array read out of bound [ Upstream commit edd48fd9d45370d6c8ba0dd834fcc51ff688cc87 ] The existing code does not verify if the "tentative" index exceeds the size of the array, causing out of bound read. Issue identified with kasan. Check the index before using it. Signed-off-by: Antonio Borneo Fixes: 32c170ff15b0 ("pinctrl: stm32: set default gpio line names using pin names") Link: https://lore.kernel.org/r/20231107110520.4449-1-antonio.borneo@foss.st.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/stm32/pinctrl-stm32.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 419eca49ccec..346a31f31bba 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1283,9 +1283,11 @@ static struct stm32_desc_pin *stm32_pctrl_get_desc_pin_from_gpio(struct stm32_pi int i; /* With few exceptions (e.g. bank 'Z'), pin number matches with pin index in array */ - pin_desc = pctl->pins + stm32_pin_nb; - if (pin_desc->pin.number == stm32_pin_nb) - return pin_desc; + if (stm32_pin_nb < pctl->npins) { + pin_desc = pctl->pins + stm32_pin_nb; + if (pin_desc->pin.number == stm32_pin_nb) + return pin_desc; + } /* Otherwise, loop all array to find the pin with the right number */ for (i = 0; i < pctl->npins; i++) { From 2cc612b8ed89f3e80869a736b6551dd296875bdc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Nov 2023 10:39:24 +0300 Subject: [PATCH 0716/1397] media: v4l2-subdev: Fix a 64bit bug [ Upstream commit 5d33213fac5929a2e7766c88d78779fd443b0fe8 ] The problem is this line here from subdev_do_ioctl(). client_cap->capabilities &= ~V4L2_SUBDEV_CLIENT_CAP_STREAMS; The "client_cap->capabilities" variable is a u64. The AND operation is supposed to clear out the V4L2_SUBDEV_CLIENT_CAP_STREAMS flag. But because it's a 32 bit variable it accidentally clears out the high 32 bits as well. Currently we only use the first bit and none of the upper bits so this doesn't affect runtime behavior. Fixes: f57fa2959244 ("media: v4l2-subdev: Add new ioctl for client capabilities") Signed-off-by: Dan Carpenter Reviewed-by: Tomi Valkeinen Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- include/uapi/linux/v4l2-subdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h index 4a195b68f28f..b383c2fe0cf3 100644 --- a/include/uapi/linux/v4l2-subdev.h +++ b/include/uapi/linux/v4l2-subdev.h @@ -239,7 +239,7 @@ struct v4l2_subdev_routing { * set (which is the default), the 'stream' fields will be forced to 0 by the * kernel. */ - #define V4L2_SUBDEV_CLIENT_CAP_STREAMS (1U << 0) + #define V4L2_SUBDEV_CLIENT_CAP_STREAMS (1ULL << 0) /** * struct v4l2_subdev_client_capability - Capabilities of the client accessing From 41a0b3b8cfff6cc2827fe4b77dfea571a4128d43 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 13 Nov 2023 20:54:52 -0800 Subject: [PATCH 0717/1397] netdevsim: Don't accept device bound programs [ Upstream commit c0c6bde586c7dce82719b4ff32a2db6af9ee3d65 ] Commit 2b3486bc2d23 ("bpf: Introduce device-bound XDP programs") introduced device-bound programs by largely reusing existing offloading infrastructure. This changed the semantics of 'prog->aux->offload' a bit. Now, it's non-NULL for both offloaded and device-bound programs. Instead of looking at 'prog->aux->offload' let's call bpf_prog_is_offloaded which should be true iff the program is offloaded and not merely device-bound. Fixes: 2b3486bc2d23 ("bpf: Introduce device-bound XDP programs") Reported-by: syzbot+44c2416196b7c607f226@syzkaller.appspotmail.com Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Cc: Dipendra Khadka Link: https://lore.kernel.org/bpf/20231114045453.1816995-2-sdf@google.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- drivers/net/netdevsim/bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index f60eb97e3a62..608953d4f98d 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -93,7 +93,7 @@ static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded) { struct nsim_bpf_bound_prog *state; - if (!prog || !prog->aux->offload) + if (!prog || !bpf_prog_is_offloaded(prog->aux)) return; state = prog->aux->offload->dev_priv; @@ -311,7 +311,7 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf) if (!bpf->prog) return 0; - if (!bpf->prog->aux->offload) { + if (!bpf_prog_is_offloaded(bpf->prog->aux)) { NSIM_EA(bpf->extack, "xdpoffload of non-bound program"); return -EINVAL; } From 85311671989d87d038e6240578dc4331f53c36cd Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 22 Nov 2023 14:11:41 +0900 Subject: [PATCH 0718/1397] net: rswitch: Fix type of ret in rswitch_start_xmit() [ Upstream commit 109b25d13e0054337860d44841b990d11b32d262 ] The type of ret in rswitch_start_xmit() should be netdev_tx_t. So, fix it. Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/rswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 0fc0b6bea753..898f22aa796e 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1501,8 +1501,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd { struct rswitch_device *rdev = netdev_priv(ndev); struct rswitch_gwca_queue *gq = rdev->tx_queue; + netdev_tx_t ret = NETDEV_TX_OK; struct rswitch_ext_desc *desc; - int ret = NETDEV_TX_OK; dma_addr_t dma_addr; if (rswitch_get_num_cur_queues(gq) >= gq->ring_size - 1) { From 940af3fee268568a96511108c1b610227696cd69 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 22 Nov 2023 14:11:42 +0900 Subject: [PATCH 0719/1397] net: rswitch: Fix return value in rswitch_start_xmit() [ Upstream commit 1aaef8634a20b322c82e84f12a9b6aec1e2fd4fa ] This .ndo_start_xmit() function should return netdev_tx_t value, not -ENOMEM. So, fix it. Fixes: 33f5d733b589 ("net: renesas: rswitch: Improve TX timestamp accuracy") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/rswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 898f22aa796e..3ccf93184c9b 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1532,7 +1532,7 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC); if (!ts_info) { dma_unmap_single(ndev->dev.parent, dma_addr, skb->len, DMA_TO_DEVICE); - return -ENOMEM; + return ret; } skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; From 85afa283e07cede89423d665e0c2b5564c37eef1 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 22 Nov 2023 14:11:43 +0900 Subject: [PATCH 0720/1397] net: rswitch: Fix missing dev_kfree_skb_any() in error path [ Upstream commit 782486af9b5b76493012711413c141509ac45dec ] Before returning the rswitch_start_xmit() in the error path, dev_kfree_skb_any() should be called. So, fix it. Fixes: 33f5d733b589 ("net: renesas: rswitch: Improve TX timestamp accuracy") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/rswitch.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 3ccf93184c9b..ae9d8722b76f 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1514,10 +1514,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd return ret; dma_addr = dma_map_single(ndev->dev.parent, skb->data, skb->len, DMA_TO_DEVICE); - if (dma_mapping_error(ndev->dev.parent, dma_addr)) { - dev_kfree_skb_any(skb); - return ret; - } + if (dma_mapping_error(ndev->dev.parent, dma_addr)) + goto err_kfree; gq->skbs[gq->cur] = skb; desc = &gq->tx_ring[gq->cur]; @@ -1530,10 +1528,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd struct rswitch_gwca_ts_info *ts_info; ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC); - if (!ts_info) { - dma_unmap_single(ndev->dev.parent, dma_addr, skb->len, DMA_TO_DEVICE); - return ret; - } + if (!ts_info) + goto err_unmap; skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; rdev->ts_tag++; @@ -1555,6 +1551,14 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd gq->cur = rswitch_next_queue_index(gq, true, 1); rswitch_modify(rdev->addr, GWTRC(gq->index), 0, BIT(gq->index % 32)); + return ret; + +err_unmap: + dma_unmap_single(ndev->dev.parent, dma_addr, skb->len, DMA_TO_DEVICE); + +err_kfree: + dev_kfree_skb_any(skb); + return ret; } From bf8601dabed0c134a7d58085824e3e466840c5d1 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 23 Nov 2023 15:13:14 +0800 Subject: [PATCH 0721/1397] ipv4: igmp: fix refcnt uaf issue when receiving igmp query packet [ Upstream commit e2b706c691905fe78468c361aaabc719d0a496f1 ] When I perform the following test operations: 1.ip link add br0 type bridge 2.brctl addif br0 eth0 3.ip addr add 239.0.0.1/32 dev eth0 4.ip addr add 239.0.0.1/32 dev br0 5.ip addr add 224.0.0.1/32 dev br0 6.while ((1)) do ifconfig br0 up ifconfig br0 down done 7.send IGMPv2 query packets to port eth0 continuously. For example, ./mausezahn ethX -c 0 "01 00 5e 00 00 01 00 72 19 88 aa 02 08 00 45 00 00 1c 00 01 00 00 01 02 0e 7f c0 a8 0a b7 e0 00 00 01 11 64 ee 9b 00 00 00 00" The preceding tests may trigger the refcnt uaf issue of the mc list. The stack is as follows: refcount_t: addition on 0; use-after-free. WARNING: CPU: 21 PID: 144 at lib/refcount.c:25 refcount_warn_saturate (lib/refcount.c:25) CPU: 21 PID: 144 Comm: ksoftirqd/21 Kdump: loaded Not tainted 6.7.0-rc1-next-20231117-dirty #80 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:refcount_warn_saturate (lib/refcount.c:25) RSP: 0018:ffffb68f00657910 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff8a00c3bf96c0 RCX: ffff8a07b6160908 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff8a07b6160900 RBP: ffff8a00cba36862 R08: 0000000000000000 R09: 00000000ffff7fff R10: ffffb68f006577c0 R11: ffffffffb0fdcdc8 R12: ffff8a00c3bf9680 R13: ffff8a00c3bf96f0 R14: 0000000000000000 R15: ffff8a00d8766e00 FS: 0000000000000000(0000) GS:ffff8a07b6140000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f10b520b28 CR3: 000000039741a000 CR4: 00000000000006f0 Call Trace: igmp_heard_query (net/ipv4/igmp.c:1068) igmp_rcv (net/ipv4/igmp.c:1132) ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205) ip_local_deliver_finish (net/ipv4/ip_input.c:234) __netif_receive_skb_one_core (net/core/dev.c:5529) netif_receive_skb_internal (net/core/dev.c:5729) netif_receive_skb (net/core/dev.c:5788) br_handle_frame_finish (net/bridge/br_input.c:216) nf_hook_bridge_pre (net/bridge/br_input.c:294) __netif_receive_skb_core (net/core/dev.c:5423) __netif_receive_skb_list_core (net/core/dev.c:5606) __netif_receive_skb_list (net/core/dev.c:5674) netif_receive_skb_list_internal (net/core/dev.c:5764) napi_gro_receive (net/core/gro.c:609) e1000_clean_rx_irq (drivers/net/ethernet/intel/e1000/e1000_main.c:4467) e1000_clean (drivers/net/ethernet/intel/e1000/e1000_main.c:3805) __napi_poll (net/core/dev.c:6533) net_rx_action (net/core/dev.c:6735) __do_softirq (kernel/softirq.c:554) run_ksoftirqd (kernel/softirq.c:913) smpboot_thread_fn (kernel/smpboot.c:164) kthread (kernel/kthread.c:388) ret_from_fork (arch/x86/kernel/process.c:153) ret_from_fork_asm (arch/x86/entry/entry_64.S:250) The root causes are as follows: Thread A Thread B ... netif_receive_skb br_dev_stop ... br_multicast_leave_snoopers ... __ip_mc_dec_group ... __igmp_group_dropped igmp_rcv igmp_stop_timer igmp_heard_query //ref = 1 ip_ma_put igmp_mod_timer refcount_dec_and_test igmp_start_timer //ref = 0 ... refcount_inc //ref increases from 0 When the device receives an IGMPv2 Query message, it starts the timer immediately, regardless of whether the device is running. If the device is down and has left the multicast group, it will cause the mc list refcount uaf issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Reviewed-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/igmp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 418e5fb58fd3..d515881d02a6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -216,8 +216,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) int tv = get_random_u32_below(max_delay); im->tm_running = 1; - if (!mod_timer(&im->timer, jiffies+tv+2)) - refcount_inc(&im->refcnt); + if (refcount_inc_not_zero(&im->refcnt)) { + if (mod_timer(&im->timer, jiffies + tv + 2)) + ip_ma_put(im); + } } static void igmp_gq_start_timer(struct in_device *in_dev) From cf3b66ecf48a752c96cf7c03c25199497df61b3d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Sep 2023 15:40:41 +0300 Subject: [PATCH 0722/1397] wifi: iwlwifi: mvm: fix an error code in iwl_mvm_mld_add_sta() [ Upstream commit 71b5e40651d89a8685bea1592dfcd2aa61559628 ] This error path should return -EINVAL instead of success. Fixes: 57974a55d995 ("wifi: iwlwifi: mvm: refactor iwl_mvm_mac_sta_state_common()") Signed-off-by: Dan Carpenter Acked-by: Gregory Greenman Link: https://lore.kernel.org/r/75e4ea09-db58-462f-bd4e-5ad4e5e5dcb5@moroto.mountain Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c index 56f51344c193..1ccbe8c1eeb4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c @@ -705,8 +705,10 @@ int iwl_mvm_mld_add_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, rcu_dereference_protected(mvm_sta->link[link_id], lockdep_is_held(&mvm->mutex)); - if (WARN_ON(!link_conf || !mvm_link_sta)) + if (WARN_ON(!link_conf || !mvm_link_sta)) { + ret = -EINVAL; goto err; + } ret = iwl_mvm_mld_cfg_sta(mvm, sta, vif, link_sta, link_conf, mvm_link_sta); From ee0db868ee4d88493dfdc82f59e3b4e449ddddd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Old=C5=99ich=20Jedli=C4=8Dka?= Date: Sat, 4 Nov 2023 15:13:33 +0100 Subject: [PATCH 0723/1397] wifi: mac80211: do not pass AP_VLAN vif pointer to drivers during flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3e3a2b645c043f7e3e488d5011478cefb69bbe8b ] This fixes WARN_ONs when using AP_VLANs after station removal. The flush call passed AP_VLAN vif to driver, but because these vifs are virtual and not registered with drivers, we need to translate to the correct AP vif first. Closes: https://github.com/openwrt/openwrt/issues/12420 Fixes: 0b75a1b1e42e ("wifi: mac80211: flush queues on STA removal") Fixes: d00800a289c9 ("wifi: mac80211: add flush_sta method") Tested-by: Konstantin Demin Tested-by: Koen Vandeputte Signed-off-by: Oldřich Jedlička Link: https://lore.kernel.org/r/20231104141333.3710-1-oldium.pro@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/driver-ops.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c4505593ba7a..2bc2fbe58f94 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -23,7 +23,7 @@ static inline struct ieee80211_sub_if_data * get_bss_sdata(struct ieee80211_sub_if_data *sdata) { - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata && sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); @@ -638,10 +638,13 @@ static inline void drv_flush(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u32 queues, bool drop) { - struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL; + struct ieee80211_vif *vif; might_sleep(); + sdata = get_bss_sdata(sdata); + vif = sdata ? &sdata->vif : NULL; + if (sdata && !check_sdata_in_driver(sdata)) return; @@ -657,6 +660,8 @@ static inline void drv_flush_sta(struct ieee80211_local *local, { might_sleep(); + sdata = get_bss_sdata(sdata); + if (sdata && !check_sdata_in_driver(sdata)) return; From 5cc4ed203b0631e818c3400a7668610af278543e Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Fri, 24 Nov 2023 14:15:28 +1000 Subject: [PATCH 0724/1397] net: dsa: mv88e6xxx: fix marvell 6350 switch probing [ Upstream commit b3f1a164c7f742503dc7159011f7ad6b092b660e ] As of commit de5c9bf40c45 ("net: phylink: require supported_interfaces to be filled") Marvell 88e6350 switches fail to be probed: ... mv88e6085 d0072004.mdio-mii:11: switch 0x3710 detected: Marvell 88E6350, revision 2 mv88e6085 d0072004.mdio-mii:11: phylink: error: empty supported_interfaces error creating PHYLINK: -22 mv88e6085: probe of d0072004.mdio-mii:11 failed with error -22 ... The problem stems from the use of mv88e6185_phylink_get_caps() to get the device capabilities. Create a new dedicated phylink_get_caps for the 6351 family (which the 6350 is one of) to properly support their set of capabilities. According to chip.h the 6351 switch family includes the 6171, 6175, 6350 and 6351 switches, so update each of these to use the correct phylink_get_caps. Fixes: de5c9bf40c45 ("net: phylink: require supported_interfaces to be filled") Signed-off-by: Greg Ungerer Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/chip.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ab434a77b059..6d7256ea477a 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -577,6 +577,18 @@ static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100; } +static void mv88e6351_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) +{ + unsigned long *supported = config->supported_interfaces; + + /* Translate the default cmode */ + mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported); + + config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100 | + MAC_1000FD; +} + static int mv88e6352_get_port4_serdes_cmode(struct mv88e6xxx_chip *chip) { u16 reg, val; @@ -4340,7 +4352,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .stu_getnext = mv88e6352_g1_stu_getnext, .stu_loadpurge = mv88e6352_g1_stu_loadpurge, - .phylink_get_caps = mv88e6185_phylink_get_caps, + .phylink_get_caps = mv88e6351_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6172_ops = { @@ -4440,7 +4452,7 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .stu_getnext = mv88e6352_g1_stu_getnext, .stu_loadpurge = mv88e6352_g1_stu_loadpurge, - .phylink_get_caps = mv88e6185_phylink_get_caps, + .phylink_get_caps = mv88e6351_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6176_ops = { @@ -5069,7 +5081,7 @@ static const struct mv88e6xxx_ops mv88e6350_ops = { .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .stu_getnext = mv88e6352_g1_stu_getnext, .stu_loadpurge = mv88e6352_g1_stu_loadpurge, - .phylink_get_caps = mv88e6185_phylink_get_caps, + .phylink_get_caps = mv88e6351_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6351_ops = { @@ -5117,7 +5129,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .stu_loadpurge = mv88e6352_g1_stu_loadpurge, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6352_ptp_ops, - .phylink_get_caps = mv88e6185_phylink_get_caps, + .phylink_get_caps = mv88e6351_phylink_get_caps, }; static const struct mv88e6xxx_ops mv88e6352_ops = { From 3a5fb57481930cf656cd59851083919cf22076f0 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Fri, 24 Nov 2023 14:15:29 +1000 Subject: [PATCH 0725/1397] net: dsa: mv88e6xxx: fix marvell 6350 probe crash [ Upstream commit a524eabcd72d28425d9db242cf375d0389d74eba ] As of commit b92143d4420f ("net: dsa: mv88e6xxx: add infrastructure for phylink_pcs") probing of a Marvell 88e6350 switch causes a NULL pointer de-reference like this example: ... mv88e6085 d0072004.mdio-mii:11: switch 0x3710 detected: Marvell 88E6350, revision 2 8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 when read [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] ARM Modules linked in: CPU: 0 PID: 8 Comm: kworker/u2:0 Not tainted 6.7.0-rc2-dirty #26 Hardware name: Marvell Armada 370/XP (Device Tree) Workqueue: events_unbound deferred_probe_work_func PC is at mv88e6xxx_port_setup+0x1c/0x44 LR is at dsa_port_devlink_setup+0x74/0x154 pc : [] lr : [] psr: a0000013 sp : c184fce0 ip : c542b8f4 fp : 00000000 r10: 00000001 r9 : c542a540 r8 : c542bc00 r7 : c542b838 r6 : c5244580 r5 : 00000005 r4 : c5244580 r3 : 00000000 r2 : c542b840 r1 : 00000005 r0 : c1a02040 ... The Marvell 6350 switch has no SERDES interface and so has no corresponding pcs_ops defined for it. But during probing a call is made to mv88e6xxx_port_setup() which unconditionally expects pcs_ops to exist - though the presence of the pcs_ops->pcs_init function is optional. Modify code to check for pcs_ops first, before checking for and calling pcs_ops->pcs_init. Modify checking and use of pcs_ops->pcs_teardown which may potentially suffer the same problem. Fixes: b92143d4420f ("net: dsa: mv88e6xxx: add infrastructure for phylink_pcs") Signed-off-by: Greg Ungerer Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/chip.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 6d7256ea477a..dc7f9b99f409 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3892,7 +3892,8 @@ static int mv88e6xxx_port_setup(struct dsa_switch *ds, int port) struct mv88e6xxx_chip *chip = ds->priv; int err; - if (chip->info->ops->pcs_ops->pcs_init) { + if (chip->info->ops->pcs_ops && + chip->info->ops->pcs_ops->pcs_init) { err = chip->info->ops->pcs_ops->pcs_init(chip, port); if (err) return err; @@ -3907,7 +3908,8 @@ static void mv88e6xxx_port_teardown(struct dsa_switch *ds, int port) mv88e6xxx_teardown_devlink_regions_port(ds, port); - if (chip->info->ops->pcs_ops->pcs_teardown) + if (chip->info->ops->pcs_ops && + chip->info->ops->pcs_ops->pcs_teardown) chip->info->ops->pcs_ops->pcs_teardown(chip, port); } From 434fad50176a6fff8eda20803ee09b92e29b189c Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 24 Nov 2023 12:28:04 +0200 Subject: [PATCH 0726/1397] dpaa2-eth: increase the needed headroom to account for alignment [ Upstream commit f422abe3f23d483cf01f386819f26fb3fe0dbb2b ] Increase the needed headroom to account for a 64 byte alignment restriction which, with this patch, we make mandatory on the Tx path. The case in which the amount of headroom needed is not available is already handled by the driver which instead sends a S/G frame with the first buffer only holding the SW and HW annotation areas. Without this patch, we can empirically see data corruption happening between Tx and Tx confirmation which sometimes leads to the SW annotation area being overwritten. Since this is an old IP where the hardware team cannot help to understand the underlying behavior, we make the Tx alignment mandatory for all frames to avoid the crash on Tx conf. Also, remove the comment that suggested that this is just an optimization. This patch also sets the needed_headroom net device field to the usual value that the driver would need on the Tx path: - 64 bytes for the software annotation area - 64 bytes to account for a 64 byte aligned buffer address Fixes: 6e2387e8f19e ("staging: fsl-dpaa2/eth: Add Freescale DPAA2 Ethernet driver") Closes: https://lore.kernel.org/netdev/aa784d0c-85eb-4e5d-968b-c8f74fa86be6@gin.de/ Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 8 ++++---- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 15bab41cee48..774377db0b4b 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1073,14 +1073,12 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv, dma_addr_t addr; buffer_start = skb->data - dpaa2_eth_needed_headroom(skb); - - /* If there's enough room to align the FD address, do it. - * It will help hardware optimize accesses. - */ aligned_start = PTR_ALIGN(buffer_start - DPAA2_ETH_TX_BUF_ALIGN, DPAA2_ETH_TX_BUF_ALIGN); if (aligned_start >= skb->head) buffer_start = aligned_start; + else + return -ENOMEM; /* Store a backpointer to the skb at the beginning of the buffer * (in the private data area) such that we can release it @@ -4967,6 +4965,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) if (err) goto err_dl_port_add; + net_dev->needed_headroom = DPAA2_ETH_SWA_SIZE + DPAA2_ETH_TX_BUF_ALIGN; + err = register_netdev(net_dev); if (err < 0) { dev_err(dev, "register_netdev() failed\n"); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index bfb6c96c3b2f..834cba8c3a41 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -740,7 +740,7 @@ static inline bool dpaa2_eth_rx_pause_enabled(u64 link_options) static inline unsigned int dpaa2_eth_needed_headroom(struct sk_buff *skb) { - unsigned int headroom = DPAA2_ETH_SWA_SIZE; + unsigned int headroom = DPAA2_ETH_SWA_SIZE + DPAA2_ETH_TX_BUF_ALIGN; /* If we don't have an skb (e.g. XDP buffer), we only need space for * the software annotation area From 9453d9a03000787e29205c529bc6ba1fe7b03df4 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 24 Nov 2023 12:28:05 +0200 Subject: [PATCH 0727/1397] dpaa2-eth: recycle the RX buffer only after all processing done [ Upstream commit beb1930f966d1517921488bd5d64147f58f79abf ] The blamed commit added support for Rx copybreak. This meant that for certain frame sizes, a new skb was allocated and the initial data buffer was recycled. Instead of waiting to recycle the Rx buffer only after all processing was done on it (like accessing the parse results or timestamp information), the code path just went ahead and re-used the buffer right away. This sometimes lead to corrupted HW and SW annotation areas. Fix this by delaying the moment when the buffer is recycled. Fixes: 50f826999a80 ("dpaa2-eth: add rx copybreak support") Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 774377db0b4b..888509cf1f21 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -516,8 +516,6 @@ struct sk_buff *dpaa2_eth_alloc_skb(struct dpaa2_eth_priv *priv, memcpy(skb->data, fd_vaddr + fd_offset, fd_length); - dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd)); - return skb; } @@ -589,6 +587,7 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, struct rtnl_link_stats64 *percpu_stats; struct dpaa2_eth_drv_stats *percpu_extras; struct device *dev = priv->net_dev->dev.parent; + bool recycle_rx_buf = false; void *buf_data; u32 xdp_act; @@ -618,6 +617,8 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, dma_unmap_page(dev, addr, priv->rx_buf_size, DMA_BIDIRECTIONAL); skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr); + } else { + recycle_rx_buf = true; } } else if (fd_format == dpaa2_fd_sg) { WARN_ON(priv->xdp_prog); @@ -637,6 +638,9 @@ void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, goto err_build_skb; dpaa2_eth_receive_skb(priv, ch, fd, vaddr, fq, percpu_stats, skb); + + if (recycle_rx_buf) + dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd)); return; err_build_skb: From d9105720404c2c35ae79949a06a95ce88ef6280e Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Sat, 11 Nov 2023 12:38:21 +0800 Subject: [PATCH 0728/1397] bpf: Add missed allocation hint for bpf_mem_cache_alloc_flags() [ Upstream commit 75a442581d05edaee168222ffbe00d4389785636 ] bpf_mem_cache_alloc_flags() may call __alloc() directly when there is no free object in free list, but it doesn't initialize the allocation hint for the returned pointer. It may lead to bad memory dereference when freeing the pointer, so fix it by initializing the allocation hint. Fixes: 822fb26bdb55 ("bpf: Add a hint to allocated objects.") Signed-off-by: Hou Tao Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231111043821.2258513-1-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/memalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index d93ddac283d4..956f80ee6f5c 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -958,6 +958,8 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) memcg = get_memcg(c); old_memcg = set_active_memcg(memcg); ret = __alloc(c, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT); + if (ret) + *(struct bpf_mem_cache **)ret = c; set_active_memcg(old_memcg); mem_cgroup_put(memcg); } From 4b81d155fe0b26c88426e0803b42aee06b7075ef Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 20 Nov 2023 14:05:08 +0300 Subject: [PATCH 0729/1397] uapi: propagate __struct_group() attributes to the container union [ Upstream commit 4e86f32a13af1970d21be94f659cae56bbe487ee ] Recently the kernel test robot has reported an ARM-specific BUILD_BUG_ON() in an old and unmaintained wil6210 wireless driver. The problem comes from the structure packing rules of old ARM ABI ('-mabi=apcs-gnu'). For example, the following structure is packed to 18 bytes instead of 16: struct poorly_packed { unsigned int a; unsigned int b; unsigned short c; union { struct { unsigned short d; unsigned int e; } __attribute__((packed)); struct { unsigned short d; unsigned int e; } __attribute__((packed)) inner; }; } __attribute__((packed)); To fit it into 16 bytes, it's required to add packed attribute to the container union as well: struct poorly_packed { unsigned int a; unsigned int b; unsigned short c; union { struct { unsigned short d; unsigned int e; } __attribute__((packed)); struct { unsigned short d; unsigned int e; } __attribute__((packed)) inner; } __attribute__((packed)); } __attribute__((packed)); Thanks to Andrew Pinski of GCC team for sorting the things out at https://gcc.gnu.org/pipermail/gcc/2023-November/242888.html. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311150821.cI4yciFE-lkp@intel.com Signed-off-by: Dmitry Antipov Link: https://lore.kernel.org/r/20231120110607.98956-1-dmantipov@yandex.ru Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro") Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- include/uapi/linux/stddef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 5c6c4269f7ef..2ec6f35cda32 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -27,7 +27,7 @@ union { \ struct { MEMBERS } ATTRS; \ struct TAG { MEMBERS } ATTRS NAME; \ - } + } ATTRS #ifdef __cplusplus /* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */ From efd3358bc3d0f75554f110487a5d9133abb3f512 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:19 -0500 Subject: [PATCH 0730/1397] selftests/net: ipsec: fix constant out of range [ Upstream commit 088559815477c6f623a5db5993491ddd7facbec7 ] Fix a small compiler warning. nr_process must be a signed long: it is assigned a signed long by strtol() and is compared against LONG_MIN and LONG_MAX. ipsec.c:2280:65: error: result of comparison of constant -9223372036854775808 with expression of type 'unsigned int' is always false [-Werror,-Wtautological-constant-out-of-range-compare] if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN)) Fixes: bc2652b7ae1e ("selftest/net/xfrm: Add test for ipsec tunnel") Signed-off-by: Willem de Bruijn Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20231124171645.1011043-2-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/ipsec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index 9a8229abfa02..be4a30a0d02a 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -2263,7 +2263,7 @@ static int check_results(void) int main(int argc, char **argv) { - unsigned int nr_process = 1; + long nr_process = 1; int route_sock = -1, ret = KSFT_SKIP; int test_desc_fd[2]; uint32_t route_seq; @@ -2284,7 +2284,7 @@ int main(int argc, char **argv) exit_usage(argv); } - if (nr_process > MAX_PROCESSES || !nr_process) { + if (nr_process > MAX_PROCESSES || nr_process < 1) { printk("nr_process should be between [1; %u]", MAX_PROCESSES); exit_usage(argv); From aea066ac9e9a4cf246cf8d63a160fc637ab38244 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:20 -0500 Subject: [PATCH 0731/1397] selftests/net: fix a char signedness issue [ Upstream commit 7b29828c5af6841bdeb9fafa32fdfeff7ab9c407 ] Signedness of char is signed on x86_64, but unsigned on arm64. Fix the warning building cmsg_sender.c on signed platforms or forced with -fsigned-char: msg_sender.c:455:12: error: implicit conversion from 'int' to 'char' changes value from 128 to -128 [-Werror,-Wconstant-conversion] buf[0] = ICMPV6_ECHO_REQUEST; constant ICMPV6_ECHO_REQUEST is 128. Link: https://lwn.net/Articles/911914 Fixes: de17e305a810 ("selftests: net: cmsg_sender: support icmp and raw sockets") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231124171645.1011043-3-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/cmsg_sender.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 24b21b15ed3f..6ff3e732f449 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -416,9 +416,9 @@ int main(int argc, char *argv[]) { struct addrinfo hints, *ai; struct iovec iov[1]; + unsigned char *buf; struct msghdr msg; char cbuf[1024]; - char *buf; int err; int fd; From a989f76f2218bf751664f7866bb30c754fc17393 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:21 -0500 Subject: [PATCH 0732/1397] selftests/net: unix: fix unused variable compiler warning [ Upstream commit 59fef379d453781f0dabfa1f1a1e86e78aee919a ] Remove an unused variable. diag_uid.c:151:24: error: unused variable 'udr' [-Werror,-Wunused-variable] Fixes: ac011361bd4f ("af_unix: Add test for sock_diag and UDIAG_SHOW_UID.") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231124171645.1011043-4-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/af_unix/diag_uid.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c index 5b88f7129fea..79a3dd75590e 100644 --- a/tools/testing/selftests/net/af_unix/diag_uid.c +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -148,7 +148,6 @@ void receive_response(struct __test_metadata *_metadata, .msg_iov = &iov, .msg_iovlen = 1 }; - struct unix_diag_req *udr; struct nlmsghdr *nlh; int ret; From 20bb3a96cee99e047b0bbae4a413f76b614ddd36 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:22 -0500 Subject: [PATCH 0733/1397] selftests/net: mptcp: fix uninitialized variable warnings [ Upstream commit 00a4f8fd9c750f20d8fd4535c71c9caa7ef5ff2f ] Same init_rng() in both tests. The function reads /dev/urandom to initialize srand(). In case of failure, it falls back onto the entropy in the uninitialized variable. Not sure if this is on purpose. But failure reading urandom should be rare, so just fail hard. While at it, convert to getrandom(). Which man 4 random suggests is simpler and more robust. mptcp_inq.c:525:6: mptcp_connect.c:1131:6: error: variable 'foo' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") Fixes: b51880568f20 ("selftests: mptcp: add inq test case") Cc: Florian Westphal Signed-off-by: Willem de Bruijn ---- When input is randomized because this is expected to meaningfully explore edge cases, should we also add 1. logging the random seed to stdout and 2. adding a command line argument to replay from a specific seed I can do this in net-next, if authors find it useful in this case. Reviewed-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231124171645.1011043-5-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 11 ++++------- tools/testing/selftests/net/mptcp/mptcp_inq.c | 11 ++++------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index c7f9ebeebc2c..d2043ec3bf6d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -1125,15 +1126,11 @@ int main_loop_s(int listensock) static void init_rng(void) { - int fd = open("/dev/urandom", O_RDONLY); unsigned int foo; - if (fd > 0) { - int ret = read(fd, &foo, sizeof(foo)); - - if (ret < 0) - srand(fd + foo); - close(fd); + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); } srand(foo); diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 8672d898f8cd..218aac467321 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -519,15 +520,11 @@ static int client(int unixfd) static void init_rng(void) { - int fd = open("/dev/urandom", O_RDONLY); unsigned int foo; - if (fd > 0) { - int ret = read(fd, &foo, sizeof(foo)); - - if (ret < 0) - srand(fd + foo); - close(fd); + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); } srand(foo); From f8e2ec79856f4782eed836f97c79b41d9fb264ac Mon Sep 17 00:00:00 2001 From: Elena Salomatkina Date: Sat, 25 Nov 2023 00:08:02 +0300 Subject: [PATCH 0734/1397] octeontx2-af: Fix possible buffer overflow [ Upstream commit ad31c629ca3c87f6d557488c1f9faaebfbcd203c ] A loop in rvu_mbox_handler_nix_bandprof_free() contains a break if (idx == MAX_BANDPROF_PER_PFFUNC), but if idx may reach MAX_BANDPROF_PER_PFFUNC buffer '(*req->prof_idx)[layer]' overflow happens before that check. The patch moves the break to the beginning of the loop. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: e8e095b3b370 ("octeontx2-af: cn10k: Bandwidth profiles config support"). Signed-off-by: Elena Salomatkina Reviewed-by: Simon Horman Reviewed-by: Subbaraya Sundeep Link: https://lore.kernel.org/r/20231124210802.109763-1-elena.salomatkina.cmc@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 23c2f2ed2fb8..c112c71ff576 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -5505,6 +5505,8 @@ int rvu_mbox_handler_nix_bandprof_free(struct rvu *rvu, ipolicer = &nix_hw->ipolicer[layer]; for (idx = 0; idx < req->prof_count[layer]; idx++) { + if (idx == MAX_BANDPROF_PER_PFFUNC) + break; prof_idx = req->prof_idx[layer][idx]; if (prof_idx >= ipolicer->band_prof.max || ipolicer->pfvf_map[prof_idx] != pcifunc) @@ -5518,8 +5520,6 @@ int rvu_mbox_handler_nix_bandprof_free(struct rvu *rvu, ipolicer->pfvf_map[prof_idx] = 0x00; ipolicer->match_id[prof_idx] = 0; rvu_free_rsrc(&ipolicer->band_prof, prof_idx); - if (idx == MAX_BANDPROF_PER_PFFUNC) - break; } } mutex_unlock(&rvu->rsrc_lock); From 46550079ef9f26607868162df3ccd7cbf9b048ef Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Sat, 25 Nov 2023 14:01:26 +0800 Subject: [PATCH 0735/1397] net: stmmac: xgmac: Disable FPE MMC interrupts [ Upstream commit e54d628a2721bfbb002c19f6e8ca6746cec7640f ] Commit aeb18dd07692 ("net: stmmac: xgmac: Disable MMC interrupts by default") tries to disable MMC interrupts to avoid a storm of unhandled interrupts, but leaves the FPE(Frame Preemption) MMC interrupts enabled, FPE MMC interrupts can cause the same problem. Now we mask FPE TX and RX interrupts to disable all MMC interrupts. Fixes: aeb18dd07692 ("net: stmmac: xgmac: Disable MMC interrupts by default") Reviewed-by: Larysa Zaremba Signed-off-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Serge Semin Reviewed-by: Wojciech Drewek Link: https://lore.kernel.org/r/20231125060126.2328690-1-0x1207@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/mmc_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index ea4910ae0921..6a7c1d325c46 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -177,8 +177,10 @@ #define MMC_XGMAC_RX_DISCARD_OCT_GB 0x1b4 #define MMC_XGMAC_RX_ALIGN_ERR_PKT 0x1bc +#define MMC_XGMAC_TX_FPE_INTR_MASK 0x204 #define MMC_XGMAC_TX_FPE_FRAG 0x208 #define MMC_XGMAC_TX_HOLD_REQ 0x20c +#define MMC_XGMAC_RX_FPE_INTR_MASK 0x224 #define MMC_XGMAC_RX_PKT_ASSEMBLY_ERR 0x228 #define MMC_XGMAC_RX_PKT_SMD_ERR 0x22c #define MMC_XGMAC_RX_PKT_ASSEMBLY_OK 0x230 @@ -352,6 +354,8 @@ static void dwxgmac_mmc_intr_all_mask(void __iomem *mmcaddr) { writel(0x0, mmcaddr + MMC_RX_INTR_MASK); writel(0x0, mmcaddr + MMC_TX_INTR_MASK); + writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_TX_FPE_INTR_MASK); + writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_FPE_INTR_MASK); writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_IPC_INTR_MASK); } From dc75da22adf63063a71904869816800781dd0903 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Sat, 25 Nov 2023 22:04:02 +0530 Subject: [PATCH 0736/1397] octeontx2-pf: Fix adding mbox work queue entry when num_vfs > 64 [ Upstream commit 51597219e0cd5157401d4d0ccb5daa4d9961676f ] When more than 64 VFs are enabled for a PF then mbox communication between VF and PF is not working as mbox work queueing for few VFs are skipped due to wrong calculation of VF numbers. Fixes: d424b6c02415 ("octeontx2-pf: Enable SRIOV and added VF mbox handling") Signed-off-by: Geetha sowjanya Signed-off-by: Subbaraya Sundeep Link: https://lore.kernel.org/r/1700930042-5400-1-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index ba95ac913274..6d56fc191845 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -566,7 +566,9 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) otx2_write64(pf, RVU_PF_VFPF_MBOX_INTX(1), intr); otx2_queue_work(mbox, pf->mbox_pfvf_wq, 64, vfs, intr, TYPE_PFVF); - vfs -= 64; + if (intr) + trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); + vfs = 64; } intr = otx2_read64(pf, RVU_PF_VFPF_MBOX_INTX(0)); @@ -574,7 +576,8 @@ static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) otx2_queue_work(mbox, pf->mbox_pfvf_wq, 0, vfs, intr, TYPE_PFVF); - trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); + if (intr) + trace_otx2_msg_interrupt(mbox->mbox.pdev, "VF(s) to PF", intr); return IRQ_HANDLED; } From d3290f7ec7e2f61fce9346e97cf3c7b0c86c2e08 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 25 Nov 2023 22:06:57 +0530 Subject: [PATCH 0737/1397] octeontx2-pf: Restore TC ingress police rules when interface is up [ Upstream commit fd7f98b2e12a3d96a92bde6640657ec7116f4372 ] TC ingress policer rules depends on interface receive queue contexts since the bandwidth profiles are attached to RQ contexts. When an interface is brought down all the queue contexts are freed. This in turn frees bandwidth profiles in hardware causing ingress police rules non-functional after the interface is brought up. Fix this by applying all the ingress police rules config to hardware in otx2_open. Also allow adding ingress rules only when interface is running since no contexts exist for the interface when it is down. Fixes: 68fbff68dbea ("octeontx2-pf: Add police action for TC flower") Signed-off-by: Subbaraya Sundeep Link: https://lore.kernel.org/r/1700930217-5707-1-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/nic/cn10k.c | 3 + .../marvell/octeontx2/nic/otx2_common.h | 2 + .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 + .../ethernet/marvell/octeontx2/nic/otx2_tc.c | 120 ++++++++++++++---- 4 files changed, 102 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index a4a258da8dd5..c1c99d7054f8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -450,6 +450,9 @@ int cn10k_set_ipolicer_rate(struct otx2_nic *pfvf, u16 profile, aq->prof.pebs_mantissa = 0; aq->prof_mask.pebs_mantissa = 0xFF; + aq->prof.hl_en = 0; + aq->prof_mask.hl_en = 1; + /* Fill AQ info */ aq->qidx = profile; aq->ctype = NIX_AQ_CTYPE_BANDPROF; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index e7c69b57147e..06910307085e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -1070,6 +1070,8 @@ int otx2_init_tc(struct otx2_nic *nic); void otx2_shutdown_tc(struct otx2_nic *nic); int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data); +void otx2_tc_apply_ingress_police_rules(struct otx2_nic *nic); + /* CGX/RPM DMAC filters support */ int otx2_dmacflt_get_max_cnt(struct otx2_nic *pf); int otx2_dmacflt_add(struct otx2_nic *pf, const u8 *mac, u32 bit_pos); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 6d56fc191845..532e324bdcc8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1873,6 +1873,8 @@ int otx2_open(struct net_device *netdev) if (pf->flags & OTX2_FLAG_DMACFLTR_SUPPORT) otx2_dmacflt_reinstall_flows(pf); + otx2_tc_apply_ingress_police_rules(pf); + err = otx2_rxtx_enable(pf, true); /* If a mbox communication error happens at this point then interface * will end up in a state such that it is in down state but hardware diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index fab9d85bfb37..423ce54eaea6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -45,6 +45,9 @@ struct otx2_tc_flow { bool is_act_police; u32 prio; struct npc_install_flow_req req; + u64 rate; + u32 burst; + bool is_pps; }; static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst, @@ -282,21 +285,10 @@ static int otx2_tc_egress_matchall_delete(struct otx2_nic *nic, return err; } -static int otx2_tc_act_set_police(struct otx2_nic *nic, - struct otx2_tc_flow *node, - struct flow_cls_offload *f, - u64 rate, u32 burst, u32 mark, - struct npc_install_flow_req *req, bool pps) +static int otx2_tc_act_set_hw_police(struct otx2_nic *nic, + struct otx2_tc_flow *node) { - struct netlink_ext_ack *extack = f->common.extack; - struct otx2_hw *hw = &nic->hw; - int rq_idx, rc; - - rq_idx = find_first_zero_bit(&nic->rq_bmap, hw->rx_queues); - if (rq_idx >= hw->rx_queues) { - NL_SET_ERR_MSG_MOD(extack, "Police action rules exceeded"); - return -EINVAL; - } + int rc; mutex_lock(&nic->mbox.lock); @@ -306,23 +298,17 @@ static int otx2_tc_act_set_police(struct otx2_nic *nic, return rc; } - rc = cn10k_set_ipolicer_rate(nic, node->leaf_profile, burst, rate, pps); + rc = cn10k_set_ipolicer_rate(nic, node->leaf_profile, + node->burst, node->rate, node->is_pps); if (rc) goto free_leaf; - rc = cn10k_map_unmap_rq_policer(nic, rq_idx, node->leaf_profile, true); + rc = cn10k_map_unmap_rq_policer(nic, node->rq, node->leaf_profile, true); if (rc) goto free_leaf; mutex_unlock(&nic->mbox.lock); - req->match_id = mark & 0xFFFFULL; - req->index = rq_idx; - req->op = NIX_RX_ACTIONOP_UCAST; - set_bit(rq_idx, &nic->rq_bmap); - node->is_act_police = true; - node->rq = rq_idx; - return 0; free_leaf: @@ -334,6 +320,39 @@ static int otx2_tc_act_set_police(struct otx2_nic *nic, return rc; } +static int otx2_tc_act_set_police(struct otx2_nic *nic, + struct otx2_tc_flow *node, + struct flow_cls_offload *f, + u64 rate, u32 burst, u32 mark, + struct npc_install_flow_req *req, bool pps) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct otx2_hw *hw = &nic->hw; + int rq_idx, rc; + + rq_idx = find_first_zero_bit(&nic->rq_bmap, hw->rx_queues); + if (rq_idx >= hw->rx_queues) { + NL_SET_ERR_MSG_MOD(extack, "Police action rules exceeded"); + return -EINVAL; + } + + req->match_id = mark & 0xFFFFULL; + req->index = rq_idx; + req->op = NIX_RX_ACTIONOP_UCAST; + + node->is_act_police = true; + node->rq = rq_idx; + node->burst = burst; + node->rate = rate; + node->is_pps = pps; + + rc = otx2_tc_act_set_hw_police(nic, node); + if (!rc) + set_bit(rq_idx, &nic->rq_bmap); + + return rc; +} + static int otx2_tc_parse_actions(struct otx2_nic *nic, struct flow_action *flow_action, struct npc_install_flow_req *req, @@ -986,6 +1005,11 @@ static int otx2_tc_del_flow(struct otx2_nic *nic, } if (flow_node->is_act_police) { + __clear_bit(flow_node->rq, &nic->rq_bmap); + + if (nic->flags & OTX2_FLAG_INTF_DOWN) + goto free_mcam_flow; + mutex_lock(&nic->mbox.lock); err = cn10k_map_unmap_rq_policer(nic, flow_node->rq, @@ -1001,11 +1025,10 @@ static int otx2_tc_del_flow(struct otx2_nic *nic, "Unable to free leaf bandwidth profile(%d)\n", flow_node->leaf_profile); - __clear_bit(flow_node->rq, &nic->rq_bmap); - mutex_unlock(&nic->mbox.lock); } +free_mcam_flow: otx2_del_mcam_flow_entry(nic, flow_node->entry, NULL); otx2_tc_update_mcam_table(nic, flow_cfg, flow_node, false); kfree_rcu(flow_node, rcu); @@ -1025,6 +1048,11 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT)) return -ENOMEM; + if (nic->flags & OTX2_FLAG_INTF_DOWN) { + NL_SET_ERR_MSG_MOD(extack, "Interface not initialized"); + return -EINVAL; + } + if (flow_cfg->nr_flows == flow_cfg->max_flows) { NL_SET_ERR_MSG_MOD(extack, "Free MCAM entry not available to add the flow"); @@ -1384,3 +1412,45 @@ void otx2_shutdown_tc(struct otx2_nic *nic) otx2_destroy_tc_flow_list(nic); } EXPORT_SYMBOL(otx2_shutdown_tc); + +static void otx2_tc_config_ingress_rule(struct otx2_nic *nic, + struct otx2_tc_flow *node) +{ + struct npc_install_flow_req *req; + + if (otx2_tc_act_set_hw_police(nic, node)) + return; + + mutex_lock(&nic->mbox.lock); + + req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox); + if (!req) + goto err; + + memcpy(req, &node->req, sizeof(struct npc_install_flow_req)); + + if (otx2_sync_mbox_msg(&nic->mbox)) + netdev_err(nic->netdev, + "Failed to install MCAM flow entry for ingress rule"); +err: + mutex_unlock(&nic->mbox.lock); +} + +void otx2_tc_apply_ingress_police_rules(struct otx2_nic *nic) +{ + struct otx2_flow_config *flow_cfg = nic->flow_cfg; + struct otx2_tc_flow *node; + + /* If any ingress policer rules exist for the interface then + * apply those rules. Ingress policer rules depend on bandwidth + * profiles linked to the receive queues. Since no receive queues + * exist when interface is down, ingress policer rules are stored + * and configured in hardware after all receive queues are allocated + * in otx2_open. + */ + list_for_each_entry(node, &flow_cfg->flow_list_tc, list) { + if (node->is_act_police) + otx2_tc_config_ingress_rule(nic, node); + } +} +EXPORT_SYMBOL(otx2_tc_apply_ingress_police_rules); From 9e23c7f75b32b6286f1706efaff50837de3c6cc2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 25 Nov 2023 15:33:58 -0600 Subject: [PATCH 0738/1397] neighbour: Fix __randomize_layout crash in struct neighbour [ Upstream commit 45b3fae4675dc1d4ee2d7aefa19d85ee4f891377 ] Previously, one-element and zero-length arrays were treated as true flexible arrays, even though they are actually "fake" flex arrays. The __randomize_layout would leave them untouched at the end of the struct, similarly to proper C99 flex-array members. However, this approach changed with commit 1ee60356c2dc ("gcc-plugins: randstruct: Only warn about true flexible arrays"). Now, only C99 flexible-array members will remain untouched at the end of the struct, while one-element and zero-length arrays will be subject to randomization. Fix a `__randomize_layout` crash in `struct neighbour` by transforming zero-length array `primary_key` into a proper C99 flexible-array member. Fixes: 1ee60356c2dc ("gcc-plugins: randstruct: Only warn about true flexible arrays") Closes: https://lore.kernel.org/linux-hardening/20231124102458.GB1503258@e124191.cambridge.arm.com/ Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Tested-by: Joey Gouly Link: https://lore.kernel.org/r/ZWJoRsJGnCPdJ3+2@work Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- include/net/neighbour.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 07022bb0d44d..0d28172193fa 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -162,7 +162,7 @@ struct neighbour { struct rcu_head rcu; struct net_device *dev; netdevice_tracker dev_tracker; - u8 primary_key[0]; + u8 primary_key[]; } __randomize_layout; struct neigh_ops { From 589959bf46e46f9d056782ea4601a546d163cba6 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Fri, 3 Nov 2023 10:13:54 -0500 Subject: [PATCH 0739/1397] efi/unaccepted: Fix off-by-one when checking for overlapping ranges [ Upstream commit 01b1e3ca0e5ce47bbae8217d47376ad01b331b07 ] When a task needs to accept memory it will scan the accepting_list to see if any ranges already being processed by other tasks overlap with its range. Due to an off-by-one in the range comparisons, a task might falsely determine that an overlapping range is being accepted, leading to an unnecessary delay before it begins processing the range. Fix the off-by-one in the range comparison to prevent this and slightly improve performance. Fixes: 50e782a86c98 ("efi/unaccepted: Fix soft lockups caused by parallel memory acceptance") Link: https://lore.kernel.org/linux-mm/20231101004523.vseyi5bezgfaht5i@amd.com/T/#me2eceb9906fcae5fe958b3fe88e41f920f8335b6 Reviewed-by: Kirill A. Shutemov Signed-off-by: Michael Roth Acked-by: Vlastimil Babka Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/unaccepted_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/unaccepted_memory.c b/drivers/firmware/efi/unaccepted_memory.c index 135278ddaf62..79fb687bb90f 100644 --- a/drivers/firmware/efi/unaccepted_memory.c +++ b/drivers/firmware/efi/unaccepted_memory.c @@ -100,7 +100,7 @@ void accept_memory(phys_addr_t start, phys_addr_t end) * overlap on physical address level. */ list_for_each_entry(entry, &accepting_list, list) { - if (entry->end < range.start) + if (entry->end <= range.start) continue; if (entry->start >= range.end) continue; From a9ef897677f957401842e45f2006167cdf757ee4 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 26 Nov 2023 23:01:02 +0100 Subject: [PATCH 0740/1397] r8169: prevent potential deadlock in rtl8169_close [ Upstream commit 91d3d149978ba7b238198dd80e4b823756aa7cfa ] ndo_stop() is RTNL-protected by net core, and the worker function takes RTNL as well. Therefore we will deadlock when trying to execute a pending work synchronously. To fix this execute any pending work asynchronously. This will do no harm because netif_running() is false in ndo_stop(), and therefore the work function is effectively a no-op. However we have to ensure that no task is running or pending after rtl_remove_one(), therefore add a call to cancel_work_sync(). Fixes: abe5fc42f9ce ("r8169: use RTNL to protect critical sections") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/12395867-1d17-4cac-aa7d-c691938fcddf@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a43e33e4b25e..62cabeeb842a 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4694,7 +4694,7 @@ static int rtl8169_close(struct net_device *dev) rtl8169_down(tp); rtl8169_rx_clear(tp); - cancel_work_sync(&tp->wk.work); + cancel_work(&tp->wk.work); free_irq(tp->irq, tp); @@ -4928,6 +4928,8 @@ static void rtl_remove_one(struct pci_dev *pdev) if (pci_dev_run_wake(pdev)) pm_runtime_get_noresume(&pdev->dev); + cancel_work_sync(&tp->wk.work); + unregister_netdev(tp->dev); if (tp->dash_type != RTL_DASH_NONE) From 658dc8b4a3fb0c9c3e67d67c6cdda4107e798a0c Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 27 Nov 2023 21:24:20 +0900 Subject: [PATCH 0741/1397] ravb: Fix races between ravb_tx_timeout_work() and net related ops [ Upstream commit 9870257a0a338cd8d6c1cddab74e703f490f6779 ] Fix races between ravb_tx_timeout_work() and functions of net_device_ops and ethtool_ops by using rtnl_trylock() and rtnl_unlock(). Note that since ravb_close() is under the rtnl lock and calls cancel_work_sync(), ravb_tx_timeout_work() should calls rtnl_trylock(). Otherwise, a deadlock may happen in ravb_tx_timeout_work() like below: CPU0 CPU1 ravb_tx_timeout() schedule_work() ... __dev_close_many() // Under rtnl lock ravb_close() cancel_work_sync() // Waiting ravb_tx_timeout_work() rtnl_lock() // This is possible to cause a deadlock If rtnl_trylock() fails, rescheduling the work with sleep for 1 msec. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20231127122420.3706751-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 0ef0b88b7145..de12c02c55cb 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1874,6 +1874,12 @@ static void ravb_tx_timeout_work(struct work_struct *work) struct net_device *ndev = priv->ndev; int error; + if (!rtnl_trylock()) { + usleep_range(1000, 2000); + schedule_work(&priv->work); + return; + } + netif_tx_stop_all_queues(ndev); /* Stop PTP Clock driver */ @@ -1907,7 +1913,7 @@ static void ravb_tx_timeout_work(struct work_struct *work) */ netdev_err(ndev, "%s: ravb_dmac_init() failed, error %d\n", __func__, error); - return; + goto out_unlock; } ravb_emac_init(ndev); @@ -1917,6 +1923,9 @@ static void ravb_tx_timeout_work(struct work_struct *work) ravb_ptp_init(ndev, priv->pdev); netif_tx_start_all_queues(ndev); + +out_unlock: + rtnl_unlock(); } /* Packet transmit function for Ethernet AVB */ From 61ae993c308fb91e889604efd410ebf84b975440 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 14:58:06 -0800 Subject: [PATCH 0742/1397] ethtool: don't propagate EOPNOTSUPP from dumps [ Upstream commit cbeb989e41f4094f54bec2cecce993f26f547bea ] The default dump handler needs to clear ret before returning. Otherwise if the last interface returns an inconsequential error this error will propagate to user space. This may confuse user space (ethtool CLI seems to ignore it, but YNL doesn't). It will also terminate the dump early for mutli-skb dump, because netlink core treats EOPNOTSUPP as a real error. Fixes: 728480f12442 ("ethtool: default handlers for GET requests") Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231126225806.2143528-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ethtool/netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 3bbd5afb7b31..fe3553f60bf3 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -505,6 +505,7 @@ static int ethnl_default_dumpit(struct sk_buff *skb, ret = skb->len; break; } + ret = 0; } rtnl_unlock(); From 484a2f50cc62538f0fed1245c68cd50a209150e4 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 28 Nov 2023 17:25:56 -0800 Subject: [PATCH 0743/1397] bpf, sockmap: af_unix stream sockets need to hold ref for pair sock [ Upstream commit 8866730aed5100f06d3d965c22f1c61f74942541 ] AF_UNIX stream sockets are a paired socket. So sending on one of the pairs will lookup the paired socket as part of the send operation. It is possible however to put just one of the pairs in a BPF map. This currently increments the refcnt on the sock in the sockmap to ensure it is not free'd by the stack before sockmap cleans up its state and stops any skbs being sent/recv'd to that socket. But we missed a case. If the peer socket is closed it will be free'd by the stack. However, the paired socket can still be referenced from BPF sockmap side because we hold a reference there. Then if we are sending traffic through BPF sockmap to that socket it will try to dereference the free'd pair in its send logic creating a use after free. And following splat: [59.900375] BUG: KASAN: slab-use-after-free in sk_wake_async+0x31/0x1b0 [59.901211] Read of size 8 at addr ffff88811acbf060 by task kworker/1:2/954 [...] [59.905468] Call Trace: [59.905787] [59.906066] dump_stack_lvl+0x130/0x1d0 [59.908877] print_report+0x16f/0x740 [59.910629] kasan_report+0x118/0x160 [59.912576] sk_wake_async+0x31/0x1b0 [59.913554] sock_def_readable+0x156/0x2a0 [59.914060] unix_stream_sendmsg+0x3f9/0x12a0 [59.916398] sock_sendmsg+0x20e/0x250 [59.916854] skb_send_sock+0x236/0xac0 [59.920527] sk_psock_backlog+0x287/0xaa0 To fix let BPF sockmap hold a refcnt on both the socket in the sockmap and its paired socket. It wasn't obvious how to contain the fix to bpf_unix logic. The primarily problem with keeping this logic in bpf_unix was: In the sock close() we could handle the deref by having a close handler. But, when we are destroying the psock through a map delete operation we wouldn't have gotten any signal thorugh the proto struct other than it being replaced. If we do the deref from the proto replace its too early because we need to deref the sk_pair after the backlog worker has been stopped. Given all this it seems best to just cache it at the end of the psock and eat 8B for the af_unix and vsock users. Notice dgram sockets are OK because they handle locking already. Fixes: 94531cfcbe79 ("af_unix: Add unix_stream_proto for sockmap") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20231129012557.95371-2-john.fastabend@gmail.com Signed-off-by: Sasha Levin --- include/linux/skmsg.h | 1 + include/net/af_unix.h | 1 + net/core/skmsg.c | 2 ++ net/unix/af_unix.c | 2 -- net/unix/unix_bpf.c | 5 +++++ 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c1637515a8a4..c953b8c0d2f4 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -106,6 +106,7 @@ struct sk_psock { struct mutex work_mutex; struct sk_psock_work_state work_state; struct delayed_work work; + struct sock *sk_pair; struct rcu_work rwork; }; diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 824c258143a3..49c4640027d8 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -75,6 +75,7 @@ struct unix_sock { }; #define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk) +#define unix_peer(sk) (unix_sk(sk)->peer) #define peer_wait peer_wq.wait diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 6c31eefbd777..93ecfceac1bc 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -826,6 +826,8 @@ static void sk_psock_destroy(struct work_struct *work) if (psock->sk_redir) sock_put(psock->sk_redir); + if (psock->sk_pair) + sock_put(psock->sk_pair); sock_put(psock->sk); kfree(psock); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3e6eeacb13ae..1e1a88bd4e68 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -212,8 +212,6 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) } #endif /* CONFIG_SECURITY_NETWORK */ -#define unix_peer(sk) (unix_sk(sk)->peer) - static inline int unix_our_peer(struct sock *sk, struct sock *osk) { return unix_peer(osk) == sk; diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 2f9d8271c6ec..7ea7c3a0d0d0 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -159,12 +159,17 @@ int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool re int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { + struct sock *sk_pair; + if (restore) { sk->sk_write_space = psock->saved_write_space; sock_replace_proto(sk, psock->sk_proto); return 0; } + sk_pair = unix_peer(sk); + sock_hold(sk_pair); + psock->sk_pair = sk_pair; unix_stream_bpf_check_needs_rebuild(psock->sk_proto); sock_replace_proto(sk, &unix_stream_bpf_prot); return 0; From fd7f7a8ad3363fd94fc3868616cc70a1d8dfc01b Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 27 Nov 2023 13:23:38 -0800 Subject: [PATCH 0744/1397] ice: Fix VF Reset paths when interface in a failed over aggregate [ Upstream commit 9f74a3dfcf83e11aedcb98250b8040dbc6d9659a ] There is an error when an interface has the following conditions: - PF is in an aggregate (bond) - PF has VFs created on it - bond is in a state where it is failed-over to the secondary interface - A VF reset is issued on one or more of those VFs The issue is generated by the originating PF trying to rebuild or reconfigure the VF resources. Since the bond is failed over to the secondary interface the queue contexts are in a modified state. To fix this issue, have the originating interface reclaim its resources prior to the tear-down and rebuild or reconfigure. Then after the process is complete, move the resources back to the currently active interface. There are multiple paths that can be used depending on what triggered the event, so create a helper function to move the queues and use paired calls to the helper (back to origin, process, then move back to active interface) under the same lag_mutex lock. Fixes: 1e0f9881ef79 ("ice: Flesh out implementation of support for SRIOV on bonded interface") Signed-off-by: Dave Ertman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20231127212340.1137657-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_lag.c | 122 +++++++++++------- drivers/net/ethernet/intel/ice/ice_lag.h | 1 + drivers/net/ethernet/intel/ice/ice_vf_lib.c | 20 +++ drivers/net/ethernet/intel/ice/ice_virtchnl.c | 25 ++++ 4 files changed, 118 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index fb40ad98e6aa..d86e2460b5a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -536,6 +536,50 @@ ice_lag_move_vf_node_tc(struct ice_lag *lag, u8 oldport, u8 newport, dev_dbg(dev, "Problem restarting traffic for LAG node move\n"); } +/** + * ice_lag_build_netdev_list - populate the lag struct's netdev list + * @lag: local lag struct + * @ndlist: pointer to netdev list to populate + */ +static void ice_lag_build_netdev_list(struct ice_lag *lag, + struct ice_lag_netdev_list *ndlist) +{ + struct ice_lag_netdev_list *nl; + struct net_device *tmp_nd; + + INIT_LIST_HEAD(&ndlist->node); + rcu_read_lock(); + for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { + nl = kzalloc(sizeof(*nl), GFP_ATOMIC); + if (!nl) + break; + + nl->netdev = tmp_nd; + list_add(&nl->node, &ndlist->node); + } + rcu_read_unlock(); + lag->netdev_head = &ndlist->node; +} + +/** + * ice_lag_destroy_netdev_list - free lag struct's netdev list + * @lag: pointer to local lag struct + * @ndlist: pointer to lag struct netdev list + */ +static void ice_lag_destroy_netdev_list(struct ice_lag *lag, + struct ice_lag_netdev_list *ndlist) +{ + struct ice_lag_netdev_list *entry, *n; + + rcu_read_lock(); + list_for_each_entry_safe(entry, n, &ndlist->node, node) { + list_del(&entry->node); + kfree(entry); + } + rcu_read_unlock(); + lag->netdev_head = NULL; +} + /** * ice_lag_move_single_vf_nodes - Move Tx scheduling nodes for single VF * @lag: primary interface LAG struct @@ -564,7 +608,6 @@ ice_lag_move_single_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport, void ice_lag_move_new_vf_nodes(struct ice_vf *vf) { struct ice_lag_netdev_list ndlist; - struct list_head *tmp, *n; u8 pri_port, act_port; struct ice_lag *lag; struct ice_vsi *vsi; @@ -588,38 +631,15 @@ void ice_lag_move_new_vf_nodes(struct ice_vf *vf) pri_port = pf->hw.port_info->lport; act_port = lag->active_port; - if (lag->upper_netdev) { - struct ice_lag_netdev_list *nl; - struct net_device *tmp_nd; - - INIT_LIST_HEAD(&ndlist.node); - rcu_read_lock(); - for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { - nl = kzalloc(sizeof(*nl), GFP_ATOMIC); - if (!nl) - break; - - nl->netdev = tmp_nd; - list_add(&nl->node, &ndlist.node); - } - rcu_read_unlock(); - } - - lag->netdev_head = &ndlist.node; + if (lag->upper_netdev) + ice_lag_build_netdev_list(lag, &ndlist); if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) && lag->bonded && lag->primary && pri_port != act_port && !list_empty(lag->netdev_head)) ice_lag_move_single_vf_nodes(lag, pri_port, act_port, vsi->idx); - list_for_each_safe(tmp, n, &ndlist.node) { - struct ice_lag_netdev_list *entry; - - entry = list_entry(tmp, struct ice_lag_netdev_list, node); - list_del(&entry->node); - kfree(entry); - } - lag->netdev_head = NULL; + ice_lag_destroy_netdev_list(lag, &ndlist); new_vf_unlock: mutex_unlock(&pf->lag_mutex); @@ -646,6 +666,29 @@ static void ice_lag_move_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport) ice_lag_move_single_vf_nodes(lag, oldport, newport, i); } +/** + * ice_lag_move_vf_nodes_cfg - move vf nodes outside LAG netdev event context + * @lag: local lag struct + * @src_prt: lport value for source port + * @dst_prt: lport value for destination port + * + * This function is used to move nodes during an out-of-netdev-event situation, + * primarily when the driver needs to reconfigure or recreate resources. + * + * Must be called while holding the lag_mutex to avoid lag events from + * processing while out-of-sync moves are happening. Also, paired moves, + * such as used in a reset flow, should both be called under the same mutex + * lock to avoid changes between start of reset and end of reset. + */ +void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt) +{ + struct ice_lag_netdev_list ndlist; + + ice_lag_build_netdev_list(lag, &ndlist); + ice_lag_move_vf_nodes(lag, src_prt, dst_prt); + ice_lag_destroy_netdev_list(lag, &ndlist); +} + #define ICE_LAG_SRIOV_CP_RECIPE 10 #define ICE_LAG_SRIOV_TRAIN_PKT_LEN 16 @@ -2022,7 +2065,6 @@ void ice_lag_rebuild(struct ice_pf *pf) { struct ice_lag_netdev_list ndlist; struct ice_lag *lag, *prim_lag; - struct list_head *tmp, *n; u8 act_port, loc_port; if (!pf->lag || !pf->lag->bonded) @@ -2034,21 +2076,7 @@ void ice_lag_rebuild(struct ice_pf *pf) if (lag->primary) { prim_lag = lag; } else { - struct ice_lag_netdev_list *nl; - struct net_device *tmp_nd; - - INIT_LIST_HEAD(&ndlist.node); - rcu_read_lock(); - for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) { - nl = kzalloc(sizeof(*nl), GFP_ATOMIC); - if (!nl) - break; - - nl->netdev = tmp_nd; - list_add(&nl->node, &ndlist.node); - } - rcu_read_unlock(); - lag->netdev_head = &ndlist.node; + ice_lag_build_netdev_list(lag, &ndlist); prim_lag = ice_lag_find_primary(lag); } @@ -2078,13 +2106,7 @@ void ice_lag_rebuild(struct ice_pf *pf) ice_clear_rdma_cap(pf); lag_rebuild_out: - list_for_each_safe(tmp, n, &ndlist.node) { - struct ice_lag_netdev_list *entry; - - entry = list_entry(tmp, struct ice_lag_netdev_list, node); - list_del(&entry->node); - kfree(entry); - } + ice_lag_destroy_netdev_list(lag, &ndlist); mutex_unlock(&pf->lag_mutex); } diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index facb6c894b6d..7f2298767501 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -63,4 +63,5 @@ int ice_init_lag(struct ice_pf *pf); void ice_deinit_lag(struct ice_pf *pf); void ice_lag_rebuild(struct ice_pf *pf); bool ice_lag_is_switchdev_running(struct ice_pf *pf); +void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt); #endif /* _ICE_LAG_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 24e4f4d897b6..d488c7156d09 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -827,12 +827,16 @@ static void ice_notify_vf_reset(struct ice_vf *vf) int ice_reset_vf(struct ice_vf *vf, u32 flags) { struct ice_pf *pf = vf->pf; + struct ice_lag *lag; struct ice_vsi *vsi; + u8 act_prt, pri_prt; struct device *dev; int err = 0; bool rsd; dev = ice_pf_to_dev(pf); + act_prt = ICE_LAG_INVALID_PORT; + pri_prt = pf->hw.port_info->lport; if (flags & ICE_VF_RESET_NOTIFY) ice_notify_vf_reset(vf); @@ -843,6 +847,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) return 0; } + lag = pf->lag; + mutex_lock(&pf->lag_mutex); + if (lag && lag->bonded && lag->primary) { + act_prt = lag->active_port; + if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT && + lag->upper_netdev) + ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt); + else + act_prt = ICE_LAG_INVALID_PORT; + } + if (flags & ICE_VF_RESET_LOCK) mutex_lock(&vf->cfg_lock); else @@ -935,6 +950,11 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) if (flags & ICE_VF_RESET_LOCK) mutex_unlock(&vf->cfg_lock); + if (lag && lag->bonded && lag->primary && + act_prt != ICE_LAG_INVALID_PORT) + ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt); + mutex_unlock(&pf->lag_mutex); + return err; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index db97353efd06..62337e6569b2 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -1600,9 +1600,24 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; struct ice_pf *pf = vf->pf; + struct ice_lag *lag; struct ice_vsi *vsi; + u8 act_prt, pri_prt; int i = -1, q_idx; + lag = pf->lag; + mutex_lock(&pf->lag_mutex); + act_prt = ICE_LAG_INVALID_PORT; + pri_prt = pf->hw.port_info->lport; + if (lag && lag->bonded && lag->primary) { + act_prt = lag->active_port; + if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT && + lag->upper_netdev) + ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt); + else + act_prt = ICE_LAG_INVALID_PORT; + } + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) goto error_param; @@ -1710,6 +1725,11 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } } + if (lag && lag->bonded && lag->primary && + act_prt != ICE_LAG_INVALID_PORT) + ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt); + mutex_unlock(&pf->lag_mutex); + /* send the response to the VF */ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, VIRTCHNL_STATUS_SUCCESS, NULL, 0); @@ -1724,6 +1744,11 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) vf->vf_id, i); } + if (lag && lag->bonded && lag->primary && + act_prt != ICE_LAG_INVALID_PORT) + ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt); + mutex_unlock(&pf->lag_mutex); + ice_lag_move_new_vf_nodes(vf); /* send the response to the VF */ From c870191fc87c4c15ab6b7a2c72e1a98d29af9210 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:34 +0200 Subject: [PATCH 0745/1397] net: ravb: Check return value of reset_control_deassert() [ Upstream commit d8eb6ea4b302e7ff78535c205510e359ac10a0bd ] reset_control_deassert() could return an error. Some devices cannot work if reset signal de-assert operation fails. To avoid this check the return code of reset_control_deassert() in ravb_probe() and take proper action. Along with it, the free_netdev() call from the error path was moved after reset_control_assert() on its own label (out_free_netdev) to free netdev in case reset_control_deassert() fails. Fixes: 0d13a1a464a0 ("ravb: Add reset support") Reviewed-by: Sergey Shtylyov Reviewed-by: Philipp Zabel Signed-off-by: Claudiu Beznea Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index de12c02c55cb..f76ccb543838 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2654,7 +2654,10 @@ static int ravb_probe(struct platform_device *pdev) ndev->features = info->net_features; ndev->hw_features = info->net_hw_features; - reset_control_deassert(rstc); + error = reset_control_deassert(rstc); + if (error) + goto out_free_netdev; + pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -2881,11 +2884,11 @@ static int ravb_probe(struct platform_device *pdev) out_disable_refclk: clk_disable_unprepare(priv->refclk); out_release: - free_netdev(ndev); - pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); reset_control_assert(rstc); +out_free_netdev: + free_netdev(ndev); return error; } From 4782f4b74268f6217984b7ff295d8d19a04ff9f1 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:35 +0200 Subject: [PATCH 0746/1397] net: ravb: Use pm_runtime_resume_and_get() [ Upstream commit 88b74831faaee455c2af380382d979fc38e79270 ] pm_runtime_get_sync() may return an error. In case it returns with an error dev->power.usage_count needs to be decremented. pm_runtime_resume_and_get() takes care of this. Thus use it. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Reviewed-by: Sergey Shtylyov Signed-off-by: Claudiu Beznea Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index f76ccb543838..cdcac7d1f93a 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2659,7 +2659,9 @@ static int ravb_probe(struct platform_device *pdev) goto out_free_netdev; pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); + error = pm_runtime_resume_and_get(&pdev->dev); + if (error < 0) + goto out_rpm_disable; if (info->multi_irqs) { if (info->err_mgmt_irqs) @@ -2885,6 +2887,7 @@ static int ravb_probe(struct platform_device *pdev) clk_disable_unprepare(priv->refclk); out_release: pm_runtime_put(&pdev->dev); +out_rpm_disable: pm_runtime_disable(&pdev->dev); reset_control_assert(rstc); out_free_netdev: From 1a4fc93dfaa7b98fdf16550497206d4a7fe8e89b Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:36 +0200 Subject: [PATCH 0747/1397] net: ravb: Make write access to CXR35 first before accessing other EMAC registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d78c0ced60d5e2f8b5a4a0468a5c400b24aeadf2 ] Hardware manual of RZ/G3S (and RZ/G2L) specifies the following on the description of CXR35 register (chapter "PHY interface select register (CXR35)"): "After release reset, make write-access to this register before making write-access to other registers (except MDIOMOD). Even if not need to change the value of this register, make write-access to this register at least one time. Because RGMII/MII MODE is recognized by accessing this register". The setup procedure for EMAC module (chapter "Setup procedure" of RZ/G3S, RZ/G2L manuals) specifies the E-MAC.CXR35 register is the first EMAC register that is to be configured. Note [A] from chapter "PHY interface select register (CXR35)" specifies the following: [A] The case which CXR35 SEL_XMII is used for the selection of RGMII/MII in APB Clock 100 MHz. (1) To use RGMII interface, Set ‘H’03E8_0000’ to this register. (2) To use MII interface, Set ‘H’03E8_0002’ to this register. Take into account these indication. Fixes: 1089877ada8d ("ravb: Add RZ/G2L MII interface support") Reviewed-by: Sergey Shtylyov Signed-off-by: Claudiu Beznea Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index cdcac7d1f93a..9bf938eed66b 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -515,6 +515,15 @@ static void ravb_emac_init_gbeth(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); + if (priv->phy_interface == PHY_INTERFACE_MODE_MII) { + ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_MII, CXR35); + ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, 0); + } else { + ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_RGMII, CXR35); + ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, + CXR31_SEL_LINK0); + } + /* Receive frame limit set register */ ravb_write(ndev, GBETH_RX_BUFF_MAX + ETH_FCS_LEN, RFLR); @@ -537,14 +546,6 @@ static void ravb_emac_init_gbeth(struct net_device *ndev) /* E-MAC interrupt enable register */ ravb_write(ndev, ECSIPR_ICDIP, ECSIPR); - - if (priv->phy_interface == PHY_INTERFACE_MODE_MII) { - ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, 0); - ravb_write(ndev, (1000 << 16) | CXR35_SEL_XMII_MII, CXR35); - } else { - ravb_modify(ndev, CXR31, CXR31_SEL_LINK0 | CXR31_SEL_LINK1, - CXR31_SEL_LINK0); - } } static void ravb_emac_init_rcar(struct net_device *ndev) From c03af071bee7f8a6f5e16214c5eb4ab177a9d363 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:37 +0200 Subject: [PATCH 0748/1397] net: ravb: Start TX queues after HW initialization succeeded [ Upstream commit 6f32c086602050fc11157adeafaa1c1eb393f0af ] ravb_phy_start() may fail. If that happens, the TX queues will remain started. Thus, move the netif_tx_start_all_queues() after PHY is successfully initialized. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Reviewed-by: Sergey Shtylyov Signed-off-by: Claudiu Beznea Reviewed-by: Kalesh AP Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 9bf938eed66b..1b35cc216d38 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1812,13 +1812,13 @@ static int ravb_open(struct net_device *ndev) if (info->gptp) ravb_ptp_init(ndev, priv->pdev); - netif_tx_start_all_queues(ndev); - /* PHY control start */ error = ravb_phy_start(ndev); if (error) goto out_ptp_stop; + netif_tx_start_all_queues(ndev); + return 0; out_ptp_stop: From 736651c8f874705d8a1997a718c3f369934c393b Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:38 +0200 Subject: [PATCH 0749/1397] net: ravb: Stop DMA in case of failures on ravb_open() [ Upstream commit eac16a733427ba0de2449ffc7bd3da32ddb65cb7 ] In case ravb_phy_start() returns with error the settings applied in ravb_dmac_init() are not reverted (e.g. config mode). For this call ravb_stop_dma() on failure path of ravb_open(). Fixes: a0d2f20650e8 ("Renesas Ethernet AVB PTP clock driver") Reviewed-by: Sergey Shtylyov Signed-off-by: Claudiu Beznea Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 1b35cc216d38..0b8af7be20fa 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1825,6 +1825,7 @@ static int ravb_open(struct net_device *ndev) /* Stop PTP Clock driver */ if (info->gptp) ravb_ptp_stop(ndev); + ravb_stop_dma(ndev); out_free_irq_mgmta: if (!info->multi_irqs) goto out_free_irq; From 11a8e1ff0d0de713f4c7c0514fed824964eb7cfa Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 28 Nov 2023 10:04:39 +0200 Subject: [PATCH 0750/1397] net: ravb: Keep reverse order of operations in ravb_remove() [ Upstream commit edf9bc396e05081ca281ffb0cd41e44db478ff26 ] On RZ/G3S SMARC Carrier II board having RGMII connections b/w Ethernet MACs and PHYs it has been discovered that doing unbind/bind for ravb driver in a loop leads to wrong speed and duplex for Ethernet links and broken connectivity (the connectivity cannot be restored even with bringing interface down/up). Before doing unbind/bind the Ethernet interfaces were configured though systemd. The sh instructions used to do unbind/bind were: $ cd /sys/bus/platform/drivers/ravb/ $ while :; do echo 11c30000.ethernet > unbind ; \ echo 11c30000.ethernet > bind; done It has been discovered that there is a race b/w IOCTLs initialized by systemd at the response of success binding and the "ravb_write(ndev, CCC_OPC_RESET, CCC)" call in ravb_remove() as follows: 1/ as a result of bind success the user space open/configures the interfaces tough an IOCTL; the following stack trace has been identified on RZ/G3S: Call trace: dump_backtrace+0x9c/0x100 show_stack+0x20/0x38 dump_stack_lvl+0x48/0x60 dump_stack+0x18/0x28 ravb_open+0x70/0xa58 __dev_open+0xf4/0x1e8 __dev_change_flags+0x198/0x218 dev_change_flags+0x2c/0x80 devinet_ioctl+0x640/0x708 inet_ioctl+0x1e4/0x200 sock_do_ioctl+0x50/0x108 sock_ioctl+0x240/0x358 __arm64_sys_ioctl+0xb0/0x100 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0xc8/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x34/0xb8 el0t_64_sync_handler+0xc0/0xc8 el0t_64_sync+0x190/0x198 2/ this call may execute concurrently with ravb_remove() as the unbind/bind operation was executed in a loop 3/ if the operation mode is changed to RESET (through ravb_write(ndev, CCC_OPC_RESET, CCC) call in ravb_remove()) while the above ravb_open() is in progress it may lead to MAC (or PHY, or MAC-PHY connection, the right point hasn't been identified at the moment) to be broken, thus the Ethernet connectivity fails to restore. The simple fix for this is to move ravb_write(ndev, CCC_OPC_RESET, CCC)) after unregister_netdev() to avoid resetting the controller while the netdev interface is still registered. To avoid future issues in ravb_remove(), the patch follows the proper order of operations in ravb_remove(): reverse order compared with ravb_probe(). This avoids described races as the IOCTLs as well as unregister_netdev() (called now at the beginning of ravb_remove()) calls rtnl_lock() before continuing and IOCTLs check (though devinet_ioctl()) if device is still registered just after taking the lock: int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) { // ... rtnl_lock(); ret = -ENODEV; dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) goto done; // ... done: rtnl_unlock(); out: return ret; } Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Reviewed-by: Sergey Shtylyov Signed-off-by: Claudiu Beznea Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 0b8af7be20fa..bb56cf409042 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2903,22 +2903,26 @@ static int ravb_remove(struct platform_device *pdev) struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *info = priv->info; - /* Stop PTP Clock driver */ - if (info->ccc_gac) - ravb_ptp_stop(ndev); - - clk_disable_unprepare(priv->gptp_clk); - clk_disable_unprepare(priv->refclk); - - /* Set reset mode */ - ravb_write(ndev, CCC_OPC_RESET, CCC); unregister_netdev(ndev); if (info->nc_queues) netif_napi_del(&priv->napi[RAVB_NC]); netif_napi_del(&priv->napi[RAVB_BE]); + ravb_mdio_release(priv); + + /* Stop PTP Clock driver */ + if (info->ccc_gac) + ravb_ptp_stop(ndev); + dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); + + /* Set reset mode */ + ravb_write(ndev, CCC_OPC_RESET, CCC); + + clk_disable_unprepare(priv->gptp_clk); + clk_disable_unprepare(priv->refclk); + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); reset_control_assert(priv->rstc); From 8db5cb33d6e4b63778413a82340ebae72db0e60d Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Mon, 2 Oct 2023 22:08:02 -0500 Subject: [PATCH 0751/1397] powerpc/pseries/iommu: enable_ddw incorrectly returns direct mapping for SR-IOV device [ Upstream commit 3bf983e4e93ce8e6d69e9d63f52a66ec0856672e ] When a device is initialized, the driver invokes dma_supported() twice - first for streaming mappings followed by coherent mappings. For an SR-IOV device, default window is deleted and DDW created. With vPMEM enabled, TCE mappings are dynamically created for both vPMEM and SR-IOV device. There are no direct mappings. First time when dma_supported() is called with 64 bit mask, DDW is created and marked as dynamic window. The second time dma_supported() is called, enable_ddw() finds existing window for the device and incorrectly returns it as "direct mapping". This only happens when size of DDW is big enough to map max LPAR memory. This results in streaming TCEs to not get dynamically mapped, since code incorrently assumes these are already pre-mapped. The adapter initially comes up but goes down due to EEH. Fixes: 381ceda88c4c ("powerpc/pseries/iommu: Make use of DDW for indirect mapping") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Gaurav Batra Signed-off-by: Michael Ellerman Link: https://msgid.link/20231003030802.47914-1-gbatra@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/iommu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 16d93b580f61..496e16c588aa 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -914,7 +914,8 @@ static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_ return 0; } -static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift) +static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift, + bool *direct_mapping) { struct dma_win *window; const struct dynamic_dma_window_prop *dma64; @@ -927,6 +928,7 @@ static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *windo dma64 = window->prop; *dma_addr = be64_to_cpu(dma64->dma_base); *window_shift = be32_to_cpu(dma64->window_shift); + *direct_mapping = window->direct; found = true; break; } @@ -1270,10 +1272,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) mutex_lock(&dma_win_init_mutex); - if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) { - direct_mapping = (len >= max_ram_len); + if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len, &direct_mapping)) goto out_unlock; - } /* * If we already went through this for a previous function of From 7bfe7741468c47309ff2876e9ef4aff86f40ada1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 24 Oct 2023 10:15:20 +0200 Subject: [PATCH 0752/1397] s390/cmma: fix handling of swapper_pg_dir and invalid_pg_dir [ Upstream commit 84bb41d5df48868055d159d9247b80927f1f70f9 ] If the cmma no-dat feature is available the kernel page tables are walked to identify and mark all pages which are used for address translation (all region, segment, and page tables). In a subsequent loop all other pages are marked as "no-dat" pages with the ESSA instruction. This information is visible to the hypervisor, so that the hypervisor can optimize purging of guest TLB entries. All pages used for swapper_pg_dir and invalid_pg_dir are incorrectly marked as no-dat, which in turn can result in incorrect guest TLB flushes. Fix this by marking those pages correctly as being used for DAT. Cc: Reviewed-by: Claudio Imbrenda Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/mm/page-states.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 00e7b0876dc5..79a037f49f70 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -181,6 +181,12 @@ void __init cmma_init_nodat(void) return; /* Mark pages used in kernel page tables */ mark_kernel_pgd(); + page = virt_to_page(&swapper_pg_dir); + for (i = 0; i < 4; i++) + set_bit(PG_arch_1, &page[i].flags); + page = virt_to_page(&invalid_pg_dir); + for (i = 0; i < 4; i++) + set_bit(PG_arch_1, &page[i].flags); /* Set all kernel pages not used for page tables to stable/no-dat */ for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { From a8728dbb4ba279ca7436649d62d4dfe6d21a130b Mon Sep 17 00:00:00 2001 From: Ian Chen Date: Tue, 4 Jul 2023 15:31:43 +0800 Subject: [PATCH 0753/1397] drm/amd/display: Refactor edp power control [ Upstream commit 45f98fccb1f6895f527bd5f811f23478c2f920f5 ] [Why & How] To organize the edp power control a bit: 1. add flag in dc_link to indicate dc to skip all implicit eDP power control. 2. add edp_set_panel_power link service for DM to call. Reviewed-by: Aric Cyr Reviewed-by: Jun Lei Acked-by: Wayne Lin Signed-off-by: Ian Chen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Stable-dep-of: b0399e22ada0 ("drm/amd/display: Remove power sequencing check") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc.c | 21 ++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 4 +++ drivers/gpu/drm/amd/display/dc/dc_stream.h | 1 - .../display/dc/dce110/dce110_hw_sequencer.c | 2 +- .../drm/amd/display/dc/dcn314/dcn314_hwseq.c | 3 +- drivers/gpu/drm/amd/display/dc/inc/link.h | 1 + .../gpu/drm/amd/display/dc/link/link_dpms.c | 9 +++--- .../drm/amd/display/dc/link/link_factory.c | 1 + .../display/dc/link/protocols/link_dp_phy.c | 3 +- .../link/protocols/link_edp_panel_control.c | 29 +++++++++++++++++++ .../link/protocols/link_edp_panel_control.h | 1 + 11 files changed, 66 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 186936ad283a..a1be93f6385c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5254,3 +5254,24 @@ void dc_query_current_properties(struct dc *dc, struct dc_current_properties *pr properties->cursor_size_limit = subvp_in_use ? 64 : dc->caps.max_cursor_size; } +/** + ***************************************************************************** + * dc_set_edp_power() - DM controls eDP power to be ON/OFF + * + * Called when DM wants to power on/off eDP. + * Only work on links with flag skip_implict_edp_power_control is set. + * + ***************************************************************************** + */ +void dc_set_edp_power(const struct dc *dc, struct dc_link *edp_link, + bool powerOn) +{ + if (edp_link->connector_signal != SIGNAL_TYPE_EDP) + return; + + if (edp_link->skip_implict_edp_power_control == false) + return; + + edp_link->dc->link_srv->edp_set_panel_power(edp_link, powerOn); +} + diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index c05e91b257ac..dd7bf31ef6b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1578,6 +1578,7 @@ struct dc_link { struct phy_state phy_state; // BW ALLOCATON USB4 ONLY struct dc_dpia_bw_alloc dpia_bw_alloc_config; + bool skip_implict_edp_power_control; }; /* Return an enumerated dc_link. @@ -1597,6 +1598,9 @@ void dc_get_edp_links(const struct dc *dc, struct dc_link **edp_links, int *edp_num); +void dc_set_edp_power(const struct dc *dc, struct dc_link *edp_link, + bool powerOn); + /* The function initiates detection handshake over the given link. It first * determines if there are display connections over the link. If so it initiates * detection protocols supported by the connected receiver device. The function diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 3697ea1d14c1..d5b3e3a32cc6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -302,7 +302,6 @@ struct dc_stream_state { bool vblank_synchronized; bool fpo_in_use; struct mall_stream_config mall_stream_config; - bool skip_edp_power_down; }; #define ABM_LEVEL_IMMEDIATE_DISABLE 255 diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 2a6157555fd1..9c78e42418f3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1226,7 +1226,7 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) struct dce_hwseq *hws = link->dc->hwseq; if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { - if (!stream->skip_edp_power_down) + if (!link->skip_implict_edp_power_control) hws->funcs.edp_backlight_control(link, false); link->dc->hwss.set_abm_immediate_disable(pipe_ctx); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 4d2820ffe468..33a8626bda73 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -476,7 +476,8 @@ void dcn314_disable_link_output(struct dc_link *link, struct dmcu *dmcu = dc->res_pool->dmcu; if (signal == SIGNAL_TYPE_EDP && - link->dc->hwss.edp_backlight_control) + link->dc->hwss.edp_backlight_control && + !link->skip_implict_edp_power_control) link->dc->hwss.edp_backlight_control(link, false); else if (dmcu != NULL && dmcu->funcs->lock_phy) dmcu->funcs->lock_phy(dmcu); diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index e3e8c76c17cf..d7685368140a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -295,6 +295,7 @@ struct link_service { bool (*edp_receiver_ready_T9)(struct dc_link *link); bool (*edp_receiver_ready_T7)(struct dc_link *link); bool (*edp_power_alpm_dpcd_enable)(struct dc_link *link, bool enable); + void (*edp_set_panel_power)(struct dc_link *link, bool powerOn); /*************************** DP CTS ************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 79aef205598b..28cb1f5a504d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -1930,7 +1930,8 @@ static void disable_link_dp(struct dc_link *link, dp_disable_link_phy(link, link_res, signal); if (link->connector_signal == SIGNAL_TYPE_EDP) { - if (!link->dc->config.edp_no_power_sequencing) + if (!link->dc->config.edp_no_power_sequencing && + !link->skip_implict_edp_power_control) link->dc->hwss.edp_power_control(link, false); } @@ -2219,7 +2220,7 @@ static enum dc_status enable_link( * link settings. Need to call disable first before enabling at * new link settings. */ - if (link->link_status.link_active && !stream->skip_edp_power_down) + if (link->link_status.link_active) disable_link(link, &pipe_ctx->link_res, pipe_ctx->stream->signal); switch (pipe_ctx->stream->signal) { @@ -2338,9 +2339,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) dc->hwss.disable_stream(pipe_ctx); } else { dc->hwss.disable_stream(pipe_ctx); - if (!pipe_ctx->stream->skip_edp_power_down) { - disable_link(pipe_ctx->stream->link, &pipe_ctx->link_res, pipe_ctx->stream->signal); - } + disable_link(pipe_ctx->stream->link, &pipe_ctx->link_res, pipe_ctx->stream->signal); } if (pipe_ctx->stream->timing.flags.DSC) { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 0895742a3102..e406561c2c23 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -223,6 +223,7 @@ static void construct_link_service_edp_panel_control(struct link_service *link_s link_srv->edp_receiver_ready_T9 = edp_receiver_ready_T9; link_srv->edp_receiver_ready_T7 = edp_receiver_ready_T7; link_srv->edp_power_alpm_dpcd_enable = edp_power_alpm_dpcd_enable; + link_srv->edp_set_panel_power = edp_set_panel_power; } /* link dp cts implements dp compliance test automation protocols and manual diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c index b7abba55bc2f..0050e0a06cbc 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c @@ -73,7 +73,8 @@ void dp_disable_link_phy(struct dc_link *link, { struct dc *dc = link->ctx->dc; - if (!link->wa_flags.dp_keep_receiver_powered) + if (!link->wa_flags.dp_keep_receiver_powered && + !link->skip_implict_edp_power_control) dpcd_write_rx_power_ctrl(link, false); dc->hwss.disable_link_output(link, link_res, signal); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 98e715aa6d8e..24b47fa82f93 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -33,6 +33,7 @@ #include "link_dp_capability.h" #include "dm_helpers.h" #include "dal_asic_id.h" +#include "link_dp_phy.h" #include "dce/dmub_psr.h" #include "dc/dc_dmub_srv.h" #include "dce/dmub_replay.h" @@ -362,6 +363,34 @@ void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd) link->dc->hwss.edp_backlight_control(link, true); } +void edp_set_panel_power(struct dc_link *link, bool powerOn) +{ + if (powerOn) { + // 1. panel VDD on + if (!link->dc->config.edp_no_power_sequencing) + link->dc->hwss.edp_power_control(link, true); + link->dc->hwss.edp_wait_for_hpd_ready(link, true); + + // 2. panel BL on + if (link->dc->hwss.edp_backlight_control) + link->dc->hwss.edp_backlight_control(link, true); + + // 3. Rx power on + dpcd_write_rx_power_ctrl(link, true); + } else { + // 3. Rx power off + dpcd_write_rx_power_ctrl(link, false); + + // 2. panel BL off + if (link->dc->hwss.edp_backlight_control) + link->dc->hwss.edp_backlight_control(link, false); + + // 1. panel VDD off + if (!link->dc->config.edp_no_power_sequencing) + link->dc->hwss.edp_power_control(link, false); + } +} + bool edp_wait_for_t12(struct dc_link *link) { if (link->connector_signal == SIGNAL_TYPE_EDP && link->dc->hwss.edp_wait_for_T12) { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 0a5bbda8c739..20f91de852e3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -69,4 +69,5 @@ void edp_add_delay_for_T9(struct dc_link *link); bool edp_receiver_ready_T9(struct dc_link *link); bool edp_receiver_ready_T7(struct dc_link *link); bool edp_power_alpm_dpcd_enable(struct dc_link *link, bool enable); +void edp_set_panel_power(struct dc_link *link, bool powerOn); #endif /* __DC_LINK_EDP_POWER_CONTROL_H__ */ From 122af8e0d04f941e662ca2131925b1732394173f Mon Sep 17 00:00:00 2001 From: Agustin Gutierrez Date: Mon, 2 Oct 2023 10:21:08 -0400 Subject: [PATCH 0754/1397] drm/amd/display: Remove power sequencing check [ Upstream commit b0399e22ada096435de3e3e73899aa8bc026820d ] [Why] Some ASICs keep backlight powered on after dpms off command has been issued. [How] The check for no edp power sequencing was never going to pass. The value is never changed from what it is set by design. Cc: stable@vger.kernel.org # 6.1+ Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2765 Reviewed-by: Swapnil Patel Acked-by: Roman Li Signed-off-by: Agustin Gutierrez Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 28cb1f5a504d..a17a06eb7762 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -1930,8 +1930,7 @@ static void disable_link_dp(struct dc_link *link, dp_disable_link_phy(link, link_res, signal); if (link->connector_signal == SIGNAL_TYPE_EDP) { - if (!link->dc->config.edp_no_power_sequencing && - !link->skip_implict_edp_power_control) + if (!link->skip_implict_edp_power_control) link->dc->hwss.edp_power_control(link, false); } From 59fe58d59070e60ea938f8d6d5435e85d16d9a71 Mon Sep 17 00:00:00 2001 From: Christoph Niedermaier Date: Wed, 22 Nov 2023 14:41:13 +0100 Subject: [PATCH 0755/1397] cpufreq: imx6q: Don't disable 792 Mhz OPP unnecessarily [ Upstream commit 2e4e0984c7d696cc74cf2fd7e7f62997f0e9ebe6 ] For a 900MHz i.MX6ULL CPU the 792MHz OPP is disabled. There is no convincing reason to disable this OPP. If a CPU can run at 900MHz, it should also be able to cope with 792MHz. Looking at the voltage level of 792MHz in [1] (page 24, table 10. "Operating Ranges") the current defined OPP is above the minimum. So the voltage level shouldn't be a problem. However in [2] (page 24, table 10. "Operating Ranges"), it is not mentioned that 792MHz OPP isn't allowed. Change it to only disable 792MHz OPP for i.MX6ULL types below 792 MHz. [1] https://www.nxp.com/docs/en/data-sheet/IMX6ULLIEC.pdf [2] https://www.nxp.com/docs/en/data-sheet/IMX6ULLCEC.pdf Fixes: 0aa9abd4c212 ("cpufreq: imx6q: check speed grades for i.MX6ULL") Signed-off-by: Christoph Niedermaier Reviewed-by: Marek Vasut Reviewed-by: Fabio Estevam [ Viresh: Edited subject ] Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/imx6q-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 494d044b9e72..33728c242f66 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -327,7 +327,7 @@ static int imx6ul_opp_check_speed_grading(struct device *dev) imx6x_disable_freq_in_opp(dev, 696000000); if (of_machine_is_compatible("fsl,imx6ull")) { - if (val != OCOTP_CFG3_6ULL_SPEED_792MHZ) + if (val < OCOTP_CFG3_6ULL_SPEED_792MHZ) imx6x_disable_freq_in_opp(dev, 792000000); if (val != OCOTP_CFG3_6ULL_SPEED_900MHZ) From 7ccb57771b9a7de9debb8da73884fb923e77aeef Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 22 Nov 2023 11:26:03 +0800 Subject: [PATCH 0756/1397] iommu/vt-d: Omit devTLB invalidation requests when TES=0 [ Upstream commit 0f5432a9b839847dcfe9fa369d72e3d646102ddf ] The latest VT-d spec indicates that when remapping hardware is disabled (TES=0 in Global Status Register), upstream ATS Invalidation Completion requests are treated as UR (Unsupported Request). Consequently, the spec recommends in section 4.3 Handling of Device-TLB Invalidations that software refrain from submitting any Device-TLB invalidation requests when address remapping hardware is disabled. Verify address remapping hardware is enabled prior to submitting Device- TLB invalidation requests. Fixes: 792fb43ce2c9 ("iommu/vt-d: Enable Intel IOMMU scalable mode by default") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20231114011036.70142-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/dmar.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index a3414afe11b0..23cb80d62a9a 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1522,6 +1522,15 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, { struct qi_desc desc; + /* + * VT-d spec, section 4.3: + * + * Software is recommended to not submit any Device-TLB invalidation + * requests while address remapping hardware is disabled. + */ + if (!(iommu->gcmd & DMA_GCMD_TE)) + return; + if (mask) { addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; @@ -1587,6 +1596,15 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; + /* + * VT-d spec, section 4.3: + * + * Software is recommended to not submit any Device-TLB invalidation + * requests while address remapping hardware is disabled. + */ + if (!(iommu->gcmd & DMA_GCMD_TE)) + return; + desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); From c0b0cfd979fbc6a5abc0127c087e1f3d4c91f281 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 22 Nov 2023 11:26:04 +0800 Subject: [PATCH 0757/1397] iommu/vt-d: Disable PCI ATS in legacy passthrough mode [ Upstream commit da37dddcf4caf015c400a930301d2ee27a7a15fb ] When IOMMU hardware operates in legacy mode, the TT field of the context entry determines the translation type, with three supported types (Section 9.3 Context Entry): - DMA translation without device TLB support - DMA translation with device TLB support - Passthrough mode with translated and translation requests blocked Device TLB support is absent when hardware is configured in passthrough mode. Disable the PCI ATS feature when IOMMU is configured for passthrough translation type in legacy (non-scalable) mode. Fixes: 0faa19a1515f ("iommu/vt-d: Decouple PASID & PRI enabling from SVA") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20231114011036.70142-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 8e878c3beec5..18fa71aadc90 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2487,7 +2487,8 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, return ret; } - iommu_enable_pci_caps(info); + if (sm_supported(info->iommu) || !domain_type_is_si(info->domain)) + iommu_enable_pci_caps(info); return 0; } From 48f2183a4f9d3540fc5cfc8f8451621ee92c09f8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 22 Nov 2023 11:26:05 +0800 Subject: [PATCH 0758/1397] iommu/vt-d: Make context clearing consistent with context mapping [ Upstream commit 9a16ab9d640274b20813d2d17475e18d3e99d834 ] In the iommu probe_device path, domain_context_mapping() allows setting up the context entry for a non-PCI device. However, in the iommu release_device path, domain_context_clear() only clears context entries for PCI devices. Make domain_context_clear() behave consistently with domain_context_mapping() by clearing context entries for both PCI and non-PCI devices. Fixes: 579305f75d34 ("iommu/vt-d: Update to use PCI DMA aliases") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20231114011036.70142-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 18fa71aadc90..4c3707384bd9 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3923,8 +3923,8 @@ static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *op */ static void domain_context_clear(struct device_domain_info *info) { - if (!info->iommu || !info->dev || !dev_is_pci(info->dev)) - return; + if (!dev_is_pci(info->dev)) + domain_context_clear_one(info, info->bus, info->devfn); pci_for_each_dma_alias(to_pci_dev(info->dev), &domain_context_clear_one_cb, info); From d03960b4839d47a85d90105d64defa91b849fb30 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Nov 2023 08:44:56 +0000 Subject: [PATCH 0759/1397] drm/i915/gsc: Mark internal GSC engine with reserved uabi class [ Upstream commit 503579448db93f9fbcc93cd99a1f2d5aa4b2cda6 ] The GSC CS is not exposed to the user, so we skipped assigning a uabi class number for it. However, the trace logs use the uabi class and instance to identify the engine, so leaving uabi class unset makes the GSC CS show up as the RCS in those logs. Given that the engine is not exposed to the user, we can't add a new case in the uabi enum, so we insted internally define a kernel internal class as -1. At the same time remove special handling for the name and complete the uabi_classes array so internal class is automatically correctly assigned. Engine will show as 65535:0 other0 in the logs/traces which should be unique enough. v2: * Fix uabi class u8 vs u16 type confusion. Signed-off-by: Tvrtko Ursulin Fixes: 194babe26bdc ("drm/i915/mtl: don't expose GSC command streamer to the user") Cc: Daniele Ceraolo Spurio Cc: Alan Previn Cc: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20231116084456.291533-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit dfed6b58d54f3a5d7e6bc1fb060e2c936330eba2) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_engine_user.c | 39 ++++++++++++--------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index dcedff41a825..d304e0a948f0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -42,12 +42,15 @@ void intel_engine_add_user(struct intel_engine_cs *engine) (struct llist_head *)&engine->i915->uabi_engines); } -static const u8 uabi_classes[] = { +#define I915_NO_UABI_CLASS ((u16)(-1)) + +static const u16 uabi_classes[] = { [RENDER_CLASS] = I915_ENGINE_CLASS_RENDER, [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY, [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO, [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE, [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE, + [OTHER_CLASS] = I915_NO_UABI_CLASS, /* Not exposed to users, no uabi class. */ }; static int engine_cmp(void *priv, const struct list_head *A, @@ -202,6 +205,7 @@ static void engine_rename(struct intel_engine_cs *engine, const char *name, u16 void intel_engines_driver_register(struct drm_i915_private *i915) { + u16 name_instance, other_instance = 0; struct legacy_ring ring = {}; struct list_head *it, *next; struct rb_node **p, *prev; @@ -219,27 +223,28 @@ void intel_engines_driver_register(struct drm_i915_private *i915) if (intel_gt_has_unrecoverable_error(engine->gt)) continue; /* ignore incomplete engines */ - /* - * We don't want to expose the GSC engine to the users, but we - * still rename it so it is easier to identify in the debug logs - */ - if (engine->id == GSC0) { - engine_rename(engine, "gsc", 0); - continue; - } - GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); engine->uabi_class = uabi_classes[engine->class]; + if (engine->uabi_class == I915_NO_UABI_CLASS) { + name_instance = other_instance++; + } else { + GEM_BUG_ON(engine->uabi_class >= + ARRAY_SIZE(i915->engine_uabi_class_count)); + name_instance = + i915->engine_uabi_class_count[engine->uabi_class]++; + } + engine->uabi_instance = name_instance; - GEM_BUG_ON(engine->uabi_class >= - ARRAY_SIZE(i915->engine_uabi_class_count)); - engine->uabi_instance = - i915->engine_uabi_class_count[engine->uabi_class]++; - - /* Replace the internal name with the final user facing name */ + /* + * Replace the internal name with the final user and log facing + * name. + */ engine_rename(engine, intel_engine_class_repr(engine->class), - engine->uabi_instance); + name_instance); + + if (engine->uabi_class == I915_NO_UABI_CLASS) + continue; rb_link_node(&engine->uabi_node, prev, p); rb_insert_color(&engine->uabi_node, &i915->uabi_engines); From 7f4ea4035fce404433a48f248fdcada7725e77df Mon Sep 17 00:00:00 2001 From: xiazhengqiao Date: Wed, 29 Nov 2023 16:41:15 +0800 Subject: [PATCH 0760/1397] drm/panel: starry-2081101qfh032011-53g: Fine tune the panel power sequence [ Upstream commit fc1ccc16271a0526518f19f460fed63d575a8a42 ] For the "starry, 2081101qfh032011-53g" panel, it is stipulated in the panel spec that MIPI needs to keep the LP11 state before the lcm_reset pin is pulled high. Fixes: 6069b66cd962 ("drm/panel: support for STARRY 2081101QFH032011-53G MIPI-DSI panel") Signed-off-by: xiazhengqiao Reviewed-by: Jessica Zhang Link: https://lore.kernel.org/r/20231129084115.7918-1-xiazhengqiao@huaqin.corp-partner.google.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231129084115.7918-1-xiazhengqiao@huaqin.corp-partner.google.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index d76a8ca9c40f..29e63cdfb895 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -2104,6 +2104,7 @@ static const struct panel_desc starry_qfh032011_53g_desc = { .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_LPM, .init_cmds = starry_qfh032011_53g_init_cmd, + .lp11_before_reset = true, }; static const struct drm_display_mode starry_himax83102_j02_default_mode = { From 684cf5d6aef1a8d5355cc23d3f0577513575833a Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 29 Nov 2023 17:07:15 +0800 Subject: [PATCH 0761/1397] drm/panel: nt36523: fix return value check in nt36523_probe() [ Upstream commit fb18fe0fdf22a2f4512a8b644bb5ea1473829cda ] mipi_dsi_device_register_full() never returns NULL pointer, it will return ERR_PTR() when it fails, so replace the check with IS_ERR(). Fixes: 0993234a0045 ("drm/panel: Add driver for Novatek NT36523") Signed-off-by: Yang Yingliang Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231129090715.856263-1-yangyingliang@huaweicloud.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231129090715.856263-1-yangyingliang@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-novatek-nt36523.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36523.c b/drivers/gpu/drm/panel/panel-novatek-nt36523.c index 9632b9e95b71..c4a804c5d6aa 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt36523.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt36523.c @@ -1266,9 +1266,9 @@ static int nt36523_probe(struct mipi_dsi_device *dsi) return dev_err_probe(dev, -EPROBE_DEFER, "cannot get secondary DSI host\n"); pinfo->dsi[1] = mipi_dsi_device_register_full(dsi1_host, info); - if (!pinfo->dsi[1]) { + if (IS_ERR(pinfo->dsi[1])) { dev_err(dev, "cannot get secondary DSI device\n"); - return -ENODEV; + return PTR_ERR(pinfo->dsi[1]); } } From 4d78331c193919d9b62a9a72259939a1c5fdecd6 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Fri, 17 Nov 2023 06:38:39 +0000 Subject: [PATCH 0762/1397] cpufreq/amd-pstate: Fix scaling_min_freq and scaling_max_freq update [ Upstream commit febab20caebac959fdc3d7520bc52de8b1184455 ] When amd_pstate is running, writing to scaling_min_freq and scaling_max_freq has no effect. These values are only passed to the policy level, but not to the platform level. This means that the platform does not know about the frequency limits set by the user. To fix this, update the min_perf and max_perf values at the platform level whenever the user changes the scaling_min_freq and scaling_max_freq values. Fixes: ffa5096a7c33 ("cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors") Acked-by: Huang Rui Signed-off-by: Wyes Karny Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/amd-pstate.c | 60 ++++++++++++++++++++++++++++-------- include/linux/amd-pstate.h | 4 +++ 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 300f81d36291..3313d1d2c6dd 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -307,11 +307,11 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); WRITE_ONCE(cpudata->highest_perf, highest_perf); - + WRITE_ONCE(cpudata->max_limit_perf, highest_perf); WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); - + WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); return 0; } @@ -329,11 +329,12 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) highest_perf = cppc_perf.highest_perf; WRITE_ONCE(cpudata->highest_perf, highest_perf); - + WRITE_ONCE(cpudata->max_limit_perf, highest_perf); WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); if (cppc_state == AMD_PSTATE_ACTIVE) return 0; @@ -432,6 +433,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u64 prev = READ_ONCE(cpudata->cppc_req_cached); u64 value = prev; + min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, + cpudata->max_limit_perf); + max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, + cpudata->max_limit_perf); des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { @@ -470,6 +475,22 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy) return 0; } +static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) +{ + u32 max_limit_perf, min_limit_perf; + struct amd_cpudata *cpudata = policy->driver_data; + + max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); + min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); + WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); + WRITE_ONCE(cpudata->max_limit_freq, policy->max); + WRITE_ONCE(cpudata->min_limit_freq, policy->min); + + return 0; +} + static int amd_pstate_update_freq(struct cpufreq_policy *policy, unsigned int target_freq, bool fast_switch) { @@ -480,6 +501,9 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy, if (!cpudata->max_freq) return -ENODEV; + if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) + amd_pstate_update_min_max_limit(policy); + cap_perf = READ_ONCE(cpudata->highest_perf); min_perf = READ_ONCE(cpudata->lowest_perf); max_perf = cap_perf; @@ -534,6 +558,10 @@ static void amd_pstate_adjust_perf(unsigned int cpu, struct amd_cpudata *cpudata = policy->driver_data; unsigned int target_freq; + if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) + amd_pstate_update_min_max_limit(policy); + + cap_perf = READ_ONCE(cpudata->highest_perf); lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); max_freq = READ_ONCE(cpudata->max_freq); @@ -747,6 +775,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) /* Initial processor data capability frequencies */ cpudata->max_freq = max_freq; cpudata->min_freq = min_freq; + cpudata->max_limit_freq = max_freq; + cpudata->min_limit_freq = min_freq; cpudata->nominal_freq = nominal_freq; cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; @@ -1185,16 +1215,25 @@ static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) return 0; } -static void amd_pstate_epp_init(unsigned int cpu) +static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata = policy->driver_data; - u32 max_perf, min_perf; + u32 max_perf, min_perf, min_limit_perf, max_limit_perf; u64 value; s16 epp; max_perf = READ_ONCE(cpudata->highest_perf); min_perf = READ_ONCE(cpudata->lowest_perf); + max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); + min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + + max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, + cpudata->max_limit_perf); + min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, + cpudata->max_limit_perf); + + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); + WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); value = READ_ONCE(cpudata->cppc_req_cached); @@ -1212,9 +1251,6 @@ static void amd_pstate_epp_init(unsigned int cpu) value &= ~AMD_CPPC_DES_PERF(~0L); value |= AMD_CPPC_DES_PERF(0); - if (cpudata->epp_policy == cpudata->policy) - goto skip_epp; - cpudata->epp_policy = cpudata->policy; /* Get BIOS pre-defined epp value */ @@ -1224,7 +1260,7 @@ static void amd_pstate_epp_init(unsigned int cpu) * This return value can only be negative for shared_memory * systems where EPP register read/write not supported. */ - goto skip_epp; + return; } if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) @@ -1238,8 +1274,6 @@ static void amd_pstate_epp_init(unsigned int cpu) WRITE_ONCE(cpudata->cppc_req_cached, value); amd_pstate_set_epp(cpudata, epp); -skip_epp: - cpufreq_cpu_put(policy); } static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) @@ -1254,7 +1288,7 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) cpudata->policy = policy->policy; - amd_pstate_epp_init(policy->cpu); + amd_pstate_epp_update_limit(policy); return 0; } diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 446394f84606..6ad02ad9c7b4 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -70,6 +70,10 @@ struct amd_cpudata { u32 nominal_perf; u32 lowest_nonlinear_perf; u32 lowest_perf; + u32 min_limit_perf; + u32 max_limit_perf; + u32 min_limit_freq; + u32 max_limit_freq; u32 max_freq; u32 min_freq; From 9d00fe295c8f309c21177114777f900971375928 Mon Sep 17 00:00:00 2001 From: Ayush Jain Date: Fri, 3 Nov 2023 15:18:15 +0530 Subject: [PATCH 0763/1397] cpufreq/amd-pstate: Only print supported EPP values for performance governor [ Upstream commit 142c169b31beb364ef39385b4e88735bd51d37fe ] show_energy_performance_available_preferences() to show only supported values which is performance in performance governor policy. -------Before-------- $ cat /sys/devices/system/cpu/cpu1/cpufreq/scaling_driver amd-pstate-epp $ cat /sys/devices/system/cpu/cpu1/cpufreq/scaling_governor performance $ cat /sys/devices/system/cpu/cpu1/cpufreq/energy_performance_preference performance $ cat /sys/devices/system/cpu/cpu1/cpufreq/energy_performance_available_preferences default performance balance_performance balance_power power -------After-------- $ cat /sys/devices/system/cpu/cpu1/cpufreq/scaling_driver amd-pstate-epp $ cat /sys/devices/system/cpu/cpu1/cpufreq/scaling_governor performance $ cat /sys/devices/system/cpu/cpu1/cpufreq/energy_performance_preference performance $ cat /sys/devices/system/cpu/cpu1/cpufreq/energy_performance_available_preferences performance Fixes: ffa5096a7c33 ("cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors") Suggested-by: Wyes Karny Signed-off-by: Ayush Jain Reviewed-by: Wyes Karny Acked-by: Huang Rui Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/amd-pstate.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 3313d1d2c6dd..1f6186475715 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -882,11 +882,16 @@ static ssize_t show_energy_performance_available_preferences( { int i = 0; int offset = 0; + struct amd_cpudata *cpudata = policy->driver_data; + + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + return sysfs_emit_at(buf, offset, "%s\n", + energy_perf_strings[EPP_INDEX_PERFORMANCE]); while (energy_perf_strings[i] != NULL) offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]); - sysfs_emit_at(buf, offset, "\n"); + offset += sysfs_emit_at(buf, offset, "\n"); return offset; } From 6832e5396d68c182871447d438af259d08ee7d36 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 23 Nov 2023 15:33:22 +0800 Subject: [PATCH 0764/1397] drm/amd/pm: fix a memleak in aldebaran_tables_init [ Upstream commit 7a88f23e768491bae653b444a96091d2aaeb0818 ] When kzalloc() for smu_table->ecc_table fails, we should free the previously allocated resources to prevent memleak. Fixes: edd794208555 ("drm/amd/pm: add message smu to get ecc_table v2") Signed-off-by: Dinghao Liu Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index cc3169400c9b..08fff9600bd2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -257,8 +257,11 @@ static int aldebaran_tables_init(struct smu_context *smu) } smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL); - if (!smu_table->ecc_table) + if (!smu_table->ecc_table) { + kfree(smu_table->metrics_table); + kfree(smu_table->gpu_metrics_table); return -ENOMEM; + } return 0; } From e8841b225b5f1826ec33ed6a713d4fb7e09a4256 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Tue, 7 Nov 2023 22:22:26 -0800 Subject: [PATCH 0765/1397] iommu: Fix printk arg in of_iommu_get_resv_regions() [ Upstream commit c2183b3dcc9dd41b768569ea88bededa58cceebb ] The variable phys is defined as (struct resource *) which aligns with the printk format specifier %pr. Taking the address of it results in a value of type (struct resource **) which is incompatible with the format specifier %pr. Therefore, remove the address of operator (&). Fixes: a5bf3cfce8cb ("iommu: Implement of_iommu_get_resv_regions()") Signed-off-by: Daniel Mentz Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20231108062226.928985-1-danielmentz@google.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/of_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index c25b4ae6aeee..35ba090f3b5e 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -197,7 +197,7 @@ iommu_resv_region_get_type(struct device *dev, if (start == phys->start && end == phys->end) return IOMMU_RESV_DIRECT; - dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", &phys, + dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", phys, &start, &end); return IOMMU_RESV_RESERVED; } From 5e03e4a4911d9f9aaa49b5b3f9f02fa13dd1078a Mon Sep 17 00:00:00 2001 From: Sherry Wang Date: Fri, 8 Sep 2023 14:45:00 +0800 Subject: [PATCH 0766/1397] drm/amd/display: refactor ILR to make it work [ Upstream commit 6ec876472ff7edeaf2a07bf6afbff74d7f1dfa35 ] [Why] Current ILR toggle is on/off as a part of panel config for new function, which breaks original ILR logic [How] Refactor ILR and take panel config into account Reviewed-by: Anthony Koo Acked-by: Aurabindo Pillai Signed-off-by: Sherry Wang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Stable-dep-of: d9e865826c20 ("drm/amd/display: Simplify brightness initialization") Signed-off-by: Sasha Levin --- .../drm/amd/display/dc/link/link_detection.c | 6 ++++++ .../dc/link/protocols/link_dp_capability.c | 14 ++++--------- .../link/protocols/link_edp_panel_control.c | 21 +++++++++++++++++-- .../link/protocols/link_edp_panel_control.h | 2 ++ 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index e682d27e098f..b3d3e46c466b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -1166,6 +1166,12 @@ static bool detect_link_and_local_sink(struct dc_link *link, dm_helpers_init_panel_settings(dc_ctx, &link->panel_config, sink); // Override dc_panel_config if system has specific settings dm_helpers_override_panel_settings(dc_ctx, &link->panel_config); + + //sink only can use supported link rate table, we are foreced to enable it + if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN) + link->panel_config.ilr.optimize_edp_link_rate = true; + if (edp_is_ilr_optimization_enabled(link)) + link->reported_link_cap.link_rate = get_max_link_rate_from_ilr_table(link); } } else { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 237e0ff955f3..db87aa7b5c90 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -707,8 +707,7 @@ bool edp_decide_link_settings(struct dc_link *link, * edp_supported_link_rates_count is only valid for eDP v1.4 or higher. * Per VESA eDP spec, "The DPCD revision for eDP v1.4 is 13h" */ - if (link->dpcd_caps.dpcd_rev.raw < DPCD_REV_13 || - link->dpcd_caps.edp_supported_link_rates_count == 0) { + if (!edp_is_ilr_optimization_enabled(link)) { *link_setting = link->verified_link_cap; return true; } @@ -772,8 +771,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, * edp_supported_link_rates_count is only valid for eDP v1.4 or higher. * Per VESA eDP spec, "The DPCD revision for eDP v1.4 is 13h" */ - if ((link->dpcd_caps.dpcd_rev.raw < DPCD_REV_13 || - link->dpcd_caps.edp_supported_link_rates_count == 0)) { + if (!edp_is_ilr_optimization_enabled(link)) { /* for DSC enabled case, we search for minimum lane count */ memset(&initial_link_setting, 0, sizeof(initial_link_setting)); initial_link_setting.lane_count = LANE_COUNT_ONE; @@ -1938,9 +1936,7 @@ void detect_edp_sink_caps(struct dc_link *link) * edp_supported_link_rates_count is only valid for eDP v1.4 or higher. * Per VESA eDP spec, "The DPCD revision for eDP v1.4 is 13h" */ - if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_13 && - (link->panel_config.ilr.optimize_edp_link_rate || - link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN)) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_13) { // Read DPCD 00010h - 0001Fh 16 bytes at one shot core_link_read_dpcd(link, DP_SUPPORTED_LINK_RATES, supported_link_rates, sizeof(supported_link_rates)); @@ -1958,12 +1954,10 @@ void detect_edp_sink_caps(struct dc_link *link) link_rate = linkRateInKHzToLinkRateMultiplier(link_rate_in_khz); link->dpcd_caps.edp_supported_link_rates[link->dpcd_caps.edp_supported_link_rates_count] = link_rate; link->dpcd_caps.edp_supported_link_rates_count++; - - if (link->reported_link_cap.link_rate < link_rate) - link->reported_link_cap.link_rate = link_rate; } } } + core_link_read_dpcd(link, DP_EDP_BACKLIGHT_ADJUSTMENT_CAP, &backlight_adj_cap, sizeof(backlight_adj_cap)); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 24b47fa82f93..e1708c296b7d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -300,6 +300,24 @@ bool set_cached_brightness_aux(struct dc_link *link) return set_default_brightness_aux(link); return false; } +bool edp_is_ilr_optimization_enabled(struct dc_link *link) +{ + if (link->dpcd_caps.edp_supported_link_rates_count == 0 || !link->panel_config.ilr.optimize_edp_link_rate) + return false; + return true; +} + +enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link) +{ + enum dc_link_rate link_rate = link->reported_link_cap.link_rate; + + for (int i = 0; i < link->dpcd_caps.edp_supported_link_rates_count; i++) { + if (link_rate < link->dpcd_caps.edp_supported_link_rates[i]) + link_rate = link->dpcd_caps.edp_supported_link_rates[i]; + } + + return link_rate; +} bool edp_is_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing) @@ -312,8 +330,7 @@ bool edp_is_ilr_optimization_required(struct dc_link *link, ASSERT(link || crtc_timing); // invalid input - if (link->dpcd_caps.edp_supported_link_rates_count == 0 || - !link->panel_config.ilr.optimize_edp_link_rate) + if (!edp_is_ilr_optimization_enabled(link)) return false; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 20f91de852e3..ebf7deb63d13 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -64,6 +64,8 @@ bool edp_get_replay_state(const struct dc_link *link, uint64_t *state); bool edp_wait_for_t12(struct dc_link *link); bool edp_is_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing); +bool edp_is_ilr_optimization_enabled(struct dc_link *link); +enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link); bool edp_backlight_enable_aux(struct dc_link *link, bool enable); void edp_add_delay_for_T9(struct dc_link *link); bool edp_receiver_ready_T9(struct dc_link *link); From 86cd446a7727f92db999b55c66e9a59cf7a0545a Mon Sep 17 00:00:00 2001 From: Swapnil Patel Date: Wed, 4 Oct 2023 15:58:57 -0400 Subject: [PATCH 0767/1397] drm/amd/display: Reduce default backlight min from 5 nits to 1 nits [ Upstream commit 5edb7cdff85af8f8c5fda5b88310535ab823f663 ] [Why & How] Currently set_default_brightness_aux function uses 5 nits as lower limit to check for valid default_backlight setting. However some newer panels can support even lower default settings Reviewed-by: Agustin Gutierrez Acked-by: Roman Li Signed-off-by: Swapnil Patel Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Stable-dep-of: d9e865826c20 ("drm/amd/display: Simplify brightness initialization") Signed-off-by: Sasha Levin --- .../amd/display/dc/link/protocols/link_edp_panel_control.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e1708c296b7d..a602202610e0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -281,8 +281,8 @@ bool set_default_brightness_aux(struct dc_link *link) if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; - // if < 5 nits or > 5000, it might be wrong readback - if (default_backlight < 5000 || default_backlight > 5000000) + // if < 1 nits or > 5000, it might be wrong readback + if (default_backlight < 1000 || default_backlight > 5000000) default_backlight = 150000; // return edp_set_backlight_level_nits(link, true, From d05614ebad8c2eff20f40ca1f55e36a76d4b0dd8 Mon Sep 17 00:00:00 2001 From: Camille Cho Date: Fri, 3 Nov 2023 12:08:42 +0800 Subject: [PATCH 0768/1397] drm/amd/display: Simplify brightness initialization [ Upstream commit d9e865826c202b262f9ee3f17a03cc4ac5d44ced ] [Why] Remove the brightness cache in DC. It uses a single value to represent the brightness for both SDR and HDR mode. This leads to flash in HDR on/off. It also unconditionally programs brightness as in HDR mode. This may introduce garbage on SDR mode in miniLED panel. [How] Simplify the initialization flow by removing the DC cache and taking what panel has as default. Expand the mechanism for PWM to DPCD Aux to restore cached brightness value generally. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Krunoslav Kovac Acked-by: Hamza Mahfooz Signed-off-by: Camille Cho Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dc.h | 1 - drivers/gpu/drm/amd/display/dc/dc_types.h | 4 ---- .../gpu/drm/amd/display/dc/link/link_detection.c | 2 +- drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 3 +-- .../dc/link/protocols/link_edp_panel_control.c | 16 +++------------- .../dc/link/protocols/link_edp_panel_control.h | 1 - 6 files changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index dd7bf31ef6b0..3f33740e2f65 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1538,7 +1538,6 @@ struct dc_link { enum edp_revision edp_revision; union dpcd_sink_ext_caps dpcd_sink_ext_caps; - struct backlight_settings backlight_settings; struct psr_settings psr_settings; struct replay_settings replay_settings; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index ba900b0a62a8..accffba5a683 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1003,10 +1003,6 @@ struct link_mst_stream_allocation_table { struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM]; }; -struct backlight_settings { - uint32_t backlight_millinits; -}; - /* PSR feature flags */ struct psr_settings { bool psr_feature_enabled; // PSR is supported by sink diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index b3d3e46c466b..c7a9e286a5d4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -876,7 +876,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, (link->dpcd_sink_ext_caps.bits.oled == 1)) { dpcd_set_source_specific_data(link); msleep(post_oui_delay); - set_cached_brightness_aux(link); + set_default_brightness_aux(link); } return true; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index a17a06eb7762..35d087cf1980 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2140,8 +2140,7 @@ static enum dc_status enable_link_dp(struct dc_state *state, if (link->dpcd_sink_ext_caps.bits.oled == 1 || link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 || link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) { - set_cached_brightness_aux(link); - + set_default_brightness_aux(link); if (link->dpcd_sink_ext_caps.bits.oled == 1) msleep(bl_oled_enable_delay); edp_backlight_enable_aux(link, true); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index a602202610e0..fe74d4252a51 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -168,7 +168,6 @@ bool edp_set_backlight_level_nits(struct dc_link *link, *(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits; *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms; - link->backlight_settings.backlight_millinits = backlight_millinits; if (!link->dpcd_caps.panel_luminance_control) { if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL, @@ -281,9 +280,9 @@ bool set_default_brightness_aux(struct dc_link *link) if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; - // if < 1 nits or > 5000, it might be wrong readback - if (default_backlight < 1000 || default_backlight > 5000000) - default_backlight = 150000; // + // if > 5000, it might be wrong readback + if (default_backlight > 5000000) + default_backlight = 150000; return edp_set_backlight_level_nits(link, true, default_backlight, 0); @@ -291,15 +290,6 @@ bool set_default_brightness_aux(struct dc_link *link) return false; } -bool set_cached_brightness_aux(struct dc_link *link) -{ - if (link->backlight_settings.backlight_millinits) - return edp_set_backlight_level_nits(link, true, - link->backlight_settings.backlight_millinits, 0); - else - return set_default_brightness_aux(link); - return false; -} bool edp_is_ilr_optimization_enabled(struct dc_link *link) { if (link->dpcd_caps.edp_supported_link_rates_count == 0 || !link->panel_config.ilr.optimize_edp_link_rate) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index ebf7deb63d13..a034288ad75d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -30,7 +30,6 @@ enum dp_panel_mode dp_get_panel_mode(struct dc_link *link); void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode); bool set_default_brightness_aux(struct dc_link *link); -bool set_cached_brightness_aux(struct dc_link *link); void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd); int edp_get_backlight_level(const struct dc_link *link); bool edp_get_backlight_level_nits(struct dc_link *link, From 117917b0be2e9bc893859a70cfff17e48020ada8 Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Tue, 7 Nov 2023 15:07:56 -0500 Subject: [PATCH 0769/1397] drm/amd/display: Fix MPCC 1DLUT programming [ Upstream commit 6f395cebdd8927fbffdc3a55a14fcacf93634359 ] [Why] Wrong function is used to translate LUT values to HW format, leading to visible artifacting in some cases. [How] Use the correct cm3_helper function. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Krunoslav Kovac Acked-by: Hamza Mahfooz Signed-off-by: Ilya Bakoulin Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index c9140b50c345..650e1598bddc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -486,8 +486,7 @@ bool dcn32_set_mcm_luts( if (plane_state->blend_tf->type == TF_TYPE_HWPWL) lut_params = &plane_state->blend_tf->pwl; else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - plane_state->blend_tf, + cm3_helper_translate_curve_to_hw_format(plane_state->blend_tf, &dpp_base->regamma_params, false); lut_params = &dpp_base->regamma_params; } @@ -501,8 +500,7 @@ bool dcn32_set_mcm_luts( else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) { // TODO: dpp_base replace ASSERT(false); - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - plane_state->in_shaper_func, + cm3_helper_translate_curve_to_hw_format(plane_state->in_shaper_func, &dpp_base->shaper_params, true); lut_params = &dpp_base->shaper_params; } From e9e45c19624eb8eb0d91cee487f7c8c5bf7a471a Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Wed, 22 Nov 2023 11:25:31 -0800 Subject: [PATCH 0770/1397] vfio/pds: Fix mutex lock->magic != lock warning [ Upstream commit 91aeb563bd4332e2988f8c0f64f125c4ecb5bcb3 ] The following BUG was found when running on a kernel with CONFIG_DEBUG_MUTEXES=y set: DEBUG_LOCKS_WARN_ON(lock->magic != lock) RIP: 0010:mutex_trylock+0x10d/0x120 Call Trace: ? __warn+0x85/0x140 ? mutex_trylock+0x10d/0x120 ? report_bug+0xfc/0x1e0 ? handle_bug+0x3f/0x70 ? exc_invalid_op+0x17/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? mutex_trylock+0x10d/0x120 ? mutex_trylock+0x10d/0x120 pds_vfio_reset+0x3a/0x60 [pds_vfio_pci] pci_reset_function+0x4b/0x70 reset_store+0x5b/0xa0 kernfs_fop_write_iter+0x137/0x1d0 vfs_write+0x2de/0x410 ksys_write+0x5d/0xd0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 As shown, lock->magic != lock. This is because mutex_init(&pds_vfio->state_mutex) is called in the VFIO open path. So, if a reset is initiated before the VFIO device is opened the mutex will have never been initialized. Fix this by calling mutex_init(&pds_vfio->state_mutex) in the VFIO init path. Also, don't destroy the mutex on close because the device may be re-opened, which would cause mutex to be uninitialized. Fix this by implementing a driver specific vfio_device_ops.release callback that destroys the mutex before calling vfio_pci_core_release_dev(). Fixes: bb500dbe2ac6 ("vfio/pds: Add VFIO live migration support") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20231122192532.25791-2-brett.creeley@amd.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/pds/vfio_dev.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index 649b18ee394b..8c9fb87b13e1 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -155,6 +155,8 @@ static int pds_vfio_init_device(struct vfio_device *vdev) pds_vfio->vf_id = vf_id; + mutex_init(&pds_vfio->state_mutex); + vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; vdev->mig_ops = &pds_vfio_lm_ops; vdev->log_ops = &pds_vfio_log_ops; @@ -168,6 +170,16 @@ static int pds_vfio_init_device(struct vfio_device *vdev) return 0; } +static void pds_vfio_release_device(struct vfio_device *vdev) +{ + struct pds_vfio_pci_device *pds_vfio = + container_of(vdev, struct pds_vfio_pci_device, + vfio_coredev.vdev); + + mutex_destroy(&pds_vfio->state_mutex); + vfio_pci_core_release_dev(vdev); +} + static int pds_vfio_open_device(struct vfio_device *vdev) { struct pds_vfio_pci_device *pds_vfio = @@ -179,7 +191,6 @@ static int pds_vfio_open_device(struct vfio_device *vdev) if (err) return err; - mutex_init(&pds_vfio->state_mutex); pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; @@ -199,14 +210,13 @@ static void pds_vfio_close_device(struct vfio_device *vdev) pds_vfio_put_save_file(pds_vfio); pds_vfio_dirty_disable(pds_vfio, true); mutex_unlock(&pds_vfio->state_mutex); - mutex_destroy(&pds_vfio->state_mutex); vfio_pci_core_close_device(vdev); } static const struct vfio_device_ops pds_vfio_ops = { .name = "pds-vfio", .init = pds_vfio_init_device, - .release = vfio_pci_core_release_dev, + .release = pds_vfio_release_device, .open_device = pds_vfio_open_device, .close_device = pds_vfio_close_device, .ioctl = vfio_pci_core_ioctl, From 72654bc1d404e05ab7e7c26cfbc966f5cd63288b Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Wed, 22 Nov 2023 11:25:32 -0800 Subject: [PATCH 0771/1397] vfio/pds: Fix possible sleep while in atomic context [ Upstream commit ae2667cd8a479bb5abd6e24c12fcc9ef5bc06d75 ] The driver could possibly sleep while in atomic context resulting in the following call trace while CONFIG_DEBUG_ATOMIC_SLEEP=y is set: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:283 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2817, name: bash preempt_count: 1, expected: 0 RCU nest depth: 0, expected: 0 Call Trace: dump_stack_lvl+0x36/0x50 __might_resched+0x123/0x170 mutex_lock+0x1e/0x50 pds_vfio_put_lm_file+0x1e/0xa0 [pds_vfio_pci] pds_vfio_put_save_file+0x19/0x30 [pds_vfio_pci] pds_vfio_state_mutex_unlock+0x2e/0x80 [pds_vfio_pci] pci_reset_function+0x4b/0x70 reset_store+0x5b/0xa0 kernfs_fop_write_iter+0x137/0x1d0 vfs_write+0x2de/0x410 ksys_write+0x5d/0xd0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 This can happen if pds_vfio_put_restore_file() and/or pds_vfio_put_save_file() grab the mutex_lock(&lm_file->lock) while the spin_lock(&pds_vfio->reset_lock) is held, which can happen during while calling pds_vfio_state_mutex_unlock(). Fix this by changing the reset_lock to reset_mutex so there are no such conerns. Also, make sure to destroy the reset_mutex in the driver specific VFIO device release function. This also fixes a spinlock bad magic BUG that was caused by not calling spinlock_init() on the reset_lock. Since, the lock is being changed to a mutex, make sure to call mutex_init() on it. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/kvm/1f9bc27b-3de9-4891-9687-ba2820c1b390@moroto.mountain/ Fixes: bb500dbe2ac6 ("vfio/pds: Add VFIO live migration support") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20231122192532.25791-3-brett.creeley@amd.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/pds/pci_drv.c | 4 ++-- drivers/vfio/pci/pds/vfio_dev.c | 14 ++++++++------ drivers/vfio/pci/pds/vfio_dev.h | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/vfio/pci/pds/pci_drv.c b/drivers/vfio/pci/pds/pci_drv.c index ab4b5958e413..caffa1a2cf59 100644 --- a/drivers/vfio/pci/pds/pci_drv.c +++ b/drivers/vfio/pci/pds/pci_drv.c @@ -55,10 +55,10 @@ static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio) * VFIO_DEVICE_STATE_RUNNING. */ if (deferred_reset_needed) { - spin_lock(&pds_vfio->reset_lock); + mutex_lock(&pds_vfio->reset_mutex); pds_vfio->deferred_reset = true; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_ERROR; - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); } } diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index 8c9fb87b13e1..4c351c59d05a 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -29,7 +29,7 @@ struct pds_vfio_pci_device *pds_vfio_pci_drvdata(struct pci_dev *pdev) void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio) { again: - spin_lock(&pds_vfio->reset_lock); + mutex_lock(&pds_vfio->reset_mutex); if (pds_vfio->deferred_reset) { pds_vfio->deferred_reset = false; if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) { @@ -39,23 +39,23 @@ void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio) } pds_vfio->state = pds_vfio->deferred_reset_state; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); goto again; } mutex_unlock(&pds_vfio->state_mutex); - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); } void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio) { - spin_lock(&pds_vfio->reset_lock); + mutex_lock(&pds_vfio->reset_mutex); pds_vfio->deferred_reset = true; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; if (!mutex_trylock(&pds_vfio->state_mutex)) { - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); return; } - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); pds_vfio_state_mutex_unlock(pds_vfio); } @@ -156,6 +156,7 @@ static int pds_vfio_init_device(struct vfio_device *vdev) pds_vfio->vf_id = vf_id; mutex_init(&pds_vfio->state_mutex); + mutex_init(&pds_vfio->reset_mutex); vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; vdev->mig_ops = &pds_vfio_lm_ops; @@ -177,6 +178,7 @@ static void pds_vfio_release_device(struct vfio_device *vdev) vfio_coredev.vdev); mutex_destroy(&pds_vfio->state_mutex); + mutex_destroy(&pds_vfio->reset_mutex); vfio_pci_core_release_dev(vdev); } diff --git a/drivers/vfio/pci/pds/vfio_dev.h b/drivers/vfio/pci/pds/vfio_dev.h index b8f2d667608f..e7b01080a1ec 100644 --- a/drivers/vfio/pci/pds/vfio_dev.h +++ b/drivers/vfio/pci/pds/vfio_dev.h @@ -18,7 +18,7 @@ struct pds_vfio_pci_device { struct pds_vfio_dirty dirty; struct mutex state_mutex; /* protect migration state */ enum vfio_device_mig_state state; - spinlock_t reset_lock; /* protect reset_done flow */ + struct mutex reset_mutex; /* protect reset_done flow */ u8 deferred_reset; enum vfio_device_mig_state deferred_reset_state; struct notifier_block nb; From 602aa0efcf0dba24ef7bd78be9262285b33b4614 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 24 Nov 2023 08:48:52 +0100 Subject: [PATCH 0772/1397] x86/xen: fix percpu vcpu_info allocation [ Upstream commit db2832309a82b9acc4b8cc33a1831d36507ec13e ] Today the percpu struct vcpu_info is allocated via DEFINE_PER_CPU(), meaning that it could cross a page boundary. In this case registering it with the hypervisor will fail, resulting in a panic(). This can easily be fixed by using DEFINE_PER_CPU_ALIGNED() instead, as struct vcpu_info is guaranteed to have a size of 64 bytes, matching the cache line size of x86 64-bit processors (Xen doesn't support 32-bit processors). Fixes: 5ead97c84fa7 ("xen: Core Xen implementation") Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20231124074852.25161-1-jgross@suse.com Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/x86/xen/enlighten.c | 6 +++++- arch/x86/xen/xen-ops.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0337392a3121..3c61bb98c10e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,9 +33,12 @@ EXPORT_SYMBOL_GPL(hypercall_page); * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info * but during boot it is switched to point to xen_vcpu_info. * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events. + * Make sure that xen_vcpu_info doesn't cross a page boundary by making it + * cache-line aligned (the struct is guaranteed to have a size of 64 bytes, + * which matches the cache line size of 64-bit x86 processors). */ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); -DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DEFINE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); /* Linux <-> Xen vCPU id mapping */ DEFINE_PER_CPU(uint32_t, xen_vcpu_id); @@ -160,6 +163,7 @@ void xen_vcpu_setup(int cpu) int err; struct vcpu_info *vcpup; + BUILD_BUG_ON(sizeof(*vcpup) > SMP_CACHE_BYTES); BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); /* diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 408a2aa66c69..a87ab36889e7 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -21,7 +21,7 @@ extern void *xen_initial_gdt; struct trap_info; void xen_copy_trap_info(struct trap_info *traps); -DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DECLARE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); From fcda4d22f5df70fac308d7654bcbeb6fea2d2a47 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 29 Nov 2023 16:10:00 -0800 Subject: [PATCH 0773/1397] vfio: Drop vfio_file_iommu_group() stub to fudge around a KVM wart [ Upstream commit 4ea95c04fa6b9043a1a301240996aeebe3cb28ec ] Drop the vfio_file_iommu_group() stub and instead unconditionally declare the function to fudge around a KVM wart where KVM tries to do symbol_get() on vfio_file_iommu_group() (and other VFIO symbols) even if CONFIG_VFIO=n. Ensuring the symbol is always declared fixes a PPC build error when modules are also disabled, in which case symbol_get() simply points at the address of the symbol (with some attributes shenanigans). Because KVM does symbol_get() instead of directly depending on VFIO, the lack of a fully defined symbol is not problematic (ugly, but "fine"). arch/powerpc/kvm/../../../virt/kvm/vfio.c:89:7: error: attribute declaration must precede definition [-Werror,-Wignored-attributes] fn = symbol_get(vfio_file_iommu_group); ^ include/linux/module.h:805:60: note: expanded from macro 'symbol_get' #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); }) ^ include/linux/vfio.h:294:35: note: previous definition is here static inline struct iommu_group *vfio_file_iommu_group(struct file *file) ^ arch/powerpc/kvm/../../../virt/kvm/vfio.c:89:7: error: attribute declaration must precede definition [-Werror,-Wignored-attributes] fn = symbol_get(vfio_file_iommu_group); ^ include/linux/module.h:805:65: note: expanded from macro 'symbol_get' #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); }) ^ include/linux/vfio.h:294:35: note: previous definition is here static inline struct iommu_group *vfio_file_iommu_group(struct file *file) ^ 2 errors generated. Although KVM is firmly in the wrong (there is zero reason for KVM to build virt/kvm/vfio.c when VFIO is disabled), fudge around the error in VFIO as the stub is unnecessary and doesn't serve its intended purpose (KVM is the only external user of vfio_file_iommu_group()), and there is an in-flight series to clean up the entire KVM<->VFIO interaction, i.e. fixing this in KVM would result in more churn in the long run, and the stub needs to go away regardless. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308251949.5IiaV0sz-lkp@intel.com Closes: https://lore.kernel.org/oe-kbuild-all/202309030741.82aLACDG-lkp@intel.com Closes: https://lore.kernel.org/oe-kbuild-all/202309110914.QLH0LU6L-lkp@intel.com Link: https://lore.kernel.org/all/0-v1-08396538817d+13c5-vfio_kvm_kconfig_jgg@nvidia.com Link: https://lore.kernel.org/all/20230916003118.2540661-1-seanjc@google.com Cc: Nick Desaulniers Cc: Jason Gunthorpe Tested-by: Michael Ellerman Fixes: c1cce6d079b8 ("vfio: Compile vfio_group infrastructure optionally") Signed-off-by: Sean Christopherson Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20231130001000.543240-1-seanjc@google.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- include/linux/vfio.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 454e9295970c..a65b2513f8cd 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -289,16 +289,12 @@ void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, /* * External user API */ -#if IS_ENABLED(CONFIG_VFIO_GROUP) struct iommu_group *vfio_file_iommu_group(struct file *file); + +#if IS_ENABLED(CONFIG_VFIO_GROUP) bool vfio_file_is_group(struct file *file); bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #else -static inline struct iommu_group *vfio_file_iommu_group(struct file *file) -{ - return NULL; -} - static inline bool vfio_file_is_group(struct file *file) { return false; From 3318612b308bd4fa8647f9f80becd8ff1e2ef208 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 8 Dec 2023 08:52:25 +0100 Subject: [PATCH 0774/1397] Linux 6.6.5 Link: https://lore.kernel.org/r/20231205031535.163661217@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Takeshi Ogasawara Tested-by: Bagas Sanjaya Tested-by: Harshit Mogalapalli Tested-by: Ronald Warsow Tested-by: Guenter Roeck Tested-by: SeongJae Park Tested-by: Linux Kernel Functional Testing Tested-by: Ron Economos Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Ricardo B. Marliere Tested-by: Justin M. Forbes Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cbe63ba9126e..ee4e504a3e78 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 4 +SUBLEVEL = 5 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth From 9099d0682462e8664c8cbbe8989de65e50f62b17 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 11 Dec 2023 10:16:15 +0100 Subject: [PATCH 0775/1397] Revert "wifi: cfg80211: fix CQM for non-range use" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 4a7e92551618f3737b305f62451353ee05662f57 which is commit 7e7efdda6adb385fbdfd6f819d76bc68c923c394 upstream. It needed to have commit 076fc8775daf ("wifi: cfg80211: remove wdev mutex") applied to properly work, otherwise regressions happen. Link: https://lore.kernel.org/r/e374bb16-5b13-44cc-b11a-2f4eefb1ecf5@manjaro.org Link: https://lore.kernel.org/r/87sf4belmm.fsf@turtle.gmx.de Link: https://lore.kernel.org/r/20231210213930.61378-1-leo@leolam.fr Reported-by: Léo Lam Reported-by: Sven Joachim Reported-by: Philip Müller Cc: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/core.h | 1 - net/wireless/nl80211.c | 50 ++++++++++++++++-------------------------- 2 files changed, 19 insertions(+), 32 deletions(-) diff --git a/net/wireless/core.h b/net/wireless/core.h index f0a3a2317638..e536c0b615a0 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -299,7 +299,6 @@ struct cfg80211_cqm_config { u32 rssi_hyst; s32 last_rssi_event_value; enum nl80211_cqm_rssi_threshold_event last_rssi_event_type; - bool use_range_api; int n_rssi_thresholds; s32 rssi_thresholds[] __counted_by(n_rssi_thresholds); }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6a82dd876f27..931a03f4549c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12824,6 +12824,10 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, int i, n, low_index; int err; + /* RSSI reporting disabled? */ + if (!cqm_config) + return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); + /* * Obtain current RSSI value if possible, if not and no RSSI threshold * event has been received yet, we should receive an event after a @@ -12898,6 +12902,18 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; + if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) { + if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */ + return rdev_set_cqm_rssi_config(rdev, dev, 0, 0); + + return rdev_set_cqm_rssi_config(rdev, dev, + thresholds[0], hysteresis); + } + + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_CQM_RSSI_LIST)) + return -EOPNOTSUPP; + if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */ n_thresholds = 0; @@ -12905,20 +12921,6 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, old = rcu_dereference_protected(wdev->cqm_config, lockdep_is_held(&wdev->mtx)); - /* if already disabled just succeed */ - if (!n_thresholds && !old) - return 0; - - if (n_thresholds > 1) { - if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_CQM_RSSI_LIST) || - !rdev->ops->set_cqm_rssi_range_config) - return -EOPNOTSUPP; - } else { - if (!rdev->ops->set_cqm_rssi_config) - return -EOPNOTSUPP; - } - if (n_thresholds) { cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds, n_thresholds), @@ -12933,26 +12935,13 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, memcpy(cqm_config->rssi_thresholds, thresholds, flex_array_size(cqm_config, rssi_thresholds, n_thresholds)); - cqm_config->use_range_api = n_thresholds > 1 || - !rdev->ops->set_cqm_rssi_config; rcu_assign_pointer(wdev->cqm_config, cqm_config); - - if (cqm_config->use_range_api) - err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); - else - err = rdev_set_cqm_rssi_config(rdev, dev, - thresholds[0], - hysteresis); } else { RCU_INIT_POINTER(wdev->cqm_config, NULL); - /* if enabled as range also disable via range */ - if (old->use_range_api) - err = rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); - else - err = rdev_set_cqm_rssi_config(rdev, dev, 0, 0); } + err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); if (err) { rcu_assign_pointer(wdev->cqm_config, old); kfree_rcu(cqm_config, rcu_head); @@ -19142,11 +19131,10 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work) wdev_lock(wdev); cqm_config = rcu_dereference_protected(wdev->cqm_config, lockdep_is_held(&wdev->mtx)); - if (!cqm_config) + if (!wdev->cqm_config) goto unlock; - if (cqm_config->use_range_api) - cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); + cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); rssi_level = cqm_config->last_rssi_event_value; rssi_event = cqm_config->last_rssi_event_type; From 0c38b88c33239b236930264a5467dca6334792f3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 11 Dec 2023 10:40:17 +0100 Subject: [PATCH 0776/1397] Linux 6.6.6 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ee4e504a3e78..1eefa893f048 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 5 +SUBLEVEL = 6 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth From dd3438abfb85f6071ffafacd05d713a7ee020955 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 3 Nov 2023 05:26:27 -0700 Subject: [PATCH 0777/1397] vdpa/mlx5: preserve CVQ vringh index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 480b3e73720f6b5d76bef2387b1f9d19ed67573b ] mlx5_vdpa does not preserve userland's view of vring base for the control queue in the following sequence: ioctl VHOST_SET_VRING_BASE ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK mlx5_vdpa_set_status() setup_cvq_vring() vringh_init_iotlb() vringh_init_kern() vrh->last_avail_idx = 0; ioctl VHOST_GET_VRING_BASE To fix, restore the value of cvq->vring.last_avail_idx after calling vringh_init_iotlb. Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting") Signed-off-by: Steve Sistare Acked-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <1699014387-194368-1-git-send-email-steven.sistare@oracle.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 946488b8989f..ca972af3c89a 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2795,13 +2795,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) struct mlx5_control_vq *cvq = &mvdev->cvq; int err = 0; - if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { + u16 idx = cvq->vring.last_avail_idx; + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, MLX5_CVQ_MAX_ENT, false, (struct vring_desc *)(uintptr_t)cvq->desc_addr, (struct vring_avail *)(uintptr_t)cvq->driver_addr, (struct vring_used *)(uintptr_t)cvq->device_addr); + if (!err) + cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx; + } return err; } From c94c44685d801334b64bd7623bc7db0917304dd9 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 6 Nov 2023 17:13:04 -0600 Subject: [PATCH 0778/1397] scsi: sd: Fix sshdr use in sd_suspend_common() [ Upstream commit 3b83486399a6a9feb9c681b74c21a227d48d7020 ] If scsi_execute_cmd() returns < 0, it doesn't initialize the sshdr, so we shouldn't access the sshdr. If it returns 0, then the cmd executed successfully, so there is no need to check the sshdr. sd_sync_cache() will only access the sshdr if it's been setup because it calls scsi_status_is_check_condition() before accessing it. However, the sd_sync_cache() caller, sd_suspend_common(), does not check. sd_suspend_common() is only checking for ILLEGAL_REQUEST which it's using to determine if the command is supported. If it's not it just ignores the error. So to fix its sshdr use this patch just moves that check to sd_sync_cache() where it converts ILLEGAL_REQUEST to success/0. sd_suspend_common() was ignoring that error and sd_shutdown() doesn't check for errors so there will be no behavior changes. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20231106231304.5694-2-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: Martin Wilck Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/sd.c | 53 ++++++++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e17509f0b3fa..c2e8d9e27749 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1642,24 +1642,21 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing) return disk_changed ? DISK_EVENT_MEDIA_CHANGE : 0; } -static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) +static int sd_sync_cache(struct scsi_disk *sdkp) { int retries, res; struct scsi_device *sdp = sdkp->device; const int timeout = sdp->request_queue->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; - struct scsi_sense_hdr my_sshdr; + struct scsi_sense_hdr sshdr; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, - /* caller might not be interested in sense, but we need it */ - .sshdr = sshdr ? : &my_sshdr, + .sshdr = &sshdr, }; if (!scsi_device_online(sdp)) return -ENODEV; - sshdr = exec_args.sshdr; - for (retries = 3; retries > 0; --retries) { unsigned char cmd[16] = { 0 }; @@ -1684,15 +1681,23 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) return res; if (scsi_status_is_check_condition(res) && - scsi_sense_valid(sshdr)) { - sd_print_sense_hdr(sdkp, sshdr); + scsi_sense_valid(&sshdr)) { + sd_print_sense_hdr(sdkp, &sshdr); /* we need to evaluate the error return */ - if (sshdr->asc == 0x3a || /* medium not present */ - sshdr->asc == 0x20 || /* invalid command */ - (sshdr->asc == 0x74 && sshdr->ascq == 0x71)) /* drive is password locked */ + if (sshdr.asc == 0x3a || /* medium not present */ + sshdr.asc == 0x20 || /* invalid command */ + (sshdr.asc == 0x74 && sshdr.ascq == 0x71)) /* drive is password locked */ /* this is no error here */ return 0; + /* + * This drive doesn't support sync and there's not much + * we can do because this is called during shutdown + * or suspend so just return success so those operations + * can proceed. + */ + if (sshdr.sense_key == ILLEGAL_REQUEST) + return 0; } switch (host_byte(res)) { @@ -3847,7 +3852,7 @@ static void sd_shutdown(struct device *dev) if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); - sd_sync_cache(sdkp, NULL); + sd_sync_cache(sdkp); } if ((system_state != SYSTEM_RESTART && @@ -3868,7 +3873,6 @@ static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime) static int sd_suspend_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - struct scsi_sense_hdr sshdr; int ret = 0; if (!sdkp) /* E.g.: runtime suspend following sd_remove() */ @@ -3877,24 +3881,13 @@ static int sd_suspend_common(struct device *dev, bool runtime) if (sdkp->WCE && sdkp->media_present) { if (!sdkp->device->silence_suspend) sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); - ret = sd_sync_cache(sdkp, &sshdr); - - if (ret) { - /* ignore OFFLINE device */ - if (ret == -ENODEV) - return 0; - - if (!scsi_sense_valid(&sshdr) || - sshdr.sense_key != ILLEGAL_REQUEST) - return ret; + ret = sd_sync_cache(sdkp); + /* ignore OFFLINE device */ + if (ret == -ENODEV) + return 0; - /* - * sshdr.sense_key == ILLEGAL_REQUEST means this drive - * doesn't support sync. There's not much to do and - * suspend shouldn't fail. - */ - ret = 0; - } + if (ret) + return ret; } if (sd_do_start_stop(sdkp->device, runtime)) { From 53f408cad05bb987af860af22f4151e5a18e6ee8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Nov 2023 15:57:13 +0100 Subject: [PATCH 0779/1397] hrtimers: Push pending hrtimers away from outgoing CPU earlier [ Upstream commit 5c0930ccaad5a74d74e8b18b648c5eb21ed2fe94 ] 2b8272ff4a70 ("cpu/hotplug: Prevent self deadlock on CPU hot-unplug") solved the straight forward CPU hotplug deadlock vs. the scheduler bandwidth timer. Yu discovered a more involved variant where a task which has a bandwidth timer started on the outgoing CPU holds a lock and then gets throttled. If the lock required by one of the CPU hotplug callbacks the hotplug operation deadlocks because the unthrottling timer event is not handled on the dying CPU and can only be recovered once the control CPU reaches the hotplug state which pulls the pending hrtimers from the dead CPU. Solve this by pushing the hrtimers away from the dying CPU in the dying callbacks. Nothing can queue a hrtimer on the dying CPU at that point because all other CPUs spin in stop_machine() with interrupts disabled and once the operation is finished the CPU is marked offline. Reported-by: Yu Liao Signed-off-by: Thomas Gleixner Tested-by: Liu Tie Link: https://lore.kernel.org/r/87a5rphara.ffs@tglx Signed-off-by: Sasha Levin --- include/linux/cpuhotplug.h | 1 + include/linux/hrtimer.h | 4 ++-- kernel/cpu.c | 8 +++++++- kernel/time/hrtimer.c | 33 ++++++++++++--------------------- 4 files changed, 22 insertions(+), 24 deletions(-) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 28c1d3d77b70..624d4a38c358 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -194,6 +194,7 @@ enum cpuhp_state { CPUHP_AP_ARM_CORESIGHT_CTI_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, + CPUHP_AP_HRTIMERS_DYING, CPUHP_AP_X86_TBOOT_DYING, CPUHP_AP_ARM_CACHE_B15_RAC_DYING, CPUHP_AP_ONLINE, diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 0ee140176f10..f2044d5a652b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU -int hrtimers_dead_cpu(unsigned int cpu); +int hrtimers_cpu_dying(unsigned int cpu); #else -#define hrtimers_dead_cpu NULL +#define hrtimers_cpu_dying NULL #endif #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 303cb0591b4b..72e0f5380bf6 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2109,7 +2109,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { [CPUHP_HRTIMERS_PREPARE] = { .name = "hrtimers:prepare", .startup.single = hrtimers_prepare_cpu, - .teardown.single = hrtimers_dead_cpu, + .teardown.single = NULL, }, [CPUHP_SMPCFD_PREPARE] = { .name = "smpcfd:prepare", @@ -2201,6 +2201,12 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = NULL, .teardown.single = smpcfd_dying_cpu, }, + [CPUHP_AP_HRTIMERS_DYING] = { + .name = "hrtimers:dying", + .startup.single = NULL, + .teardown.single = hrtimers_cpu_dying, + }, + /* Entry state on starting. Interrupts enabled from here on. Transient * state for synchronsization */ [CPUHP_AP_ONLINE] = { diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 238262e4aba7..760793998cdd 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2219,29 +2219,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, } } -int hrtimers_dead_cpu(unsigned int scpu) +int hrtimers_cpu_dying(unsigned int dying_cpu) { struct hrtimer_cpu_base *old_base, *new_base; - int i; + int i, ncpu = cpumask_first(cpu_active_mask); - BUG_ON(cpu_online(scpu)); - tick_cancel_sched_timer(scpu); + tick_cancel_sched_timer(dying_cpu); + + old_base = this_cpu_ptr(&hrtimer_bases); + new_base = &per_cpu(hrtimer_bases, ncpu); - /* - * this BH disable ensures that raise_softirq_irqoff() does - * not wakeup ksoftirqd (and acquire the pi-lock) while - * holding the cpu_base lock - */ - local_bh_disable(); - local_irq_disable(); - old_base = &per_cpu(hrtimer_bases, scpu); - new_base = this_cpu_ptr(&hrtimer_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - raw_spin_lock(&new_base->lock); - raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock(&old_base->lock); + raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], @@ -2252,15 +2245,13 @@ int hrtimers_dead_cpu(unsigned int scpu) * The migration might have changed the first expiring softirq * timer on this CPU. Update it. */ - hrtimer_update_softirq_timer(new_base, false); + __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT); + /* Tell the other CPU to retrigger the next event */ + smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); - raw_spin_unlock(&old_base->lock); raw_spin_unlock(&new_base->lock); + raw_spin_unlock(&old_base->lock); - /* Check, if we got expired work to do */ - __hrtimer_peek_ahead_timers(); - local_irq_enable(); - local_bh_enable(); return 0; } From f5d10651396c52b95dc1b6e9867385e2675cff53 Mon Sep 17 00:00:00 2001 From: Jan Bottorff Date: Thu, 9 Nov 2023 03:19:27 +0000 Subject: [PATCH 0780/1397] i2c: designware: Fix corrupted memory seen in the ISR [ Upstream commit f726eaa787e9f9bc858c902d18a09af6bcbfcdaf ] When running on a many core ARM64 server, errors were happening in the ISR that looked like corrupted memory. These corruptions would fix themselves if small delays were inserted in the ISR. Errors reported by the driver included "i2c_designware APMC0D0F:00: i2c_dw_xfer_msg: invalid target address" and "i2c_designware APMC0D0F:00:controller timed out" during in-band IPMI SSIF stress tests. The problem was determined to be memory writes in the driver were not becoming visible to all cores when execution rapidly shifted between cores, like when a register write immediately triggers an ISR. Processors with weak memory ordering, like ARM64, make no guarantees about the order normal memory writes become globally visible, unless barrier instructions are used to control ordering. To solve this, regmap accessor functions configured by this driver were changed to use non-relaxed forms of the low-level register access functions, which include a barrier on platforms that require it. This assures memory writes before a controller register access are visible to all cores. The community concluded defaulting to correct operation outweighed defaulting to the small performance gains from using relaxed access functions. Being a low speed device added weight to this choice of default register access behavior. Signed-off-by: Jan Bottorff Acked-by: Jarkko Nikula Tested-by: Serge Semin Reviewed-by: Serge Semin Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-designware-common.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index affcfb243f0f..35f762872b8a 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -63,7 +63,7 @@ static int dw_reg_read(void *context, unsigned int reg, unsigned int *val) { struct dw_i2c_dev *dev = context; - *val = readl_relaxed(dev->base + reg); + *val = readl(dev->base + reg); return 0; } @@ -72,7 +72,7 @@ static int dw_reg_write(void *context, unsigned int reg, unsigned int val) { struct dw_i2c_dev *dev = context; - writel_relaxed(val, dev->base + reg); + writel(val, dev->base + reg); return 0; } @@ -81,7 +81,7 @@ static int dw_reg_read_swab(void *context, unsigned int reg, unsigned int *val) { struct dw_i2c_dev *dev = context; - *val = swab32(readl_relaxed(dev->base + reg)); + *val = swab32(readl(dev->base + reg)); return 0; } @@ -90,7 +90,7 @@ static int dw_reg_write_swab(void *context, unsigned int reg, unsigned int val) { struct dw_i2c_dev *dev = context; - writel_relaxed(swab32(val), dev->base + reg); + writel(swab32(val), dev->base + reg); return 0; } @@ -99,8 +99,8 @@ static int dw_reg_read_word(void *context, unsigned int reg, unsigned int *val) { struct dw_i2c_dev *dev = context; - *val = readw_relaxed(dev->base + reg) | - (readw_relaxed(dev->base + reg + 2) << 16); + *val = readw(dev->base + reg) | + (readw(dev->base + reg + 2) << 16); return 0; } @@ -109,8 +109,8 @@ static int dw_reg_write_word(void *context, unsigned int reg, unsigned int val) { struct dw_i2c_dev *dev = context; - writew_relaxed(val, dev->base + reg); - writew_relaxed(val >> 16, dev->base + reg + 2); + writew(val, dev->base + reg); + writew(val >> 16, dev->base + reg + 2); return 0; } From 22a59e51ded2d02dd146d140389e31796327ac63 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 12 Nov 2023 18:32:45 -0800 Subject: [PATCH 0781/1397] i2c: ocores: Move system PM hooks to the NOIRQ phase [ Upstream commit 382561d16854a747e6df71034da08d20d6013dfe ] When an I2C device contains a wake IRQ subordinate to a regmap-irq chip, the regmap-irq code must be able to perform I2C transactions during suspend_device_irqs() and resume_device_irqs(). Therefore, the bus must be suspended/resumed during the NOIRQ phase. Signed-off-by: Samuel Holland Acked-by: Peter Korsgaard Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-ocores.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index 041a76f71a49..e106af83cef4 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -771,8 +771,8 @@ static int ocores_i2c_resume(struct device *dev) return ocores_init(dev, i2c); } -static DEFINE_SIMPLE_DEV_PM_OPS(ocores_i2c_pm, - ocores_i2c_suspend, ocores_i2c_resume); +static DEFINE_NOIRQ_DEV_PM_OPS(ocores_i2c_pm, + ocores_i2c_suspend, ocores_i2c_resume); static struct platform_driver ocores_i2c_driver = { .probe = ocores_i2c_probe, From 23c31036f862582f98386120aee55c9ae23d7899 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 13 Nov 2023 21:13:23 +0100 Subject: [PATCH 0782/1397] netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test [ Upstream commit 28628fa952fefc7f2072ce6e8016968cc452b1ba ] Linkui Xiao reported that there's a race condition when ipset swap and destroy is called, which can lead to crash in add/del/test element operations. Swap then destroy are usual operations to replace a set with another one in a production system. The issue can in some cases be reproduced with the script: ipset create hash_ip1 hash:net family inet hashsize 1024 maxelem 1048576 ipset add hash_ip1 172.20.0.0/16 ipset add hash_ip1 192.168.0.0/16 iptables -A INPUT -m set --match-set hash_ip1 src -j ACCEPT while [ 1 ] do # ... Ongoing traffic... ipset create hash_ip2 hash:net family inet hashsize 1024 maxelem 1048576 ipset add hash_ip2 172.20.0.0/16 ipset swap hash_ip1 hash_ip2 ipset destroy hash_ip2 sleep 0.05 done In the race case the possible order of the operations are CPU0 CPU1 ip_set_test ipset swap hash_ip1 hash_ip2 ipset destroy hash_ip2 hash_net_kadt Swap replaces hash_ip1 with hash_ip2 and then destroy removes hash_ip2 which is the original hash_ip1. ip_set_test was called on hash_ip1 and because destroy removed it, hash_net_kadt crashes. The fix is to force ip_set_swap() to wait for all readers to finish accessing the old set pointers by calling synchronize_rcu(). The first version of the patch was written by Linkui Xiao . v2: synchronize_rcu() is moved into ip_set_swap() in order not to burden ip_set_destroy() unnecessarily when all sets are destroyed. v3: Florian Westphal pointed out that all netfilter hooks run with rcu_read_lock() held and em_ipset.c wraps the entire ip_set_test() in rcu read lock/unlock pair. So there's no need to extend the rcu read locked area in ipset itself. Closes: https://lore.kernel.org/all/69e7963b-e7f8-3ad0-210-7b86eebf7f78@netfilter.org/ Reported by: Linkui Xiao Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 35d2f9c9ada0..4c133e06be1d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); ip_set_dereference((inst)->ip_set_list)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] +#define ip_set_dereference_nfnl(p) \ + rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set) static struct ip_set * ip_set_rcu_get(struct net *net, ip_set_id_t index) { - struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - rcu_read_lock(); - /* ip_set_list itself needs to be protected */ - set = rcu_dereference(inst->ip_set_list)[index]; - rcu_read_unlock(); - - return set; + /* ip_set_list and the set pointer need to be protected */ + return ip_set_dereference_nfnl(inst->ip_set_list)[index]; } static inline void @@ -1397,6 +1394,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); + /* Make sure all readers of the old set pointers are completed. */ + synchronize_rcu(); + return 0; } From f5fb5ac7cee29cea9156e734fd652a66417d32fc Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 7 Nov 2023 15:32:55 +1000 Subject: [PATCH 0783/1397] nouveau: use an rwlock for the event lock. [ Upstream commit a2e36cd56041e277d7d81d35638fd8d9731e21f5 ] This allows it to break the following circular locking dependency. Aug 10 07:01:29 dg1test kernel: ====================================================== Aug 10 07:01:29 dg1test kernel: WARNING: possible circular locking dependency detected Aug 10 07:01:29 dg1test kernel: 6.4.0-rc7+ #10 Not tainted Aug 10 07:01:29 dg1test kernel: ------------------------------------------------------ Aug 10 07:01:29 dg1test kernel: wireplumber/2236 is trying to acquire lock: Aug 10 07:01:29 dg1test kernel: ffff8fca5320da18 (&fctx->lock){-...}-{2:2}, at: nouveau_fence_wait_uevent_handler+0x2b/0x100 [nouveau] Aug 10 07:01:29 dg1test kernel: but task is already holding lock: Aug 10 07:01:29 dg1test kernel: ffff8fca41208610 (&event->list_lock#2){-...}-{2:2}, at: nvkm_event_ntfy+0x50/0xf0 [nouveau] Aug 10 07:01:29 dg1test kernel: which lock already depends on the new lock. Aug 10 07:01:29 dg1test kernel: the existing dependency chain (in reverse order) is: Aug 10 07:01:29 dg1test kernel: -> #3 (&event->list_lock#2){-...}-{2:2}: Aug 10 07:01:29 dg1test kernel: _raw_spin_lock_irqsave+0x4b/0x70 Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy+0x50/0xf0 [nouveau] Aug 10 07:01:29 dg1test kernel: ga100_fifo_nonstall_intr+0x24/0x30 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_intr+0x12c/0x240 [nouveau] Aug 10 07:01:29 dg1test kernel: __handle_irq_event_percpu+0x88/0x240 Aug 10 07:01:29 dg1test kernel: handle_irq_event+0x38/0x80 Aug 10 07:01:29 dg1test kernel: handle_edge_irq+0xa3/0x240 Aug 10 07:01:29 dg1test kernel: __common_interrupt+0x72/0x160 Aug 10 07:01:29 dg1test kernel: common_interrupt+0x60/0xe0 Aug 10 07:01:29 dg1test kernel: asm_common_interrupt+0x26/0x40 Aug 10 07:01:29 dg1test kernel: -> #2 (&device->intr.lock){-...}-{2:2}: Aug 10 07:01:29 dg1test kernel: _raw_spin_lock_irqsave+0x4b/0x70 Aug 10 07:01:29 dg1test kernel: nvkm_inth_allow+0x2c/0x80 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy_state+0x181/0x250 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy_allow+0x63/0xd0 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_uevent_mthd+0x4d/0x70 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_ioctl+0x10b/0x250 [nouveau] Aug 10 07:01:29 dg1test kernel: nvif_object_mthd+0xa8/0x1f0 [nouveau] Aug 10 07:01:29 dg1test kernel: nvif_event_allow+0x2a/0xa0 [nouveau] Aug 10 07:01:29 dg1test kernel: nouveau_fence_enable_signaling+0x78/0x80 [nouveau] Aug 10 07:01:29 dg1test kernel: __dma_fence_enable_signaling+0x5e/0x100 Aug 10 07:01:29 dg1test kernel: dma_fence_add_callback+0x4b/0xd0 Aug 10 07:01:29 dg1test kernel: nouveau_cli_work_queue+0xae/0x110 [nouveau] Aug 10 07:01:29 dg1test kernel: nouveau_gem_object_close+0x1d1/0x2a0 [nouveau] Aug 10 07:01:29 dg1test kernel: drm_gem_handle_delete+0x70/0xe0 [drm] Aug 10 07:01:29 dg1test kernel: drm_ioctl_kernel+0xa5/0x150 [drm] Aug 10 07:01:29 dg1test kernel: drm_ioctl+0x256/0x490 [drm] Aug 10 07:01:29 dg1test kernel: nouveau_drm_ioctl+0x5a/0xb0 [nouveau] Aug 10 07:01:29 dg1test kernel: __x64_sys_ioctl+0x91/0xd0 Aug 10 07:01:29 dg1test kernel: do_syscall_64+0x3c/0x90 Aug 10 07:01:29 dg1test kernel: entry_SYSCALL_64_after_hwframe+0x72/0xdc Aug 10 07:01:29 dg1test kernel: -> #1 (&event->refs_lock#4){....}-{2:2}: Aug 10 07:01:29 dg1test kernel: _raw_spin_lock_irqsave+0x4b/0x70 Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy_state+0x37/0x250 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy_allow+0x63/0xd0 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_uevent_mthd+0x4d/0x70 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_ioctl+0x10b/0x250 [nouveau] Aug 10 07:01:29 dg1test kernel: nvif_object_mthd+0xa8/0x1f0 [nouveau] Aug 10 07:01:29 dg1test kernel: nvif_event_allow+0x2a/0xa0 [nouveau] Aug 10 07:01:29 dg1test kernel: nouveau_fence_enable_signaling+0x78/0x80 [nouveau] Aug 10 07:01:29 dg1test kernel: __dma_fence_enable_signaling+0x5e/0x100 Aug 10 07:01:29 dg1test kernel: dma_fence_add_callback+0x4b/0xd0 Aug 10 07:01:29 dg1test kernel: nouveau_cli_work_queue+0xae/0x110 [nouveau] Aug 10 07:01:29 dg1test kernel: nouveau_gem_object_close+0x1d1/0x2a0 [nouveau] Aug 10 07:01:29 dg1test kernel: drm_gem_handle_delete+0x70/0xe0 [drm] Aug 10 07:01:29 dg1test kernel: drm_ioctl_kernel+0xa5/0x150 [drm] Aug 10 07:01:29 dg1test kernel: drm_ioctl+0x256/0x490 [drm] Aug 10 07:01:29 dg1test kernel: nouveau_drm_ioctl+0x5a/0xb0 [nouveau] Aug 10 07:01:29 dg1test kernel: __x64_sys_ioctl+0x91/0xd0 Aug 10 07:01:29 dg1test kernel: do_syscall_64+0x3c/0x90 Aug 10 07:01:29 dg1test kernel: entry_SYSCALL_64_after_hwframe+0x72/0xdc Aug 10 07:01:29 dg1test kernel: -> #0 (&fctx->lock){-...}-{2:2}: Aug 10 07:01:29 dg1test kernel: __lock_acquire+0x14e3/0x2240 Aug 10 07:01:29 dg1test kernel: lock_acquire+0xc8/0x2a0 Aug 10 07:01:29 dg1test kernel: _raw_spin_lock_irqsave+0x4b/0x70 Aug 10 07:01:29 dg1test kernel: nouveau_fence_wait_uevent_handler+0x2b/0x100 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_client_event+0xf/0x20 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy+0x9b/0xf0 [nouveau] Aug 10 07:01:29 dg1test kernel: ga100_fifo_nonstall_intr+0x24/0x30 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_intr+0x12c/0x240 [nouveau] Aug 10 07:01:29 dg1test kernel: __handle_irq_event_percpu+0x88/0x240 Aug 10 07:01:29 dg1test kernel: handle_irq_event+0x38/0x80 Aug 10 07:01:29 dg1test kernel: handle_edge_irq+0xa3/0x240 Aug 10 07:01:29 dg1test kernel: __common_interrupt+0x72/0x160 Aug 10 07:01:29 dg1test kernel: common_interrupt+0x60/0xe0 Aug 10 07:01:29 dg1test kernel: asm_common_interrupt+0x26/0x40 Aug 10 07:01:29 dg1test kernel: other info that might help us debug this: Aug 10 07:01:29 dg1test kernel: Chain exists of: &fctx->lock --> &device->intr.lock --> &event->list_lock#2 Aug 10 07:01:29 dg1test kernel: Possible unsafe locking scenario: Aug 10 07:01:29 dg1test kernel: CPU0 CPU1 Aug 10 07:01:29 dg1test kernel: ---- ---- Aug 10 07:01:29 dg1test kernel: lock(&event->list_lock#2); Aug 10 07:01:29 dg1test kernel: lock(&device->intr.lock); Aug 10 07:01:29 dg1test kernel: lock(&event->list_lock#2); Aug 10 07:01:29 dg1test kernel: lock(&fctx->lock); Aug 10 07:01:29 dg1test kernel: *** DEADLOCK *** Aug 10 07:01:29 dg1test kernel: 2 locks held by wireplumber/2236: Aug 10 07:01:29 dg1test kernel: #0: ffff8fca53177bf8 (&device->intr.lock){-...}-{2:2}, at: nvkm_intr+0x29/0x240 [nouveau] Aug 10 07:01:29 dg1test kernel: #1: ffff8fca41208610 (&event->list_lock#2){-...}-{2:2}, at: nvkm_event_ntfy+0x50/0xf0 [nouveau] Aug 10 07:01:29 dg1test kernel: stack backtrace: Aug 10 07:01:29 dg1test kernel: CPU: 6 PID: 2236 Comm: wireplumber Not tainted 6.4.0-rc7+ #10 Aug 10 07:01:29 dg1test kernel: Hardware name: Gigabyte Technology Co., Ltd. Z390 I AORUS PRO WIFI/Z390 I AORUS PRO WIFI-CF, BIOS F8 11/05/2021 Aug 10 07:01:29 dg1test kernel: Call Trace: Aug 10 07:01:29 dg1test kernel: Aug 10 07:01:29 dg1test kernel: dump_stack_lvl+0x5b/0x90 Aug 10 07:01:29 dg1test kernel: check_noncircular+0xe2/0x110 Aug 10 07:01:29 dg1test kernel: __lock_acquire+0x14e3/0x2240 Aug 10 07:01:29 dg1test kernel: lock_acquire+0xc8/0x2a0 Aug 10 07:01:29 dg1test kernel: ? nouveau_fence_wait_uevent_handler+0x2b/0x100 [nouveau] Aug 10 07:01:29 dg1test kernel: ? lock_acquire+0xc8/0x2a0 Aug 10 07:01:29 dg1test kernel: _raw_spin_lock_irqsave+0x4b/0x70 Aug 10 07:01:29 dg1test kernel: ? nouveau_fence_wait_uevent_handler+0x2b/0x100 [nouveau] Aug 10 07:01:29 dg1test kernel: nouveau_fence_wait_uevent_handler+0x2b/0x100 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_client_event+0xf/0x20 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy+0x9b/0xf0 [nouveau] Aug 10 07:01:29 dg1test kernel: ga100_fifo_nonstall_intr+0x24/0x30 [nouveau] Aug 10 07:01:29 dg1test kernel: nvkm_intr+0x12c/0x240 [nouveau] Aug 10 07:01:29 dg1test kernel: __handle_irq_event_percpu+0x88/0x240 Aug 10 07:01:29 dg1test kernel: handle_irq_event+0x38/0x80 Aug 10 07:01:29 dg1test kernel: handle_edge_irq+0xa3/0x240 Aug 10 07:01:29 dg1test kernel: __common_interrupt+0x72/0x160 Aug 10 07:01:29 dg1test kernel: common_interrupt+0x60/0xe0 Aug 10 07:01:29 dg1test kernel: asm_common_interrupt+0x26/0x40 Aug 10 07:01:29 dg1test kernel: RIP: 0033:0x7fb66174d700 Aug 10 07:01:29 dg1test kernel: Code: c1 e2 05 29 ca 8d 0c 10 0f be 07 84 c0 75 eb 89 c8 c3 0f 1f 84 00 00 00 00 00 f3 0f 1e fa e9 d7 0f fc ff 0f 1f 80 00 00 00 00 0f 1e fa e9 c7 0f fc> Aug 10 07:01:29 dg1test kernel: RSP: 002b:00007ffdd3c48438 EFLAGS: 00000206 Aug 10 07:01:29 dg1test kernel: RAX: 000055bb758763c0 RBX: 000055bb758752c0 RCX: 00000000000028b0 Aug 10 07:01:29 dg1test kernel: RDX: 000055bb758752c0 RSI: 000055bb75887490 RDI: 000055bb75862950 Aug 10 07:01:29 dg1test kernel: RBP: 00007ffdd3c48490 R08: 000055bb75873b10 R09: 0000000000000001 Aug 10 07:01:29 dg1test kernel: R10: 0000000000000004 R11: 000055bb7587f000 R12: 000055bb75887490 Aug 10 07:01:29 dg1test kernel: R13: 000055bb757f6280 R14: 000055bb758875c0 R15: 000055bb757f6280 Aug 10 07:01:29 dg1test kernel: Signed-off-by: Dave Airlie Tested-by: Danilo Krummrich Reviewed-by: Danilo Krummrich Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231107053255.2257079-1-airlied@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/include/nvkm/core/event.h | 4 ++-- drivers/gpu/drm/nouveau/nvkm/core/event.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h index 82b267c11147..460459af272d 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h @@ -14,7 +14,7 @@ struct nvkm_event { int index_nr; spinlock_t refs_lock; - spinlock_t list_lock; + rwlock_t list_lock; int *refs; struct list_head ntfy; @@ -38,7 +38,7 @@ nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev, int types_nr, int index_nr, struct nvkm_event *event) { spin_lock_init(&event->refs_lock); - spin_lock_init(&event->list_lock); + rwlock_init(&event->list_lock); return __nvkm_event_init(func, subdev, types_nr, index_nr, event); } diff --git a/drivers/gpu/drm/nouveau/nvkm/core/event.c b/drivers/gpu/drm/nouveau/nvkm/core/event.c index a6c877135598..61fed7792e41 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/event.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/event.c @@ -81,17 +81,17 @@ nvkm_event_ntfy_state(struct nvkm_event_ntfy *ntfy) static void nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy) { - spin_lock_irq(&ntfy->event->list_lock); + write_lock_irq(&ntfy->event->list_lock); list_del_init(&ntfy->head); - spin_unlock_irq(&ntfy->event->list_lock); + write_unlock_irq(&ntfy->event->list_lock); } static void nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy) { - spin_lock_irq(&ntfy->event->list_lock); + write_lock_irq(&ntfy->event->list_lock); list_add_tail(&ntfy->head, &ntfy->event->ntfy); - spin_unlock_irq(&ntfy->event->list_lock); + write_unlock_irq(&ntfy->event->list_lock); } static void @@ -176,7 +176,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits) return; nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id); - spin_lock_irqsave(&event->list_lock, flags); + read_lock_irqsave(&event->list_lock, flags); list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) { if (ntfy->id == id && ntfy->bits & bits) { @@ -185,7 +185,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits) } } - spin_unlock_irqrestore(&event->list_lock, flags); + read_unlock_irqrestore(&event->list_lock, flags); } void From 90ed718d6a8e40ea58573f9fd90c8ebda20ca58d Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 12 Oct 2023 12:55:34 -0700 Subject: [PATCH 0784/1397] zstd: Fix array-index-out-of-bounds UBSAN warning [ Upstream commit 77618db346455129424fadbbaec596a09feaf3bb ] Zstd used an array of length 1 to mean a flexible array for C89 compatibility. Switch to a C99 flexible array to fix the UBSAN warning. Tested locally by booting the kernel and writing to and reading from a BtrFS filesystem with zstd compression enabled. I was unable to reproduce the issue before the fix, however it is a trivial change. Link: https://lkml.kernel.org/r/20231012213428.1390905-1-nickrterrell@gmail.com Reported-by: syzbot+1f2eb3e8cd123ffce499@syzkaller.appspotmail.com Reported-by: Eric Biggers Reported-by: Kees Cook Signed-off-by: Nick Terrell Reviewed-by: Kees Cook Signed-off-by: Sasha Levin --- lib/zstd/common/fse_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c index a0d06095be83..8dcb8ca39767 100644 --- a/lib/zstd/common/fse_decompress.c +++ b/lib/zstd/common/fse_decompress.c @@ -312,7 +312,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size typedef struct { short ncount[FSE_MAX_SYMBOL_VALUE + 1]; - FSE_DTable dtable[1]; /* Dynamically sized */ + FSE_DTable dtable[]; /* Dynamically sized */ } FSE_DecompressWksp; From 763a1b3177d9cb44e17000473a83a984271f0688 Mon Sep 17 00:00:00 2001 From: Alex Pakhunov Date: Mon, 13 Nov 2023 10:23:49 -0800 Subject: [PATCH 0785/1397] tg3: Move the [rt]x_dropped counters to tg3_napi [ Upstream commit 907d1bdb8b2cc0357d03a1c34d2a08d9943760b1 ] This change moves [rt]x_dropped counters to tg3_napi so that they can be updated by a single writer, race-free. Signed-off-by: Alex Pakhunov Signed-off-by: Vincent Wong Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20231113182350.37472-1-alexey.pakhunov@spacex.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/tg3.c | 38 +++++++++++++++++++++++++---- drivers/net/ethernet/broadcom/tg3.h | 4 +-- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 22b00912f7ac..5c18ad10efc3 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6845,7 +6845,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) desc_idx, *post_ptr); drop_it_no_recycle: /* Other statistics kept track of by card. */ - tp->rx_dropped++; + tnapi->rx_dropped++; goto next_pkt; } @@ -8146,7 +8146,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) drop: dev_kfree_skb_any(skb); drop_nofree: - tp->tx_dropped++; + tnapi->tx_dropped++; return NETDEV_TX_OK; } @@ -9325,7 +9325,7 @@ static void __tg3_set_rx_mode(struct net_device *); /* tp->lock is held. */ static int tg3_halt(struct tg3 *tp, int kind, bool silent) { - int err; + int err, i; tg3_stop_fw(tp); @@ -9346,6 +9346,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent) /* And make sure the next sample is new data */ memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats)); + + for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) { + struct tg3_napi *tnapi = &tp->napi[i]; + + tnapi->rx_dropped = 0; + tnapi->tx_dropped = 0; + } } return err; @@ -11895,6 +11902,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) { struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev; struct tg3_hw_stats *hw_stats = tp->hw_stats; + unsigned long rx_dropped; + unsigned long tx_dropped; + int i; stats->rx_packets = old_stats->rx_packets + get_stat64(&hw_stats->rx_ucast_packets) + @@ -11941,8 +11951,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats) stats->rx_missed_errors = old_stats->rx_missed_errors + get_stat64(&hw_stats->rx_discards); - stats->rx_dropped = tp->rx_dropped; - stats->tx_dropped = tp->tx_dropped; + /* Aggregate per-queue counters. The per-queue counters are updated + * by a single writer, race-free. The result computed by this loop + * might not be 100% accurate (counters can be updated in the middle of + * the loop) but the next tg3_get_nstats() will recompute the current + * value so it is acceptable. + * + * Note that these counters wrap around at 4G on 32bit machines. + */ + rx_dropped = (unsigned long)(old_stats->rx_dropped); + tx_dropped = (unsigned long)(old_stats->tx_dropped); + + for (i = 0; i < tp->irq_cnt; i++) { + struct tg3_napi *tnapi = &tp->napi[i]; + + rx_dropped += tnapi->rx_dropped; + tx_dropped += tnapi->tx_dropped; + } + + stats->rx_dropped = rx_dropped; + stats->tx_dropped = tx_dropped; } static int tg3_get_regs_len(struct net_device *dev) diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 1000c894064f..8d753f8c5b06 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -3018,6 +3018,7 @@ struct tg3_napi { u16 *rx_rcb_prod_idx; struct tg3_rx_prodring_set prodring; struct tg3_rx_buffer_desc *rx_rcb; + unsigned long rx_dropped; u32 tx_prod ____cacheline_aligned; u32 tx_cons; @@ -3026,6 +3027,7 @@ struct tg3_napi { u32 prodmbox; struct tg3_tx_buffer_desc *tx_ring; struct tg3_tx_ring_info *tx_buffers; + unsigned long tx_dropped; dma_addr_t status_mapping; dma_addr_t rx_rcb_mapping; @@ -3219,8 +3221,6 @@ struct tg3 { /* begin "everything else" cacheline(s) section */ - unsigned long rx_dropped; - unsigned long tx_dropped; struct rtnl_link_stats64 net_stats_prev; struct tg3_ethtool_stats estats_prev; From 9a07b662e54ed0c8a795d38f42c731ea58a9f09d Mon Sep 17 00:00:00 2001 From: Alex Pakhunov Date: Mon, 13 Nov 2023 10:23:50 -0800 Subject: [PATCH 0786/1397] tg3: Increment tx_dropped in tg3_tso_bug() [ Upstream commit 17dd5efe5f36a96bd78012594fabe21efb01186b ] tg3_tso_bug() drops a packet if it cannot be segmented for any reason. The number of discarded frames should be incremented accordingly. Signed-off-by: Alex Pakhunov Signed-off-by: Vincent Wong Reviewed-by: Pavan Chebbi Link: https://lore.kernel.org/r/20231113182350.37472-2-alexey.pakhunov@spacex.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/tg3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 5c18ad10efc3..b7acd994a393 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7874,8 +7874,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, segs = skb_gso_segment(skb, tp->dev->features & ~(NETIF_F_TSO | NETIF_F_TSO6)); - if (IS_ERR(segs) || !segs) + if (IS_ERR(segs) || !segs) { + tnapi->tx_dropped++; goto tg3_tso_bug_end; + } skb_list_walk_safe(segs, seg, next) { skb_mark_not_on_list(seg); From b1205cc72b47765dbfd40ca6c0cfb33da0274008 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 1 Nov 2023 02:46:27 +0900 Subject: [PATCH 0787/1397] modpost: fix section mismatch message for RELA [ Upstream commit 1c4a7587d1bbee0fd53b63af60e4244a62775f57 ] The section mismatch check prints a bogus symbol name on some architectures. [test code] #include int __initdata foo; int get_foo(void) { return foo; } If you compile it with GCC for riscv or loongarch, modpost will show an incorrect symbol name: WARNING: modpost: vmlinux: section mismatch in reference: get_foo+0x8 (section: .text) -> done (section: .init.data) To get the correct symbol address, the st_value must be added. This issue has never been noticed since commit 93684d3b8062 ("kbuild: include symbol names in section mismatch warnings") presumably because st_value becomes zero on most architectures when the referenced symbol is looked up. It is not true for riscv or loongarch, at least. With this fix, modpost will show the correct symbol name: WARNING: modpost: vmlinux: section mismatch in reference: get_foo+0x8 (section: .text) -> foo (section: .init.data) Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Signed-off-by: Sasha Levin --- scripts/mod/modpost.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index b3dee80497cb..ac4ef3e206bb 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1496,13 +1496,15 @@ static void section_rela(struct module *mod, struct elf_info *elf, return; for (rela = start; rela < stop; rela++) { + Elf_Sym *tsym; Elf_Addr taddr, r_offset; unsigned int r_type, r_sym; r_offset = TO_NATIVE(rela->r_offset); get_rel_type_and_sym(elf, rela->r_info, &r_type, &r_sym); - taddr = TO_NATIVE(rela->r_addend); + tsym = elf->symtab_start + r_sym; + taddr = tsym->st_value + TO_NATIVE(rela->r_addend); switch (elf->hdr->e_machine) { case EM_RISCV: @@ -1517,7 +1519,7 @@ static void section_rela(struct module *mod, struct elf_info *elf, break; } - check_section_mismatch(mod, elf, elf->symtab_start + r_sym, + check_section_mismatch(mod, elf, tsym, fsecndx, fromsec, r_offset, taddr); } } From b97debd07a8da5b2d47d5c77aad9b7d4ea53942a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 15 Nov 2023 13:16:53 +0900 Subject: [PATCH 0788/1397] kconfig: fix memory leak from range properties [ Upstream commit ae1eff0349f2e908fc083630e8441ea6dc434dc0 ] Currently, sym_validate_range() duplicates the range string using xstrdup(), which is overwritten by a subsequent sym_calc_value() call. It results in a memory leak. Instead, only the pointer should be copied. Below is a test case, with a summary from Valgrind. [Test Kconfig] config FOO int "foo" range 10 20 [Test .config] CONFIG_FOO=0 [Before] LEAK SUMMARY: definitely lost: 3 bytes in 1 blocks indirectly lost: 0 bytes in 0 blocks possibly lost: 0 bytes in 0 blocks still reachable: 17,465 bytes in 21 blocks suppressed: 0 bytes in 0 blocks [After] LEAK SUMMARY: definitely lost: 0 bytes in 0 blocks indirectly lost: 0 bytes in 0 blocks possibly lost: 0 bytes in 0 blocks still reachable: 17,462 bytes in 20 blocks suppressed: 0 bytes in 0 blocks Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/kconfig/symbol.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 0572330bf8a7..a76925b46ce6 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -122,9 +122,9 @@ static long long sym_get_range_val(struct symbol *sym, int base) static void sym_validate_range(struct symbol *sym) { struct property *prop; + struct symbol *range_sym; int base; long long val, val2; - char str[64]; switch (sym->type) { case S_INT: @@ -140,17 +140,15 @@ static void sym_validate_range(struct symbol *sym) if (!prop) return; val = strtoll(sym->curr.val, NULL, base); - val2 = sym_get_range_val(prop->expr->left.sym, base); + range_sym = prop->expr->left.sym; + val2 = sym_get_range_val(range_sym, base); if (val >= val2) { - val2 = sym_get_range_val(prop->expr->right.sym, base); + range_sym = prop->expr->right.sym; + val2 = sym_get_range_val(range_sym, base); if (val <= val2) return; } - if (sym->type == S_INT) - sprintf(str, "%lld", val2); - else - sprintf(str, "0x%llx", val2); - sym->curr.val = xstrdup(str); + sym->curr.val = range_sym->curr.val; } static void sym_set_changed(struct symbol *sym) From 3a56e61b57791f2e4db66a80b723b8515ea5880f Mon Sep 17 00:00:00 2001 From: Victor Lu Date: Wed, 4 Oct 2023 14:24:15 -0400 Subject: [PATCH 0789/1397] drm/amdgpu: Do not program VF copy regs in mmhub v1.8 under SRIOV (v2) [ Upstream commit 0288603040c38ccfeb5342f34a52673366d90038 ] MC_VM_AGP_* registers should not be programmed by guest driver. v2: move early return outside of loop Signed-off-by: Victor Lu Reviewed-by: Samir Dhume Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 784c4e077470..3d8e579d5c4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -130,6 +130,9 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; int i; + if (amdgpu_sriov_vf(adev)) + return; + inst_mask = adev->aid_mask; for_each_inst(i, inst_mask) { /* Program the AGP BAR */ @@ -139,9 +142,6 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); - if (amdgpu_sriov_vf(adev)) - return; - /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); From a8f5cd042305f18f67db0be7732c6702798073e6 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Mon, 13 Nov 2023 18:05:34 +0800 Subject: [PATCH 0790/1397] drm/amdgpu: finalizing mem_partitions at the end of GMC v9 sw_fini [ Upstream commit bdb72185d310fc8049c7ea95221d640e9e7165e5 ] The valid num_mem_partitions is required during ttm pool fini, thus move the cleanup at the end of the function. Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f9a5a2c0573e..89550d3df68d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2220,8 +2220,6 @@ static int gmc_v9_0_sw_fini(void *handle) if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) amdgpu_gmc_sysfs_fini(adev); - adev->gmc.num_mem_partitions = 0; - kfree(adev->gmc.mem_partitions); amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); @@ -2235,6 +2233,9 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); + adev->gmc.num_mem_partitions = 0; + kfree(adev->gmc.mem_partitions); + return 0; } From 915864d18641ff263efdef887d699b9103ee0878 Mon Sep 17 00:00:00 2001 From: YuanShang Date: Tue, 31 Oct 2023 10:32:37 +0800 Subject: [PATCH 0791/1397] drm/amdgpu: correct chunk_ptr to a pointer to chunk. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 50d51374b498457c4dea26779d32ccfed12ddaff ] The variable "chunk_ptr" should be a pointer pointing to a struct drm_amdgpu_cs_chunk instead of to a pointer of that. Signed-off-by: YuanShang Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f4fd0d5bd9b6..c0a3afe81bb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -207,7 +207,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } for (i = 0; i < p->nchunks; i++) { - struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL; + struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL; struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata; From dfa1898cef4c33fefb402d3aec1cca8df146e34c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 17 Nov 2023 18:38:33 +0100 Subject: [PATCH 0792/1397] dm-crypt: start allocating with MAX_ORDER [ Upstream commit 13648e04a9b831b3dfa5cf3887dfa6cf8fe5fe69 ] Commit 23baf831a32c ("mm, treewide: redefine MAX_ORDER sanely") changed the meaning of MAX_ORDER from exclusive to inclusive. So, we can allocate compound pages with up to 1 << MAX_ORDER pages. Reflect this change in dm-crypt and start trying to allocate compound pages with MAX_ORDER. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin --- drivers/md/dm-crypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index cef9353370b2..17ffbf7fbe73 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1679,7 +1679,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size) unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM; unsigned int remaining_size; - unsigned int order = MAX_ORDER - 1; + unsigned int order = MAX_ORDER; retry: if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) From f259af26ee04f11cb2c3af0a6873a6bcb67dd338 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 23 Jun 2023 14:14:04 +0300 Subject: [PATCH 0793/1397] x86: Introduce ia32_enabled() [ upstream commit 1da5c9bc119d3a749b519596b93f9b2667e93c4a ] IA32 support on 64bit kernels depends on whether CONFIG_IA32_EMULATION is selected or not. As it is a compile time option it doesn't provide the flexibility to have distributions set their own policy for IA32 support and give the user the flexibility to override it. As a first step introduce ia32_enabled() which abstracts whether IA32 compat is turned on or off. Upcoming patches will implement the ability to set IA32 compat state at boot time. Signed-off-by: Nikolay Borisov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230623111409.3047467-2-nik.borisov@suse.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 4 ++++ arch/x86/include/asm/ia32.h | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 93c60c0c9d4a..4cec38985414 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -96,6 +96,10 @@ static __always_inline int syscall_32_enter(struct pt_regs *regs) return (int)regs->orig_ax; } +#ifdef CONFIG_IA32_EMULATION +bool __ia32_enabled __ro_after_init = true; +#endif + /* * Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. */ diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index fada857f0a1e..5a2ae24b1204 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -68,6 +68,20 @@ extern void ia32_pick_mmap_layout(struct mm_struct *mm); #endif -#endif /* CONFIG_IA32_EMULATION */ +extern bool __ia32_enabled; + +static inline bool ia32_enabled(void) +{ + return __ia32_enabled; +} + +#else /* !CONFIG_IA32_EMULATION */ + +static inline bool ia32_enabled(void) +{ + return IS_ENABLED(CONFIG_X86_32); +} + +#endif #endif /* _ASM_X86_IA32_H */ From 34c686e5be2fa1c03ae09568159a9ef37d1c7cf5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 4 Dec 2023 11:31:38 +0300 Subject: [PATCH 0794/1397] x86/coco: Disable 32-bit emulation by default on TDX and SEV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ upstream commit b82a8dbd3d2f4563156f7150c6f2ecab6e960b30 ] The INT 0x80 instruction is used for 32-bit x86 Linux syscalls. The kernel expects to receive a software interrupt as a result of the INT 0x80 instruction. However, an external interrupt on the same vector triggers the same handler. The kernel interprets an external interrupt on vector 0x80 as a 32-bit system call that came from userspace. A VMM can inject external interrupts on any arbitrary vector at any time. This remains true even for TDX and SEV guests where the VMM is untrusted. Put together, this allows an untrusted VMM to trigger int80 syscall handling at any given point. The content of the guest register file at that moment defines what syscall is triggered and its arguments. It opens the guest OS to manipulation from the VMM side. Disable 32-bit emulation by default for TDX and SEV. User can override it with the ia32_emulation=y command line option. [ dhansen: reword the changelog ] Reported-by: Supraja Sridhara Reported-by: Benedict Schlüter Reported-by: Mark Kuhne Reported-by: Andrin Bertschi Reported-by: Shweta Shinde Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov (AMD) Cc: # 6.0+: 1da5c9b x86: Introduce ia32_enabled() Cc: # 6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/tdx/tdx.c | 10 ++++++++++ arch/x86/include/asm/ia32.h | 7 +++++++ arch/x86/mm/mem_encrypt_amd.c | 11 +++++++++++ 3 files changed, 28 insertions(+) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 1d6b863c42b0..70d708cae961 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -820,5 +821,14 @@ void __init tdx_early_init(void) */ x86_cpuinit.parallel_bringup = false; + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. + */ + ia32_disable(); + pr_info("Guest detected\n"); } diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 5a2ae24b1204..9805629479d9 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -75,6 +75,11 @@ static inline bool ia32_enabled(void) return __ia32_enabled; } +static inline void ia32_disable(void) +{ + __ia32_enabled = false; +} + #else /* !CONFIG_IA32_EMULATION */ static inline bool ia32_enabled(void) @@ -82,6 +87,8 @@ static inline bool ia32_enabled(void) return IS_ENABLED(CONFIG_X86_32); } +static inline void ia32_disable(void) {} + #endif #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 6faea41e99b6..45ff95264a09 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "mm_internal.h" @@ -517,6 +518,16 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ES_ENABLED) x86_cpuinit.parallel_bringup = false; + + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. + */ + if (sev_status & MSR_AMD64_SEV_ENABLED) + ia32_disable(); } void __init mem_encrypt_free_decrypted_mem(void) From 4591766ff6552339fbaa3d3c71814faef1988c2f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2023 11:31:39 +0300 Subject: [PATCH 0795/1397] x86/entry: Convert INT 0x80 emulation to IDTENTRY [ upstream commit be5341eb0d43b1e754799498bd2e8756cc167a41 ] There is no real reason to have a separate ASM entry point implementation for the legacy INT 0x80 syscall emulation on 64-bit. IDTENTRY provides all the functionality needed with the only difference that it does not: - save the syscall number (AX) into pt_regs::orig_ax - set pt_regs::ax to -ENOSYS Both can be done safely in the C code of an IDTENTRY before invoking any of the syscall related functions which depend on this convention. Aside of ASM code reduction this prepares for detecting and handling a local APIC injected vector 0x80. [ kirill.shutemov: More verbose comments ] Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 58 +++++++++++++++++++++++- arch/x86/entry/entry_64_compat.S | 77 -------------------------------- arch/x86/include/asm/idtentry.h | 4 ++ arch/x86/include/asm/proto.h | 4 -- arch/x86/kernel/idt.c | 2 +- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm.S | 2 +- 7 files changed, 64 insertions(+), 85 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 4cec38985414..0f22a037be66 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -119,7 +119,62 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) } } -/* Handles int $0x80 */ +#ifdef CONFIG_IA32_EMULATION +/** + * int80_emulation - 32-bit legacy syscall entry + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. Instances of INT $0x80 can be found inline in + * various programs and libraries. It is also used by the vDSO's + * __kernel_vsyscall fallback for hardware that doesn't support a faster + * entry method. Restarted 32-bit system calls also fall back to INT + * $0x80 regardless of what instruction was originally used to do the + * system call. + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. + * + * The arguments for the INT $0x80 based syscall are on stack in the + * pt_regs structure: + * eax: system call number + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 + */ +DEFINE_IDTENTRY_RAW(int80_emulation) +{ + int nr; + + /* Establish kernel context. */ + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* + * The low level idtentry code pushed -1 into regs::orig_ax + * and regs::ax contains the syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#else /* CONFIG_IA32_EMULATION */ + +/* Handles int $0x80 on a 32bit kernel */ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { int nr = syscall_32_enter(regs); @@ -138,6 +193,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) instrumentation_end(); syscall_exit_to_user_mode(regs); } +#endif /* !CONFIG_IA32_EMULATION */ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 70150298f8bd..4e88f8438706 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -276,80 +276,3 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) - -/* - * 32-bit legacy system call entry. - * - * 32-bit x86 Linux system calls traditionally used the INT $0x80 - * instruction. INT $0x80 lands here. - * - * This entry point can be used by 32-bit and 64-bit programs to perform - * 32-bit system calls. Instances of INT $0x80 can be found inline in - * various programs and libraries. It is also used by the vDSO's - * __kernel_vsyscall fallback for hardware that doesn't support a faster - * entry method. Restarted 32-bit system calls also fall back to INT - * $0x80 regardless of what instruction was originally used to do the - * system call. - * - * This is considered a slow path. It is not used by most libc - * implementations on modern hardware except during process startup. - * - * Arguments: - * eax system call number - * ebx arg1 - * ecx arg2 - * edx arg3 - * esi arg4 - * edi arg5 - * ebp arg6 - */ -SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_ENTRY - ENDBR - /* - * Interrupts are off on entry. - */ - ASM_CLAC /* Do this early to minimize exposure */ - ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV - - /* - * User tracing code (ptrace or signal handlers) might assume that - * the saved RAX contains a 32-bit number when we're invoking a 32-bit - * syscall. Just in case the high bits are nonzero, zero-extend - * the syscall number. (This could almost certainly be deleted - * with no ill effects.) - */ - movl %eax, %eax - - /* switch to thread stack expects orig_ax and rdi to be pushed */ - pushq %rax /* pt_regs->orig_ax */ - - /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rax - - /* In the Xen PV case we already run on the thread stack. */ - ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV - - movq %rsp, %rax - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp - - pushq 5*8(%rax) /* regs->ss */ - pushq 4*8(%rax) /* regs->rsp */ - pushq 3*8(%rax) /* regs->eflags */ - pushq 2*8(%rax) /* regs->cs */ - pushq 1*8(%rax) /* regs->ip */ - pushq 0*8(%rax) /* regs->orig_ax */ -.Lint80_keep_stack: - - PUSH_AND_CLEAR_REGS rax=$-ENOSYS - UNWIND_HINT_REGS - - cld - - IBRS_ENTER - UNTRAIN_RET - - movq %rsp, %rdi - call do_int80_syscall_32 - jmp swapgs_restore_regs_and_return_to_usermode -SYM_CODE_END(entry_INT80_compat) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 05fd175cec7d..13639e57e1f8 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -569,6 +569,10 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); +#if defined(CONFIG_IA32_EMULATION) +DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation); +#endif + #ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_64 DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 12ef86b19910..84294b66b916 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -32,10 +32,6 @@ void entry_SYSCALL_compat(void); void entry_SYSCALL_compat_safe_stack(void); void entry_SYSRETL_compat_unsafe_stack(void); void entry_SYSRETL_compat_end(void); -void entry_INT80_compat(void); -#ifdef CONFIG_XEN_PV -void xen_entry_INT80_compat(void); -#endif #endif void x86_configure_nx(void); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index b786d48f5a0f..fc77a96040b7 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -117,7 +117,7 @@ static const __initconst struct idt_data def_idts[] = { SYSG(X86_TRAP_OF, asm_exc_overflow), #if defined(CONFIG_IA32_EMULATION) - SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), + SYSG(IA32_SYSCALL_VECTOR, asm_int80_emulation), #elif defined(CONFIG_X86_32) SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32), #endif diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index bbbfdd495ebd..aeb33e0a3f76 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -704,7 +704,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_int3, false ), TRAP_ENTRY(exc_overflow, false ), #ifdef CONFIG_IA32_EMULATION - { entry_INT80_compat, xen_entry_INT80_compat, false }, + TRAP_ENTRY(int80_emulation, false ), #endif TRAP_ENTRY(exc_page_fault, false ), TRAP_ENTRY(exc_divide_error, false ), diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 9e5e68008785..1a9cd18dfbd3 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -156,7 +156,7 @@ xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION -xen_pv_trap entry_INT80_compat +xen_pv_trap asm_int80_emulation #endif xen_pv_trap asm_exc_xen_unknown_trap xen_pv_trap asm_exc_xen_hypervisor_callback From 22ca647c8f880f21881e9b2d38070dc61196a39d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2023 11:31:40 +0300 Subject: [PATCH 0796/1397] x86/entry: Do not allow external 0x80 interrupts [ upstream commit 55617fb991df535f953589586468612351575704 ] The INT 0x80 instruction is used for 32-bit x86 Linux syscalls. The kernel expects to receive a software interrupt as a result of the INT 0x80 instruction. However, an external interrupt on the same vector also triggers the same codepath. An external interrupt on vector 0x80 will currently be interpreted as a 32-bit system call, and assuming that it was a user context. Panic on external interrupts on the vector. To distinguish software interrupts from external ones, the kernel checks the APIC ISR bit relevant to the 0x80 vector. For software interrupts, this bit will be 0. Signed-off-by: Thomas Gleixner Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/common.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 0f22a037be66..9c0b26ae5106 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -25,6 +25,7 @@ #include #endif +#include #include #include #include @@ -120,6 +121,25 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) } #ifdef CONFIG_IA32_EMULATION +static __always_inline bool int80_is_external(void) +{ + const unsigned int offs = (0x80 / 32) * 0x10; + const u32 bit = BIT(0x80 % 32); + + /* The local APIC on XENPV guests is fake */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* + * If vector 0x80 is set in the APIC ISR then this is an external + * interrupt. Either from broken hardware or injected by a VMM. + * + * Note: In guest mode this is only valid for secure guests where + * the secure module fully controls the vAPIC exposed to the guest. + */ + return apic_read(APIC_ISR + offs) & bit; +} + /** * int80_emulation - 32-bit legacy syscall entry * @@ -143,12 +163,27 @@ DEFINE_IDTENTRY_RAW(int80_emulation) { int nr; - /* Establish kernel context. */ + /* Kernel does not use INT $0x80! */ + if (unlikely(!user_mode(regs))) { + irqentry_enter(regs); + instrumentation_begin(); + panic("Unexpected external interrupt 0x80\n"); + } + + /* + * Establish kernel context for instrumentation, including for + * int80_is_external() below which calls into the APIC driver. + * Identical for soft and external interrupts. + */ enter_from_user_mode(regs); instrumentation_begin(); add_random_kstack_offset(); + /* Validate that this is a soft interrupt to the extent possible */ + if (unlikely(int80_is_external())) + panic("Unexpected external interrupt 0x80\n"); + /* * The low level idtentry code pushed -1 into regs::orig_ax * and regs::ax contains the syscall number. From 239bff0171a86e1bafd7da03631d74df1dfec6f1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 4 Dec 2023 11:31:41 +0300 Subject: [PATCH 0797/1397] x86/tdx: Allow 32-bit emulation by default [ upstream commit f4116bfc44621882556bbf70f5284fbf429a5cf6 ] 32-bit emulation was disabled on TDX to prevent a possible attack by a VMM injecting an interrupt on vector 0x80. Now that int80_emulation() has a check for external interrupts the limitation can be lifted. To distinguish software interrupts from external ones, int80_emulation() checks the APIC ISR bit relevant to the 0x80 vector. For software interrupts, this bit will be 0. On TDX, the VAPIC state (including ISR) is protected and cannot be manipulated by the VMM. The ISR bit is set by the microcode flow during the handling of posted interrupts. [ dhansen: more changelog tweaks ] Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/tdx/tdx.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 70d708cae961..f3c75809fed2 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -821,14 +821,5 @@ void __init tdx_early_init(void) */ x86_cpuinit.parallel_bringup = false; - /* - * The VMM is capable of injecting interrupt 0x80 and triggering the - * compatibility syscall path. - * - * By default, the 32-bit emulation is disabled in order to ensure - * the safety of the VM. - */ - ia32_disable(); - pr_info("Guest detected\n"); } From c35bcede4ffa43a581460ff79ec969d535db0b84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Mon, 28 Aug 2023 17:13:10 -0400 Subject: [PATCH 0798/1397] dt: dt-extract-compatibles: Handle cfile arguments in generator function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit eb2139fc0da63b89a2ad565ecd8133a37e8b7c4f ] Move the handling of the cfile arguments to a separate generator function to avoid redundancy. Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20230828211424.2964562-2-nfraprado@collabora.com Signed-off-by: Rob Herring Stable-dep-of: 8f51593cdcab ("dt: dt-extract-compatibles: Don't follow symlinks when walking tree") Signed-off-by: Sasha Levin --- scripts/dtc/dt-extract-compatibles | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/scripts/dtc/dt-extract-compatibles b/scripts/dtc/dt-extract-compatibles index 9df9f1face83..2b6d228602e8 100755 --- a/scripts/dtc/dt-extract-compatibles +++ b/scripts/dtc/dt-extract-compatibles @@ -49,6 +49,14 @@ def print_compat(filename, compatibles): else: print(*compatibles, sep='\n') +def files_to_parse(path_args): + for f in path_args: + if os.path.isdir(f): + for filename in glob.iglob(f + "/**/*.c", recursive=True): + yield filename + else: + yield f + show_filename = False if __name__ == "__main__": @@ -59,11 +67,6 @@ if __name__ == "__main__": show_filename = args.with_filename - for f in args.cfile: - if os.path.isdir(f): - for filename in glob.iglob(f + "/**/*.c", recursive=True): - compat_list = parse_compatibles(filename) - print_compat(filename, compat_list) - else: - compat_list = parse_compatibles(f) - print_compat(f, compat_list) + for f in files_to_parse(args.cfile): + compat_list = parse_compatibles(f) + print_compat(f, compat_list) From 7cb7001ecd0f7cbeaf0f64ec035bc6bc3ad6dc17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Tue, 7 Nov 2023 17:55:28 -0500 Subject: [PATCH 0799/1397] dt: dt-extract-compatibles: Don't follow symlinks when walking tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8f51593cdcab82fb23ef2e1a0010b2e6f99aae02 ] The iglob function, which we use to find C source files in the kernel tree, always follows symbolic links. This can cause unintentional recursions whenever a symbolic link points to a parent directory. A common scenario is building the kernel with the output set to a directory inside the kernel tree, which will contain such a symlink. Instead of using the iglob function, use os.walk to traverse the directory tree, which by default doesn't follow symbolic links. fnmatch is then used to match the glob on the filename, as well as ignore hidden files (which were ignored by default with iglob). This approach runs just as fast as using iglob. Fixes: b6acf8073517 ("dt: Add a check for undocumented compatible strings in kernel") Reported-by: Aishwarya TCV Closes: https://lore.kernel.org/all/e90cb52f-d55b-d3ba-3933-6cc7b43fcfbc@arm.com Signed-off-by: "Nícolas F. R. A. Prado" Link: https://lore.kernel.org/r/20231107225624.9811-1-nfraprado@collabora.com Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- scripts/dtc/dt-extract-compatibles | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/dtc/dt-extract-compatibles b/scripts/dtc/dt-extract-compatibles index 2b6d228602e8..2f9d0eb59f5b 100755 --- a/scripts/dtc/dt-extract-compatibles +++ b/scripts/dtc/dt-extract-compatibles @@ -1,8 +1,8 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0-only +import fnmatch import os -import glob import re import argparse @@ -49,10 +49,20 @@ def print_compat(filename, compatibles): else: print(*compatibles, sep='\n') +def glob_without_symlinks(root, glob): + for path, dirs, files in os.walk(root): + # Ignore hidden directories + for d in dirs: + if fnmatch.fnmatch(d, ".*"): + dirs.remove(d) + for f in files: + if fnmatch.fnmatch(f, glob): + yield os.path.join(path, f) + def files_to_parse(path_args): for f in path_args: if os.path.isdir(f): - for filename in glob.iglob(f + "/**/*.c", recursive=True): + for filename in glob_without_symlinks(f, "*.c"): yield filename else: yield f From cb6b8919ebf81fcac7e1fb623925af1f7411a198 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Nov 2023 16:42:33 +0100 Subject: [PATCH 0800/1397] platform/x86: asus-wmi: Move i8042 filter install to shared asus-wmi code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b52cbca22cbf6c9d2700c1e576d0ddcc670e49d5 ] asus-nb-wmi calls i8042_install_filter() in some cases, but it never calls i8042_remove_filter(). This means that a dangling pointer to the filter function is left after rmmod leading to crashes. Fix this by moving the i8042-filter installation to the shared asus-wmi code and also remove it from the shared code on driver unbind. Fixes: b5643539b825 ("platform/x86: asus-wmi: Filter buggy scan codes on ASUS Q500A") Cc: Oleksij Rempel Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231120154235.610808-2-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/Kconfig | 2 +- drivers/platform/x86/asus-nb-wmi.c | 11 ----------- drivers/platform/x86/asus-wmi.c | 8 ++++++++ 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 2a1070543391..07eea525091b 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -263,6 +263,7 @@ config ASUS_WMI depends on RFKILL || RFKILL = n depends on HOTPLUG_PCI depends on ACPI_VIDEO || ACPI_VIDEO = n + depends on SERIO_I8042 || SERIO_I8042 = n select INPUT_SPARSEKMAP select LEDS_CLASS select NEW_LEDS @@ -279,7 +280,6 @@ config ASUS_WMI config ASUS_NB_WMI tristate "Asus Notebook WMI Driver" depends on ASUS_WMI - depends on SERIO_I8042 || SERIO_I8042 = n help This is a driver for newer Asus notebooks. It adds extra features like wireless radio and bluetooth control, leds, hotkeys, backlight... diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index df1db54d4e18..af3da303e2b1 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -501,8 +501,6 @@ static const struct dmi_system_id asus_quirks[] = { static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) { - int ret; - quirks = &quirk_asus_unknown; dmi_check_system(asus_quirks); @@ -517,15 +515,6 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) if (tablet_mode_sw != -1) quirks->tablet_switch_mode = tablet_mode_sw; - - if (quirks->i8042_filter) { - ret = i8042_install_filter(quirks->i8042_filter); - if (ret) { - pr_warn("Unable to install key filter\n"); - return; - } - pr_info("Using i8042 filter function for receiving events\n"); - } } static const struct key_entry asus_nb_wmi_keymap[] = { diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 19bfd30861aa..9c6321c2fc3c 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -4437,6 +4437,12 @@ static int asus_wmi_add(struct platform_device *pdev) goto fail_wmi_handler; } + if (asus->driver->quirks->i8042_filter) { + err = i8042_install_filter(asus->driver->quirks->i8042_filter); + if (err) + pr_warn("Unable to install key filter - %d\n", err); + } + asus_wmi_battery_init(asus); asus_wmi_debugfs_init(asus); @@ -4471,6 +4477,8 @@ static int asus_wmi_remove(struct platform_device *device) struct asus_wmi *asus; asus = platform_get_drvdata(device); + if (asus->driver->quirks->i8042_filter) + i8042_remove_filter(asus->driver->quirks->i8042_filter); wmi_remove_notify_handler(asus->driver->event_guid); asus_wmi_backlight_exit(asus); asus_wmi_input_exit(asus); From 30460e7e27dab2757841941cabb5956f90d5b5fa Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 23 Nov 2023 15:47:18 +0100 Subject: [PATCH 0801/1397] of: dynamic: Fix of_reconfig_get_state_change() return value documentation [ Upstream commit d79972789d17499b6091ded2fc0c6763c501a5ba ] The documented numeric return values do not match the actual returned values. Fix them by using the enum names instead of raw numbers. Fixes: b53a2340d0d3 ("of/reconfig: Add of_reconfig_get_state_change() of notifier helper.") Signed-off-by: Luca Ceresoli Link: https://lore.kernel.org/r/20231123-fix-of_reconfig_get_state_change-docs-v1-1-f51892050ff9@bootlin.com Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/dynamic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index f63250c650ca..3bf27052832f 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -98,8 +98,9 @@ int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p) * * Returns the new state of a device based on the notifier used. * - * Return: 0 on device going from enabled to disabled, 1 on device - * going from disabled to enabled and -1 on no change. + * Return: OF_RECONFIG_CHANGE_REMOVE on device going from enabled to + * disabled, OF_RECONFIG_CHANGE_ADD on device going from disabled to + * enabled and OF_RECONFIG_NO_CHANGE on no change. */ int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *pr) { From 6e33e81b785f41bf24b1c14728f47f467e184be7 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 29 Nov 2023 19:16:54 +0100 Subject: [PATCH 0802/1397] platform/x86: wmi: Skip blocks with zero instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cbf54f37600e874d82886aa3b2f471778cae01ce ] Some machines like the HP Omen 17 ck2000nf contain WMI blocks with zero instances, so any WMI driver which tries to handle the associated WMI device will fail. Skip such WMI blocks to avoid confusing any WMI drivers. Reported-by: Alexis Belmonte Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218188 Fixes: bff431e49ff5 ("ACPI: WMI: Add ACPI-WMI mapping driver") Tested-by: Alexis Belmonte Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20231129181654.5800-1-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/wmi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 317c90730414..d75a0ae9cd0c 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -1285,6 +1285,11 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device) if (debug_dump_wdg) wmi_dump_wdg(&gblock[i]); + if (!gblock[i].instance_count) { + dev_info(wmi_bus_dev, FW_INFO "%pUL has zero instances\n", &gblock[i].guid); + continue; + } + if (guid_already_parsed_for_legacy(device, &gblock[i].guid)) continue; From a08768110a6574e4d994b28f448ca5c016fbf0d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Nov 2023 16:06:30 +0000 Subject: [PATCH 0803/1397] ipv6: fix potential NULL deref in fib6_add() [ Upstream commit 75475bb51e78a3f54ad2f69380f2a1c985e85f2d ] If fib6_find_prefix() returns NULL, we should silently fallback using fib6_null_entry regardless of RT6_DEBUG value. syzbot reported: WARNING: CPU: 0 PID: 5477 at net/ipv6/ip6_fib.c:1516 fib6_add+0x310d/0x3fa0 net/ipv6/ip6_fib.c:1516 Modules linked in: CPU: 0 PID: 5477 Comm: syz-executor.0 Not tainted 6.7.0-rc2-syzkaller-00029-g9b6de136b5f0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:fib6_add+0x310d/0x3fa0 net/ipv6/ip6_fib.c:1516 Code: 00 48 8b 54 24 68 e8 42 22 00 00 48 85 c0 74 14 49 89 c6 e8 d5 d3 c2 f7 eb 5d e8 ce d3 c2 f7 e9 ca 00 00 00 e8 c4 d3 c2 f7 90 <0f> 0b 90 48 b8 00 00 00 00 00 fc ff df 48 8b 4c 24 38 80 3c 01 00 RSP: 0018:ffffc90005067740 EFLAGS: 00010293 RAX: ffffffff89cba5bc RBX: ffffc90005067ab0 RCX: ffff88801a2e9dc0 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000 RBP: ffffc90005067980 R08: ffffffff89cbca85 R09: 1ffff110040d4b85 R10: dffffc0000000000 R11: ffffed10040d4b86 R12: 00000000ffffffff R13: 1ffff110051c3904 R14: ffff8880206a5c00 R15: ffff888028e1c820 FS: 00007f763783c6c0(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f763783bff8 CR3: 000000007f74d000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __ip6_ins_rt net/ipv6/route.c:1303 [inline] ip6_route_add+0x88/0x120 net/ipv6/route.c:3847 ipv6_route_ioctl+0x525/0x7b0 net/ipv6/route.c:4467 inet6_ioctl+0x21a/0x270 net/ipv6/af_inet6.c:575 sock_do_ioctl+0x152/0x460 net/socket.c:1220 sock_ioctl+0x615/0x8c0 net/socket.c:1339 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0xf8/0x170 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x45/0x110 arch/x86/entry/common.c:82 Fixes: 7bbfe00e0252 ("ipv6: fix general protection fault in fib6_add()") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Wei Wang Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20231129160630.3509216-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/ip6_fib.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 28b01a068412..7772f42ff2b9 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1511,13 +1511,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { pn_leaf = fib6_find_prefix(info->nl_net, table, pn); -#if RT6_DEBUG >= 2 - if (!pn_leaf) { - WARN_ON(!pn_leaf); + if (!pn_leaf) pn_leaf = info->nl_net->ipv6.fib6_null_entry; - } -#endif fib6_info_hold(pn_leaf); rcu_assign_pointer(pn->leaf, pn_leaf); } From 6ddf005fd6cc82a94b8008e14b63ec6a03790fbb Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 29 Nov 2023 10:53:42 +0530 Subject: [PATCH 0804/1397] octeontx2-pf: Add missing mutex lock in otx2_get_pauseparam [ Upstream commit 9572c949385aa2ef10368287c439bcb7935137c8 ] All the mailbox messages sent to AF needs to be guarded by mutex lock. Add the missing lock in otx2_get_pauseparam function. Fixes: 75f36270990c ("octeontx2-pf: Support to enable/disable pause frames via ethtool") Signed-off-by: Subbaraya Sundeep Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 9efcec549834..53f6258a973c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -334,9 +334,12 @@ static void otx2_get_pauseparam(struct net_device *netdev, if (is_otx2_lbkvf(pfvf->pdev)) return; + mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_cgx_cfg_pause_frm(&pfvf->mbox); - if (!req) + if (!req) { + mutex_unlock(&pfvf->mbox.lock); return; + } if (!otx2_sync_mbox_msg(&pfvf->mbox)) { rsp = (struct cgx_pause_frm_cfg *) @@ -344,6 +347,7 @@ static void otx2_get_pauseparam(struct net_device *netdev, pause->rx_pause = rsp->rx_pause; pause->tx_pause = rsp->tx_pause; } + mutex_unlock(&pfvf->mbox.lock); } static int otx2_set_pauseparam(struct net_device *netdev, From c3287140282b3c2fef18dfef6b013e4f9d89f9f0 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 29 Nov 2023 11:11:48 +0530 Subject: [PATCH 0805/1397] octeontx2-af: Check return value of nix_get_nixlf before using nixlf [ Upstream commit 830139e7b6911266a84a77e1f18abf758995cc89 ] If a NIXLF is not attached to a PF/VF device then nix_get_nixlf function fails and returns proper error code. But npc_get_default_entry_action does not check it and uses garbage value in subsequent calls. Fix this by cheking the return value of nix_get_nixlf. Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet replication feature") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 16cfc802e348..f65805860c8d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -389,7 +389,13 @@ static u64 npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam, int bank, nixlf, index; /* get ucast entry rule entry index */ - nix_get_nixlf(rvu, pf_func, &nixlf, NULL); + if (nix_get_nixlf(rvu, pf_func, &nixlf, NULL)) { + dev_err(rvu->dev, "%s: nixlf not attached to pcifunc:0x%x\n", + __func__, pf_func); + /* Action 0 is drop */ + return 0; + } + index = npc_get_nixlf_mcam_index(mcam, pf_func, nixlf, NIXLF_UCAST_ENTRY); bank = npc_get_bank(mcam, index); From 28b8ed4a02fa95a013a79059f882e17207fe3d9d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 30 Nov 2023 18:46:40 -0800 Subject: [PATCH 0806/1397] bpf: Fix a verifier bug due to incorrect branch offset comparison with cpu=v4 [ Upstream commit dfce9cb3140592b886838e06f3e0c25fea2a9cae ] Bpf cpu=v4 support is introduced in [1] and Commit 4cd58e9af8b9 ("bpf: Support new 32bit offset jmp instruction") added support for new 32bit offset jmp instruction. Unfortunately, in function bpf_adj_delta_to_off(), for new branch insn with 32bit offset, the offset (plus/minor a small delta) compares to 16-bit offset bound [S16_MIN, S16_MAX], which caused the following verification failure: $ ./test_progs-cpuv4 -t verif_scale_pyperf180 ... insn 10 cannot be patched due to 16-bit range ... libbpf: failed to load object 'pyperf180.bpf.o' scale_test:FAIL:expect_success unexpected error: -12 (errno 12) #405 verif_scale_pyperf180:FAIL Note that due to recent llvm18 development, the patch [2] (already applied in bpf-next) needs to be applied to bpf tree for testing purpose. The fix is rather simple. For 32bit offset branch insn, the adjusted offset compares to [S32_MIN, S32_MAX] and then verification succeeded. [1] https://lore.kernel.org/all/20230728011143.3710005-1-yonghong.song@linux.dev [2] https://lore.kernel.org/bpf/20231110193644.3130906-1-yonghong.song@linux.dev Fixes: 4cd58e9af8b9 ("bpf: Support new 32bit offset jmp instruction") Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231201024640.3417057-1-yonghong.song@linux.dev Signed-off-by: Sasha Levin --- kernel/bpf/core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 64fcd81ad3da..5d1efe5200ba 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -371,14 +371,18 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, s32 end_new, s32 curr, const bool probe_pass) { - const s32 off_min = S16_MIN, off_max = S16_MAX; + s64 off_min, off_max, off; s32 delta = end_new - end_old; - s32 off; - if (insn->code == (BPF_JMP32 | BPF_JA)) + if (insn->code == (BPF_JMP32 | BPF_JA)) { off = insn->imm; - else + off_min = S32_MIN; + off_max = S32_MAX; + } else { off = insn->off; + off_min = S16_MIN; + off_max = S16_MAX; + } if (curr < pos && curr + off + 1 >= end_old) off += delta; From b47e71da4776339851eeb1ae3a1376d18c0767cb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 29 Nov 2023 21:58:53 -0800 Subject: [PATCH 0807/1397] hv_netvsc: rndis_filter needs to select NLS [ Upstream commit 6c89f49964375c904cea33c0247467873f4daf2c ] rndis_filter uses utf8s_to_utf16s() which is provided by setting NLS, so select NLS to fix the build error: ERROR: modpost: "utf8s_to_utf16s" [drivers/net/hyperv/hv_netvsc.ko] undefined! Fixes: 1ce09e899d28 ("hyperv: Add support for setting MAC from within guests") Signed-off-by: Randy Dunlap Cc: Haiyang Zhang Cc: K. Y. Srinivasan Cc: Wei Liu Cc: Dexuan Cui Reviewed-by: Simon Horman Tested-by: Simon Horman # build-tested Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20231130055853.19069-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/hyperv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig index ca7bf7f897d3..c8cbd85adcf9 100644 --- a/drivers/net/hyperv/Kconfig +++ b/drivers/net/hyperv/Kconfig @@ -3,5 +3,6 @@ config HYPERV_NET tristate "Microsoft Hyper-V virtual network driver" depends on HYPERV select UCS2_STRING + select NLS help Select this option to enable the Hyper-V virtual network driver. From 8defe1643dd36219c552752d417c34a073592984 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Nov 2023 13:25:20 -0800 Subject: [PATCH 0808/1397] r8152: Hold the rtnl_lock for all of reset [ Upstream commit e62adaeecdc6a1e8ae86e7f3f9f8223a3ede94f5 ] As of commit d9962b0d4202 ("r8152: Block future register access if register access fails") there is a race condition that can happen between the USB device reset thread and napi_enable() (not) getting called during rtl8152_open(). Specifically: * While rtl8152_open() is running we get a register access error that's _not_ -ENODEV and queue up a USB reset. * rtl8152_open() exits before calling napi_enable() due to any reason (including usb_submit_urb() returning an error). In that case: * Since the USB reset is perform in a separate thread asynchronously, it can run at anytime USB device lock is not held - even before rtl8152_open() has exited with an error and caused __dev_open() to clear the __LINK_STATE_START bit. * The rtl8152_pre_reset() will notice that the netif_running() returns true (since __LINK_STATE_START wasn't cleared) so it won't exit early. * rtl8152_pre_reset() will then hang in napi_disable() because napi_enable() was never called. We can fix the race by making sure that the r8152 reset routines don't run at the same time as we're opening the device. Specifically we need the reset routines in their entirety rely on the return value of netif_running(). The only way to reliably depend on that is for them to hold the rntl_lock() mutex for the duration of reset. Grabbing the rntl_lock() mutex for the duration of reset seems like a long time, but reset is not expected to be common and the rtnl_lock() mutex is already held for long durations since the core grabs it around the open/close calls. Fixes: d9962b0d4202 ("r8152: Block future register access if register access fails") Reviewed-by: Grant Grundler Signed-off-by: Douglas Anderson Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index be18d72cefcc..77408265e612 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -8364,6 +8364,8 @@ static int rtl8152_pre_reset(struct usb_interface *intf) struct r8152 *tp = usb_get_intfdata(intf); struct net_device *netdev; + rtnl_lock(); + if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) return 0; @@ -8395,20 +8397,17 @@ static int rtl8152_post_reset(struct usb_interface *intf) struct sockaddr sa; if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) - return 0; + goto exit; rtl_set_accessible(tp); /* reset the MAC address in case of policy change */ - if (determine_ethernet_addr(tp, &sa) >= 0) { - rtnl_lock(); + if (determine_ethernet_addr(tp, &sa) >= 0) dev_set_mac_address (tp->netdev, &sa, NULL); - rtnl_unlock(); - } netdev = tp->netdev; if (!netif_running(netdev)) - return 0; + goto exit; set_bit(WORK_ENABLE, &tp->flags); if (netif_carrier_ok(netdev)) { @@ -8427,6 +8426,8 @@ static int rtl8152_post_reset(struct usb_interface *intf) if (!list_empty(&tp->rx_done)) napi_schedule(&tp->napi); +exit: + rtnl_unlock(); return 0; } From b7416e0a4d48fc00baa6a5d863898361312b2a73 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Nov 2023 13:25:21 -0800 Subject: [PATCH 0809/1397] r8152: Add RTL8152_INACCESSIBLE checks to more loops [ Upstream commit 32a574c7e2685aa8138754d4d755f9246cc6bd48 ] Previous commits added checks for RTL8152_INACCESSIBLE in the loops in the driver. There are still a few more that keep tripping the driver up in error cases and make things take longer than they should. Add those in. All the loops that are part of this commit existed in some form or another since the r8152 driver was first introduced, though RTL8152_INACCESSIBLE was known as RTL8152_UNPLUG before commit 715f67f33af4 ("r8152: Rename RTL8152_UNPLUG to RTL8152_INACCESSIBLE") Fixes: ac718b69301c ("net/usb: new driver for RTL8152") Reviewed-by: Grant Grundler Signed-off-by: Douglas Anderson Acked-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 77408265e612..bbb2824f285d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2967,6 +2967,8 @@ static void rtl8152_nic_reset(struct r8152 *tp) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, CR_RST); for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; if (!(ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR) & CR_RST)) break; usleep_range(100, 400); @@ -3296,6 +3298,8 @@ static void rtl_disable(struct r8152 *tp) rxdy_gated_en(tp, true); for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); if ((ocp_data & FIFO_EMPTY) == FIFO_EMPTY) break; @@ -3303,6 +3307,8 @@ static void rtl_disable(struct r8152 *tp) } for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0) & TCR0_TX_EMPTY) break; usleep_range(1000, 2000); @@ -5466,6 +5472,8 @@ static void wait_oob_link_list_ready(struct r8152 *tp) int i; for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); if (ocp_data & LINK_LIST_READY) break; From 9bda33e87114fa9b95c4b7c02776516457f48d60 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Nov 2023 13:25:22 -0800 Subject: [PATCH 0810/1397] r8152: Add RTL8152_INACCESSIBLE to r8156b_wait_loading_flash() [ Upstream commit 8a67b47fced9f6a84101eb9ec5ce4c7d64204bc7 ] Delay loops in r8152 should break out if RTL8152_INACCESSIBLE is set so that they don't delay too long if the device becomes inaccessible. Add the break to the loop in r8156b_wait_loading_flash(). Fixes: 195aae321c82 ("r8152: support new chips") Reviewed-by: Grant Grundler Signed-off-by: Douglas Anderson Acked-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index bbb2824f285d..ec43279a482f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5488,6 +5488,8 @@ static void r8156b_wait_loading_flash(struct r8152 *tp) int i; for (i = 0; i < 100; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; if (ocp_read_word(tp, MCU_TYPE_USB, USB_GPHY_CTRL) & GPHY_PATCH_DONE) break; usleep_range(1000, 2000); From 4bc63784d6422e2e2965513ac16bf34c765a4fba Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Nov 2023 13:25:23 -0800 Subject: [PATCH 0811/1397] r8152: Add RTL8152_INACCESSIBLE to r8153_pre_firmware_1() [ Upstream commit 8c53a7bd706535a9cf4e2ec3a4e8d61d46353ca0 ] Delay loops in r8152 should break out if RTL8152_INACCESSIBLE is set so that they don't delay too long if the device becomes inaccessible. Add the break to the loop in r8153_pre_firmware_1(). Fixes: 9370f2d05a2a ("r8152: support request_firmware for RTL8153") Reviewed-by: Grant Grundler Signed-off-by: Douglas Anderson Acked-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index ec43279a482f..4393ffd3b294 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5612,6 +5612,8 @@ static int r8153_pre_firmware_1(struct r8152 *tp) for (i = 0; i < 104; i++) { u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_WDT1_CTRL); + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + return -ENODEV; if (!(ocp_data & WTD1_EN)) break; usleep_range(1000, 2000); From 9b4a883854355e822f727ac4777b9170a62bac18 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 29 Nov 2023 13:25:24 -0800 Subject: [PATCH 0812/1397] r8152: Add RTL8152_INACCESSIBLE to r8153_aldps_en() [ Upstream commit 79321a793945fdbff2f405f84712d0ab81bed287 ] Delay loops in r8152 should break out if RTL8152_INACCESSIBLE is set so that they don't delay too long if the device becomes inaccessible. Add the break to the loop in r8153_aldps_en(). Fixes: 4214cc550bf9 ("r8152: check if disabling ALDPS is finished") Reviewed-by: Grant Grundler Signed-off-by: Douglas Anderson Acked-by: Hayes Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 4393ffd3b294..7611c406fdb9 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5770,6 +5770,8 @@ static void r8153_aldps_en(struct r8152 *tp, bool enable) data &= ~EN_ALDPS; ocp_reg_write(tp, OCP_POWER_CFG, data); for (i = 0; i < 20; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + return; usleep_range(1000, 2000); if (ocp_read_word(tp, MCU_TYPE_PLA, 0xe000) & 0x0100) break; From 76914ea593bbdcb5b24210fb24eabf3a89d1a381 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Thu, 30 Nov 2023 13:35:15 -0500 Subject: [PATCH 0813/1397] mlxbf-bootctl: correctly identify secure boot with development keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d4eef75279f5e9d594f5785502038c763ce42268 ] The secure boot state of the BlueField SoC is represented by two bits: 0 = production state 1 = secure boot enabled 2 = non-secure (secure boot disabled) 3 = RMA state There is also a single bit to indicate whether production keys or development keys are being used when secure boot is enabled. This single bit (specified by MLXBF_BOOTCTL_SB_DEV_MASK) only has meaning if secure boot state equals 1 (secure boot enabled). The secure boot states are as follows: - “GA secured” is when secure boot is enabled with official production keys. - “Secured (development)” is when secure boot is enabled with development keys. Without this fix “GA Secured” is displayed on development cards which is misleading. This patch updates the logic in "lifecycle_state_show()" to handle the case where the SoC is configured for secure boot and is using development keys. Fixes: 79e29cb8fbc5c ("platform/mellanox: Add bootctl driver for Mellanox BlueField Soc") Reviewed-by: Khalil Blaiech Signed-off-by: David Thompson Link: https://lore.kernel.org/r/20231130183515.17214-1-davthompson@nvidia.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/mellanox/mlxbf-bootctl.c | 39 +++++++++++++++-------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/drivers/platform/mellanox/mlxbf-bootctl.c b/drivers/platform/mellanox/mlxbf-bootctl.c index 4ee7bb431b7c..e278092f889b 100644 --- a/drivers/platform/mellanox/mlxbf-bootctl.c +++ b/drivers/platform/mellanox/mlxbf-bootctl.c @@ -20,6 +20,7 @@ #define MLXBF_BOOTCTL_SB_SECURE_MASK 0x03 #define MLXBF_BOOTCTL_SB_TEST_MASK 0x0c +#define MLXBF_BOOTCTL_SB_DEV_MASK BIT(4) #define MLXBF_SB_KEY_NUM 4 @@ -40,11 +41,18 @@ static struct mlxbf_bootctl_name boot_names[] = { { MLXBF_BOOTCTL_NONE, "none" }, }; +enum { + MLXBF_BOOTCTL_SB_LIFECYCLE_PRODUCTION = 0, + MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE = 1, + MLXBF_BOOTCTL_SB_LIFECYCLE_GA_NON_SECURE = 2, + MLXBF_BOOTCTL_SB_LIFECYCLE_RMA = 3 +}; + static const char * const mlxbf_bootctl_lifecycle_states[] = { - [0] = "Production", - [1] = "GA Secured", - [2] = "GA Non-Secured", - [3] = "RMA", + [MLXBF_BOOTCTL_SB_LIFECYCLE_PRODUCTION] = "Production", + [MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE] = "GA Secured", + [MLXBF_BOOTCTL_SB_LIFECYCLE_GA_NON_SECURE] = "GA Non-Secured", + [MLXBF_BOOTCTL_SB_LIFECYCLE_RMA] = "RMA", }; /* Log header format. */ @@ -247,25 +255,30 @@ static ssize_t second_reset_action_store(struct device *dev, static ssize_t lifecycle_state_show(struct device *dev, struct device_attribute *attr, char *buf) { + int status_bits; + int use_dev_key; + int test_state; int lc_state; - lc_state = mlxbf_bootctl_smc(MLXBF_BOOTCTL_GET_TBB_FUSE_STATUS, - MLXBF_BOOTCTL_FUSE_STATUS_LIFECYCLE); - if (lc_state < 0) - return lc_state; + status_bits = mlxbf_bootctl_smc(MLXBF_BOOTCTL_GET_TBB_FUSE_STATUS, + MLXBF_BOOTCTL_FUSE_STATUS_LIFECYCLE); + if (status_bits < 0) + return status_bits; - lc_state &= - MLXBF_BOOTCTL_SB_TEST_MASK | MLXBF_BOOTCTL_SB_SECURE_MASK; + use_dev_key = status_bits & MLXBF_BOOTCTL_SB_DEV_MASK; + test_state = status_bits & MLXBF_BOOTCTL_SB_TEST_MASK; + lc_state = status_bits & MLXBF_BOOTCTL_SB_SECURE_MASK; /* * If the test bits are set, we specify that the current state may be * due to using the test bits. */ - if (lc_state & MLXBF_BOOTCTL_SB_TEST_MASK) { - lc_state &= MLXBF_BOOTCTL_SB_SECURE_MASK; - + if (test_state) { return sprintf(buf, "%s(test)\n", mlxbf_bootctl_lifecycle_states[lc_state]); + } else if (use_dev_key && + (lc_state == MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE)) { + return sprintf(buf, "Secured (development)\n"); } return sprintf(buf, "%s\n", mlxbf_bootctl_lifecycle_states[lc_state]); From 8b2688f9c5897a1ed712d1da4b2721de22a5bbc7 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 1 Dec 2023 13:54:47 +0800 Subject: [PATCH 0814/1397] platform/mellanox: Add null pointer checks for devm_kasprintf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2c7c857f5fed997be93047d2de853d7f10c8defe ] devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Compile-tested only. Fixes: 1a218d312e65 ("platform/mellanox: mlxbf-pmc: Add Mellanox BlueField PMC driver") Suggested-by: Ilpo Järvinen Suggested-by: Vadim Pasternak Signed-off-by: Kunwu Chan Reviewed-by: Vadim Pasternak Link: https://lore.kernel.org/r/20231201055447.2356001-1-chentao@kylinos.cn [ij: split the change into two] Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/mellanox/mlxbf-pmc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 2d4bbe99959e..925bfc4aef8c 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -1202,6 +1202,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->dev_attr.show = mlxbf_pmc_event_list_show; attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "event_list"); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[i] = &attr->dev_attr.attr; attr = NULL; @@ -1214,6 +1216,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "enable"); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr; attr = NULL; } @@ -1240,6 +1244,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "counter%d", j); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr; attr = NULL; @@ -1251,6 +1257,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "event%d", j); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr; attr = NULL; } @@ -1283,6 +1291,8 @@ static int mlxbf_pmc_init_perftype_reg(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, events[j].evt_name); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[i] = &attr->dev_attr.attr; attr = NULL; i++; @@ -1311,6 +1321,8 @@ static int mlxbf_pmc_create_groups(struct device *dev, int blk_num) pmc->block[blk_num].block_attr_grp.attrs = pmc->block[blk_num].block_attr; pmc->block[blk_num].block_attr_grp.name = devm_kasprintf( dev, GFP_KERNEL, pmc->block_name[blk_num]); + if (!pmc->block[blk_num].block_attr_grp.name) + return -ENOMEM; pmc->groups[blk_num] = &pmc->block[blk_num].block_attr_grp; return 0; From 15e1490022e7c89bee335ea742f77eebf2a23a89 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 1 Dec 2023 13:54:47 +0800 Subject: [PATCH 0815/1397] platform/mellanox: Check devm_hwmon_device_register_with_groups() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3494a594315b56516988afb6854d75dee5b501db ] devm_hwmon_device_register_with_groups() returns an error pointer upon failure. Check its return value for errors. Compile-tested only. Fixes: 1a218d312e65 ("platform/mellanox: mlxbf-pmc: Add Mellanox BlueField PMC driver") Suggested-by: Ilpo Järvinen Suggested-by: Vadim Pasternak Signed-off-by: Kunwu Chan Reviewed-by: Vadim Pasternak Link: https://lore.kernel.org/r/20231201055447.2356001-1-chentao@kylinos.cn [ij: split the change into two] Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/mellanox/mlxbf-pmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 925bfc4aef8c..db7a1d360cd2 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -1454,6 +1454,8 @@ static int mlxbf_pmc_probe(struct platform_device *pdev) pmc->hwmon_dev = devm_hwmon_device_register_with_groups( dev, "bfperf", pmc, pmc->groups); + if (IS_ERR(pmc->hwmon_dev)) + return PTR_ERR(pmc->hwmon_dev); platform_set_drvdata(pdev, pmc); return 0; From 5106d7adb74bc6160806b45ffd2321b10ca14ee0 Mon Sep 17 00:00:00 2001 From: Thomas Reichinger Date: Thu, 30 Nov 2023 12:35:03 +0100 Subject: [PATCH 0816/1397] arcnet: restoring support for multiple Sohard Arcnet cards [ Upstream commit 6b17a597fc2f13aaaa0a2780eb7edb9ae7ac9aea ] Probe of Sohard Arcnet cards fails, if 2 or more cards are installed in a system. See kernel log: [ 2.759203] arcnet: arcnet loaded [ 2.763648] arcnet:com20020: COM20020 chipset support (by David Woodhouse et al.) [ 2.770585] arcnet:com20020_pci: COM20020 PCI support [ 2.772295] com20020 0000:02:00.0: enabling device (0000 -> 0003) [ 2.772354] (unnamed net_device) (uninitialized): PLX-PCI Controls ... [ 3.071301] com20020 0000:02:00.0 arc0-0 (uninitialized): PCI COM20020: station FFh found at F080h, IRQ 101. [ 3.071305] com20020 0000:02:00.0 arc0-0 (uninitialized): Using CKP 64 - data rate 2.5 Mb/s [ 3.071534] com20020 0000:07:00.0: enabling device (0000 -> 0003) [ 3.071581] (unnamed net_device) (uninitialized): PLX-PCI Controls ... [ 3.369501] com20020 0000:07:00.0: Led pci:green:tx:0-0 renamed to pci:green:tx:0-0_1 due to name collision [ 3.369535] com20020 0000:07:00.0: Led pci:red:recon:0-0 renamed to pci:red:recon:0-0_1 due to name collision [ 3.370586] com20020 0000:07:00.0 arc0-0 (uninitialized): PCI COM20020: station E1h found at C000h, IRQ 35. [ 3.370589] com20020 0000:07:00.0 arc0-0 (uninitialized): Using CKP 64 - data rate 2.5 Mb/s [ 3.370608] com20020: probe of 0000:07:00.0 failed with error -5 commit 5ef216c1f848 ("arcnet: com20020-pci: add rotary index support") changes the device name of all COM20020 based PCI cards, even if only some cards support this: snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); The error happens because all Sohard Arcnet cards would be called arc0-0, since the Sohard Arcnet cards don't have a PLX rotary coder. I.e. EAE Arcnet cards have a PLX rotary coder, which sets the first decimal, ensuring unique devices names. This patch adds two new card feature flags to indicate which cards support LEDs and the PLX rotary coder. For EAE based cards the names still depend on the PLX rotary coder (untested, since missing EAE hardware). For Sohard based cards, this patch will result in devices being called arc0, arc1, ... (tested). Signed-off-by: Thomas Reichinger Fixes: 5ef216c1f848 ("arcnet: com20020-pci: add rotary index support") Link: https://lore.kernel.org/r/20231130113503.6812-1-thomas.reichinger@sohard.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/arcnet/arcdevice.h | 2 + drivers/net/arcnet/com20020-pci.c | 89 ++++++++++++++++--------------- 2 files changed, 48 insertions(+), 43 deletions(-) diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index 19e996a829c9..b54275389f8a 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -186,6 +186,8 @@ do { \ #define ARC_IS_5MBIT 1 /* card default speed is 5MBit */ #define ARC_CAN_10MBIT 2 /* card uses COM20022, supporting 10MBit, but default is 2.5MBit. */ +#define ARC_HAS_LED 4 /* card has software controlled LEDs */ +#define ARC_HAS_ROTARY 8 /* card has rotary encoder */ /* information needed to define an encapsulation driver */ struct ArcProto { diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index c580acb8b1d3..7b5c8bb02f11 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -213,12 +213,13 @@ static int com20020pci_probe(struct pci_dev *pdev, if (!strncmp(ci->name, "EAE PLX-PCI FB2", 15)) lp->backplane = 1; - /* Get the dev_id from the PLX rotary coder */ - if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15)) - dev_id_mask = 0x3; - dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask; - - snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); + if (ci->flags & ARC_HAS_ROTARY) { + /* Get the dev_id from the PLX rotary coder */ + if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15)) + dev_id_mask = 0x3; + dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask; + snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); + } if (arcnet_inb(ioaddr, COM20020_REG_R_STATUS) == 0xFF) { pr_err("IO address %Xh is empty!\n", ioaddr); @@ -230,6 +231,10 @@ static int com20020pci_probe(struct pci_dev *pdev, goto err_free_arcdev; } + ret = com20020_found(dev, IRQF_SHARED); + if (ret) + goto err_free_arcdev; + card = devm_kzalloc(&pdev->dev, sizeof(struct com20020_dev), GFP_KERNEL); if (!card) { @@ -239,41 +244,39 @@ static int com20020pci_probe(struct pci_dev *pdev, card->index = i; card->pci_priv = priv; - card->tx_led.brightness_set = led_tx_set; - card->tx_led.default_trigger = devm_kasprintf(&pdev->dev, - GFP_KERNEL, "arc%d-%d-tx", - dev->dev_id, i); - card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "pci:green:tx:%d-%d", - dev->dev_id, i); - - card->tx_led.dev = &dev->dev; - card->recon_led.brightness_set = led_recon_set; - card->recon_led.default_trigger = devm_kasprintf(&pdev->dev, - GFP_KERNEL, "arc%d-%d-recon", - dev->dev_id, i); - card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "pci:red:recon:%d-%d", - dev->dev_id, i); - card->recon_led.dev = &dev->dev; - card->dev = dev; - - ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); - if (ret) - goto err_free_arcdev; - ret = devm_led_classdev_register(&pdev->dev, &card->recon_led); - if (ret) - goto err_free_arcdev; - - dev_set_drvdata(&dev->dev, card); - - ret = com20020_found(dev, IRQF_SHARED); - if (ret) - goto err_free_arcdev; - - devm_arcnet_led_init(dev, dev->dev_id, i); + if (ci->flags & ARC_HAS_LED) { + card->tx_led.brightness_set = led_tx_set; + card->tx_led.default_trigger = devm_kasprintf(&pdev->dev, + GFP_KERNEL, "arc%d-%d-tx", + dev->dev_id, i); + card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pci:green:tx:%d-%d", + dev->dev_id, i); + + card->tx_led.dev = &dev->dev; + card->recon_led.brightness_set = led_recon_set; + card->recon_led.default_trigger = devm_kasprintf(&pdev->dev, + GFP_KERNEL, "arc%d-%d-recon", + dev->dev_id, i); + card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pci:red:recon:%d-%d", + dev->dev_id, i); + card->recon_led.dev = &dev->dev; + + ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); + if (ret) + goto err_free_arcdev; + + ret = devm_led_classdev_register(&pdev->dev, &card->recon_led); + if (ret) + goto err_free_arcdev; + + dev_set_drvdata(&dev->dev, card); + devm_arcnet_led_init(dev, dev->dev_id, i); + } + card->dev = dev; list_add(&card->list, &priv->list_dev); continue; @@ -329,7 +332,7 @@ static struct com20020_pci_card_info card_info_5mbit = { }; static struct com20020_pci_card_info card_info_sohard = { - .name = "PLX-PCI", + .name = "SOHARD SH ARC-PCI", .devcount = 1, /* SOHARD needs PCI base addr 4 */ .chan_map_tbl = { @@ -364,7 +367,7 @@ static struct com20020_pci_card_info card_info_eae_arc1 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static struct com20020_pci_card_info card_info_eae_ma1 = { @@ -396,7 +399,7 @@ static struct com20020_pci_card_info card_info_eae_ma1 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static struct com20020_pci_card_info card_info_eae_fb2 = { @@ -421,7 +424,7 @@ static struct com20020_pci_card_info card_info_eae_fb2 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static const struct pci_device_id com20020pci_id_table[] = { From 9cc9fbe5c02804bb83037599591b6bed09d325af Mon Sep 17 00:00:00 2001 From: Naveen Mamindlapalli Date: Fri, 1 Dec 2023 11:03:30 +0530 Subject: [PATCH 0817/1397] octeontx2-pf: consider both Rx and Tx packet stats for adaptive interrupt coalescing [ Upstream commit adbf100fc47001c93d7e513ecac6fd6e04d5b4a1 ] The current adaptive interrupt coalescing code updates only rx packet stats for dim algorithm. This patch also updates tx packet stats which will be useful when there is only tx traffic. Also moved configuring hardware adaptive interrupt setting to driver dim callback. Fixes: 6e144b47f560 ("octeontx2-pf: Add support for adaptive interrupt coalescing") Signed-off-by: Naveen Mamindlapalli Signed-off-by: Suman Ghosh Reviewed-by: Wojciech Drewek Link: https://lore.kernel.org/r/20231201053330.3903694-1-sumang@marvell.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 9 +++++++++ .../marvell/octeontx2/nic/otx2_txrx.c | 20 +++++++++---------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 532e324bdcc8..0c17ebdda148 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1688,6 +1688,14 @@ static void otx2_do_set_rx_mode(struct otx2_nic *pf) mutex_unlock(&pf->mbox.lock); } +static void otx2_set_irq_coalesce(struct otx2_nic *pfvf) +{ + int cint; + + for (cint = 0; cint < pfvf->hw.cint_cnt; cint++) + otx2_config_irq_coalescing(pfvf, cint); +} + static void otx2_dim_work(struct work_struct *w) { struct dim_cq_moder cur_moder; @@ -1703,6 +1711,7 @@ static void otx2_dim_work(struct work_struct *w) CQ_TIMER_THRESH_MAX : cur_moder.usec; pfvf->hw.cq_ecount_wait = (cur_moder.pkts > NAPI_POLL_WEIGHT) ? NAPI_POLL_WEIGHT : cur_moder.pkts; + otx2_set_irq_coalesce(pfvf); dim->state = DIM_START_MEASURE; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 6ee15f3c25ed..4d519ea833b2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -512,11 +512,18 @@ static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_p { struct dim_sample dim_sample; u64 rx_frames, rx_bytes; + u64 tx_frames, tx_bytes; rx_frames = OTX2_GET_RX_STATS(RX_BCAST) + OTX2_GET_RX_STATS(RX_MCAST) + OTX2_GET_RX_STATS(RX_UCAST); rx_bytes = OTX2_GET_RX_STATS(RX_OCTS); - dim_update_sample(pfvf->napi_events, rx_frames, rx_bytes, &dim_sample); + tx_bytes = OTX2_GET_TX_STATS(TX_OCTS); + tx_frames = OTX2_GET_TX_STATS(TX_UCAST); + + dim_update_sample(pfvf->napi_events, + rx_frames + tx_frames, + rx_bytes + tx_bytes, + &dim_sample); net_dim(&cq_poll->dim, dim_sample); } @@ -558,16 +565,9 @@ int otx2_napi_handler(struct napi_struct *napi, int budget) if (pfvf->flags & OTX2_FLAG_INTF_DOWN) return workdone; - /* Check for adaptive interrupt coalesce */ - if (workdone != 0 && - ((pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED) == - OTX2_FLAG_ADPTV_INT_COAL_ENABLED)) { - /* Adjust irq coalese using net_dim */ + /* Adjust irq coalese using net_dim */ + if (pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED) otx2_adjust_adaptive_coalese(pfvf, cq_poll); - /* Update irq coalescing */ - for (i = 0; i < pfvf->hw.cint_cnt; i++) - otx2_config_irq_coalescing(pfvf, i); - } if (unlikely(!filled_cnt)) { struct refill_work *work; From e1fbdef91b1ceb3f98afc47fda725f92a6255b27 Mon Sep 17 00:00:00 2001 From: Jianheng Zhang Date: Fri, 1 Dec 2023 03:22:03 +0000 Subject: [PATCH 0818/1397] net: stmmac: fix FPE events losing [ Upstream commit 37e4b8df27bc68340f3fc80dbb27e3549c7f881c ] The status bits of register MAC_FPE_CTRL_STS are clear on read. Using 32-bit read for MAC_FPE_CTRL_STS in dwmac5_fpe_configure() and dwmac5_fpe_send_mpacket() clear the status bits. Then the stmmac interrupt handler missing FPE event status and leads to FPE handshaking failure and retries. To avoid clear status bits of MAC_FPE_CTRL_STS in dwmac5_fpe_configure() and dwmac5_fpe_send_mpacket(), add fpe_csr to stmmac_fpe_cfg structure to cache the control bits of MAC_FPE_CTRL_STS and to avoid reading MAC_FPE_CTRL_STS in those methods. Fixes: 5a5586112b92 ("net: stmmac: support FPE link partner hand-shaking procedure") Reviewed-by: Serge Semin Signed-off-by: Jianheng Zhang Link: https://lore.kernel.org/r/CY5PR12MB637225A7CF529D5BE0FBE59CBF81A@CY5PR12MB6372.namprd12.prod.outlook.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac5.c | 45 ++++++++----------- drivers/net/ethernet/stmicro/stmmac/dwmac5.h | 4 +- .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 3 +- drivers/net/ethernet/stmicro/stmmac/hwif.h | 4 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 8 +++- .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 1 + include/linux/stmmac.h | 1 + 7 files changed, 36 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index e95d35f1e5a0..8fd167501fa0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -710,28 +710,22 @@ void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev, } } -void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, +void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable) { u32 value; - if (!enable) { - value = readl(ioaddr + MAC_FPE_CTRL_STS); - - value &= ~EFPE; - - writel(value, ioaddr + MAC_FPE_CTRL_STS); - return; + if (enable) { + cfg->fpe_csr = EFPE; + value = readl(ioaddr + GMAC_RXQ_CTRL1); + value &= ~GMAC_RXQCTRL_FPRQ; + value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT; + writel(value, ioaddr + GMAC_RXQ_CTRL1); + } else { + cfg->fpe_csr = 0; } - - value = readl(ioaddr + GMAC_RXQ_CTRL1); - value &= ~GMAC_RXQCTRL_FPRQ; - value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT; - writel(value, ioaddr + GMAC_RXQ_CTRL1); - - value = readl(ioaddr + MAC_FPE_CTRL_STS); - value |= EFPE; - writel(value, ioaddr + MAC_FPE_CTRL_STS); + writel(cfg->fpe_csr, ioaddr + MAC_FPE_CTRL_STS); } int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) @@ -741,6 +735,9 @@ int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) status = FPE_EVENT_UNKNOWN; + /* Reads from the MAC_FPE_CTRL_STS register should only be performed + * here, since the status flags of MAC_FPE_CTRL_STS are "clear on read" + */ value = readl(ioaddr + MAC_FPE_CTRL_STS); if (value & TRSP) { @@ -766,19 +763,15 @@ int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) return status; } -void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, enum stmmac_mpacket_type type) +void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + enum stmmac_mpacket_type type) { - u32 value; + u32 value = cfg->fpe_csr; - value = readl(ioaddr + MAC_FPE_CTRL_STS); - - if (type == MPACKET_VERIFY) { - value &= ~SRSP; + if (type == MPACKET_VERIFY) value |= SVER; - } else { - value &= ~SVER; + else if (type == MPACKET_RESPONSE) value |= SRSP; - } writel(value, ioaddr + MAC_FPE_CTRL_STS); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index 53c138d0ff48..34e620790eb3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -153,9 +153,11 @@ int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, unsigned int ptp_rate); void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev, struct stmmac_extra_stats *x, u32 txqcnt); -void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, +void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable); void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, + struct stmmac_fpe_cfg *cfg, enum stmmac_mpacket_type type); int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 453e88b75be0..a74e71db79f9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1484,7 +1484,8 @@ static int dwxgmac3_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, return 0; } -static void dwxgmac3_fpe_configure(void __iomem *ioaddr, u32 num_txq, +static void dwxgmac3_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable) { u32 value; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index b95d3e137813..68aa2d5ca6e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -412,9 +412,11 @@ struct stmmac_ops { unsigned int ptp_rate); void (*est_irq_status)(void __iomem *ioaddr, struct net_device *dev, struct stmmac_extra_stats *x, u32 txqcnt); - void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + void (*fpe_configure)(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable); void (*fpe_send_mpacket)(void __iomem *ioaddr, + struct stmmac_fpe_cfg *cfg, enum stmmac_mpacket_type type); int (*fpe_irq_status)(void __iomem *ioaddr, struct net_device *dev); }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1fa4da96c8f5..69b9c71f0ede 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -964,7 +964,8 @@ static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up) bool *hs_enable = &fpe_cfg->hs_enable; if (is_up && *hs_enable) { - stmmac_fpe_send_mpacket(priv, priv->ioaddr, MPACKET_VERIFY); + stmmac_fpe_send_mpacket(priv, priv->ioaddr, fpe_cfg, + MPACKET_VERIFY); } else { *lo_state = FPE_STATE_OFF; *lp_state = FPE_STATE_OFF; @@ -5803,6 +5804,7 @@ static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status) /* If user has requested FPE enable, quickly response */ if (*hs_enable) stmmac_fpe_send_mpacket(priv, priv->ioaddr, + fpe_cfg, MPACKET_RESPONSE); } @@ -7227,6 +7229,7 @@ static void stmmac_fpe_lp_task(struct work_struct *work) if (*lo_state == FPE_STATE_ENTERING_ON && *lp_state == FPE_STATE_ENTERING_ON) { stmmac_fpe_configure(priv, priv->ioaddr, + fpe_cfg, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, *enable); @@ -7245,6 +7248,7 @@ static void stmmac_fpe_lp_task(struct work_struct *work) netdev_info(priv->dev, SEND_VERIFY_MPAKCET_FMT, *lo_state, *lp_state); stmmac_fpe_send_mpacket(priv, priv->ioaddr, + fpe_cfg, MPACKET_VERIFY); } /* Sleep then retry */ @@ -7259,6 +7263,7 @@ void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable) if (priv->plat->fpe_cfg->hs_enable != enable) { if (enable) { stmmac_fpe_send_mpacket(priv, priv->ioaddr, + priv->plat->fpe_cfg, MPACKET_VERIFY); } else { priv->plat->fpe_cfg->lo_fpe_state = FPE_STATE_OFF; @@ -7719,6 +7724,7 @@ int stmmac_suspend(struct device *dev) if (priv->dma_cap.fpesel) { /* Disable FPE */ stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->fpe_cfg, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, false); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index ac41ef4cbd2f..6ad3e0a11936 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -1079,6 +1079,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->fpe_cfg->enable = false; stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->fpe_cfg, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, false); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index ce89cc3e4913..e3f7ee169c08 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -174,6 +174,7 @@ struct stmmac_fpe_cfg { bool hs_enable; /* FPE handshake enable */ enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ + u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ }; struct stmmac_safety_feature_cfg { From d7666bfd788418af70da8672fbe6a763d8868e20 Mon Sep 17 00:00:00 2001 From: Yewon Choi Date: Fri, 1 Dec 2023 15:10:52 +0900 Subject: [PATCH 0819/1397] xsk: Skip polling event check for unbound socket [ Upstream commit e4d008d49a7135214e0ee70537405b6a069e3a3f ] In xsk_poll(), checking available events and setting mask bits should be executed only when a socket has been bound. Setting mask bits for unbound socket is meaningless. Currently, it checks events even when xsk_check_common() failed. To prevent this, we move goto location (skip_tx) after that checking. Fixes: 1596dae2f17e ("xsk: check IFF_UP earlier in Tx path") Signed-off-by: Yewon Choi Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20231201061048.GA1510@libra05 Signed-off-by: Sasha Levin --- net/xdp/xsk.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 55f8b9b0e06d..3515e19852d8 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -919,7 +919,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, rcu_read_lock(); if (xsk_check_common(xs)) - goto skip_tx; + goto out; pool = xs->pool; @@ -931,12 +931,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, xsk_generic_xmit(sk); } -skip_tx: if (xs->rx && !xskq_prod_is_empty(xs->rx)) mask |= EPOLLIN | EPOLLRDNORM; if (xs->tx && xsk_tx_writeable(xs)) mask |= EPOLLOUT | EPOLLWRNORM; - +out: rcu_read_unlock(); return mask; } From 33755da7ae2b47cc1f449ee29da3c7a52c7db600 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Sat, 2 Dec 2023 17:59:02 +0800 Subject: [PATCH 0820/1397] octeontx2-af: fix a use-after-free in rvu_npa_register_reporters [ Upstream commit 3c91c909f13f0c32b0d54d75c3f798479b1a84f5 ] The rvu_dl will be freed in rvu_npa_health_reporters_destroy(rvu_dl) after the create_workqueue fails, and after that free, the rvu_dl will be translate back through rvu_npa_health_reporters_create, rvu_health_reporters_create, and rvu_register_dl. Finally it goes to the err_dl_health label, being freed again in rvu_health_reporters_destroy(rvu) by rvu_npa_health_reporters_destroy. In the second calls of rvu_npa_health_reporters_destroy, however, it uses rvu_dl->rvu_npa_health_reporter, which is already freed at the end of rvu_npa_health_reporters_destroy in the first call. So this patch prevents the first destroy by instantly returning -ENONMEN when create_workqueue fails. In addition, since the failure of create_workqueue is the only entrence of label err, it has been integrated into the error-handling path of create_workqueue. Fixes: f1168d1e207c ("octeontx2-af: Add devlink health reporters for NPA") Signed-off-by: Zhipeng Lu Acked-by: Paolo Abeni Acked-by: Geethasowjanya Akula Link: https://lore.kernel.org/r/20231202095902.3264863-1-alexious@zju.edu.cn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 41df5ac23f92..058f75dc4c8a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1285,7 +1285,7 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq"); if (!rvu_dl->devlink_wq) - goto err; + return -ENOMEM; INIT_WORK(&rvu_reporters->intr_work, rvu_npa_intr_work); INIT_WORK(&rvu_reporters->err_work, rvu_npa_err_work); @@ -1293,9 +1293,6 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) INIT_WORK(&rvu_reporters->ras_work, rvu_npa_ras_work); return 0; -err: - rvu_npa_health_reporters_destroy(rvu_dl); - return -ENOMEM; } static int rvu_npa_health_reporters_create(struct rvu_devlink *rvu_dl) From 2442e9cbb79c8336de06423193b9e9adb662665c Mon Sep 17 00:00:00 2001 From: Marcin Szycik Date: Tue, 7 Nov 2023 14:51:38 +0100 Subject: [PATCH 0821/1397] ice: Restore fix disabling RX VLAN filtering [ Upstream commit 4e7f0087b058cc3cab8f3c32141b51aa5457d298 ] Fix setting dis_rx_filtering depending on whether port vlan is being turned on or off. This was originally fixed in commit c793f8ea15e3 ("ice: Fix disabling Rx VLAN filtering with port VLAN enabled"), but while refactoring ice_vf_vsi_init_vlan_ops(), the fix has been lost. Restore the fix along with the original comment from that change. Also delete duplicate lines in ice_port_vlan_on(). Fixes: 2946204b3fa8 ("ice: implement bridge port vlan") Reviewed-by: Wojciech Drewek Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c index d7b10dc67f03..80dc4bcdd3a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c @@ -32,7 +32,6 @@ static void ice_port_vlan_on(struct ice_vsi *vsi) /* setup outer VLAN ops */ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; - vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; /* setup inner VLAN ops */ vlan_ops = &vsi->inner_vlan_ops; @@ -47,8 +46,13 @@ static void ice_port_vlan_on(struct ice_vsi *vsi) vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan; vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan; - vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan; } + + /* all Rx traffic should be in the domain of the assigned port VLAN, + * so prevent disabling Rx VLAN filtering + */ + vlan_ops->dis_rx_filtering = noop_vlan; + vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering; } @@ -77,6 +81,8 @@ static void ice_port_vlan_off(struct ice_vsi *vsi) vlan_ops->del_vlan = ice_vsi_del_vlan; } + vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; + if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags)) vlan_ops->ena_rx_filtering = noop_vlan; else @@ -141,7 +147,6 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) &vsi->outer_vlan_ops : &vsi->inner_vlan_ops; vlan_ops->add_vlan = ice_vsi_add_vlan; - vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering; vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering; } From ab3a730feab91750448d886d06a41a013bd259e1 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 10 Nov 2023 09:12:09 +0100 Subject: [PATCH 0822/1397] i40e: Fix unexpected MFS warning message [ Upstream commit 7d9f22b3d3ef379ed05bd3f3e2de83dfa8da8258 ] Commit 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set") added a warning message that reports unexpected size of port's MFS (max frame size) value. This message use for the port number local variable 'i' that is wrong. In i40e_probe() this 'i' variable is used only to iterate VSIs to find FDIR VSI: ... /* if FDIR VSI was set up, start it now */ for (i = 0; i < pf->num_alloc_vsi; i++) { if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) { i40e_vsi_open(pf->vsi[i]); break; } } ... So the warning message use for the port number index of FDIR VSI if this exists or pf->num_alloc_vsi if not. Fix the message by using 'pf->hw.port' for the port number. Fixes: 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set") Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 00ca2b88165c..a9f5a8a7d3f0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16195,7 +16195,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT; if (val < MAX_FRAME_SIZE_DEFAULT) dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n", - i, val); + pf->hw.port, val); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out From d88c7cf589ded96e2e25b69b2696f993c9417835 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 27 Nov 2023 15:33:50 -0800 Subject: [PATCH 0823/1397] iavf: validate tx_coalesce_usecs even if rx_coalesce_usecs is zero [ Upstream commit a206d9959f5ccd0fb2d54a997c993947ae0e881c ] In __iavf_set_coalesce, the driver checks both ec->rx_coalesce_usecs and ec->tx_coalesce_usecs for validity. It does this via a chain if if/else-if blocks. If every single branch of the series of if statements exited, this would be fine. However, the rx_coalesce_usecs is checked against zero to print an informative message if use_adaptive_rx_coalesce is enabled. If this check is true, it short circuits the entire chain of statements, preventing validation of the tx_coalesce_usecs field. Indeed, since commit e792779e6b63 ("iavf: Prevent changing static ITR values if adaptive moderation is on") the iavf driver actually rejects any change to the tx_coalesce_usecs or rx_coalesce_usecs when use_adaptive_tx_coalesce or use_adaptive_rx_coalesce is enabled, making this checking a bit redundant. Fix this error by removing the unnecessary and redundant checks for use_adaptive_rx_coalesce and use_adaptive_tx_coalesce. Since zero is a valid value, and since the tx_coalesce_usecs and rx_coalesce_usecs fields are already unsigned, remove the minimum value check. This allows assigning an ITR value ranging from 0-8160 as described by the printed message. Fixes: 65e87c0398f5 ("i40evf: support queue-specific settings for interrupt moderation") Signed-off-by: Jacob Keller Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 12 ++---------- drivers/net/ethernet/intel/iavf/iavf_txrx.h | 1 - 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 90397293525f..1b412754aa42 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -829,18 +829,10 @@ static int __iavf_set_coalesce(struct net_device *netdev, struct iavf_adapter *adapter = netdev_priv(netdev); int i; - if (ec->rx_coalesce_usecs == 0) { - if (ec->use_adaptive_rx_coalesce) - netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else if ((ec->rx_coalesce_usecs < IAVF_MIN_ITR) || - (ec->rx_coalesce_usecs > IAVF_MAX_ITR)) { + if (ec->rx_coalesce_usecs > IAVF_MAX_ITR) { netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; - } else if (ec->tx_coalesce_usecs == 0) { - if (ec->use_adaptive_tx_coalesce) - netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); - } else if ((ec->tx_coalesce_usecs < IAVF_MIN_ITR) || - (ec->tx_coalesce_usecs > IAVF_MAX_ITR)) { + } else if (ec->tx_coalesce_usecs > IAVF_MAX_ITR) { netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index 7e6ee32d19b6..10ba36602c0c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -15,7 +15,6 @@ */ #define IAVF_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ #define IAVF_ITR_MASK 0x1FFE /* mask for ITR register value */ -#define IAVF_MIN_ITR 2 /* reg uses 2 usec resolution */ #define IAVF_ITR_100K 10 /* all values below must be even */ #define IAVF_ITR_50K 20 #define IAVF_ITR_20K 50 From dc903ddc826f020baf193d8befb6c822baa530e3 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 4 Dec 2023 10:40:04 +0800 Subject: [PATCH 0824/1397] net: bnxt: fix a potential use-after-free in bnxt_init_tc [ Upstream commit d007caaaf052f82ca2340d4c7b32d04a3f5dbf3f ] When flow_indr_dev_register() fails, bnxt_init_tc will free bp->tc_info through kfree(). However, the caller function bnxt_init_one() will ignore this failure and call bnxt_shutdown_tc() on failure of bnxt_dl_register(), where a use-after-free happens. Fix this issue by setting bp->tc_info to NULL after kfree(). Fixes: 627c89d00fb9 ("bnxt_en: flow_offload: offload tunnel decap rules via indirect callbacks") Signed-off-by: Dinghao Liu Reviewed-by: Pavan Chebbi Reviewed-by: Michael Chan Reviewed-by: Somnath Kotur Link: https://lore.kernel.org/r/20231204024004.8245-1-dinghao.liu@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 38d89d80b4a9..273c9ba48f09 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -2075,6 +2075,7 @@ int bnxt_init_tc(struct bnxt *bp) rhashtable_destroy(&tc_info->flow_table); free_tc_info: kfree(tc_info); + bp->tc_info = NULL; return rc; } From e4a2a4328c85f1830c8caf5bc8fa566057404fe6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 4 Dec 2023 17:08:05 +0100 Subject: [PATCH 0825/1397] tcp: fix mid stream window clamp. [ Upstream commit 58d3aade20cdddbac6c9707ac0f3f5f8c1278b74 ] After the blamed commit below, if the user-space application performs window clamping when tp->rcv_wnd is 0, the TCP socket will never be able to announce a non 0 receive window, even after completely emptying the receive buffer and re-setting the window clamp to higher values. Refactor tcp_set_window_clamp() to address the issue: when the user decreases the current clamp value, set rcv_ssthresh according to the same logic used at buffer initialization, but ensuring reserved mem provisioning. To avoid code duplication factor-out the relevant bits from tcp_adjust_rcv_ssthresh() in a new helper and reuse it in the above scenario. When increasing the clamp value, give the rcv_ssthresh a chance to grow according to previously implemented heuristic. Fixes: 3aa7857fe1d7 ("tcp: enable mid stream window clamp") Reported-by: David Gibson Reported-by: Stefano Brivio Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/705dad54e6e6e9a010e571bf58e0b35a8ae70503.1701706073.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/tcp.h | 9 +++++++-- net/ipv4/tcp.c | 22 +++++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 0239e815edf7..a88bf8f6db23 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1480,17 +1480,22 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) +static inline void __tcp_adjust_rcv_ssthresh(struct sock *sk, u32 new_ssthresh) { int unused_mem = sk_unused_reserved_mem(sk); struct tcp_sock *tp = tcp_sk(sk); - tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + tp->rcv_ssthresh = min(tp->rcv_ssthresh, new_ssthresh); if (unused_mem) tp->rcv_ssthresh = max_t(u32, tp->rcv_ssthresh, tcp_win_from_space(sk, unused_mem)); } +static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) +{ + __tcp_adjust_rcv_ssthresh(sk, 4U * tcp_sk(sk)->advmss); +} + void tcp_cleanup_rbuf(struct sock *sk, int copied); void __tcp_cleanup_rbuf(struct sock *sk, int copied); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3d3a24f79573..ec46d74c2093 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3368,9 +3368,25 @@ int tcp_set_window_clamp(struct sock *sk, int val) return -EINVAL; tp->window_clamp = 0; } else { - tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? - SOCK_MIN_RCVBUF / 2 : val; - tp->rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp); + u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp; + u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ? + SOCK_MIN_RCVBUF / 2 : val; + + if (new_window_clamp == old_window_clamp) + return 0; + + tp->window_clamp = new_window_clamp; + if (new_window_clamp < old_window_clamp) { + /* need to apply the reserved mem provisioning only + * when shrinking the window clamp + */ + __tcp_adjust_rcv_ssthresh(sk, tp->window_clamp); + + } else { + new_rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp); + tp->rcv_ssthresh = max(new_rcv_ssthresh, + tp->rcv_ssthresh); + } } return 0; } From 4dacbf99cebfa9a3736112d6ab103563b99a8515 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 4 Dec 2023 11:22:33 -0800 Subject: [PATCH 0826/1397] ionic: fix snprintf format length warning [ Upstream commit 0ceb3860a67652f9d36dfdecfcd2cb3eb2f4537d ] Our friendly kernel test robot has reminded us that with a new check we have a warning about a potential string truncation. In this case it really doesn't hurt anything, but it is worth addressing especially since there really is no reason to reserve so many bytes for our queue names. It seems that cutting the queue name buffer length in half stops the complaint. Fixes: c06107cabea3 ("ionic: more ionic name tweaks") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311300201.lO8v7mKU-lkp@intel.com/ Signed-off-by: Shannon Nelson Reviewed-by: Brett Creeley Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231204192234.21017-2-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/pensando/ionic/ionic_dev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index aae4131f146a..bd2d4a26f543 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -222,7 +222,7 @@ struct ionic_desc_info { void *cb_arg; }; -#define IONIC_QUEUE_NAME_MAX_SZ 32 +#define IONIC_QUEUE_NAME_MAX_SZ 16 struct ionic_queue { struct device *dev; From 77c02d10c4077256f4bbbe4dd73e277acf7d9235 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 4 Dec 2023 11:22:34 -0800 Subject: [PATCH 0827/1397] ionic: Fix dim work handling in split interrupt mode [ Upstream commit 4115ba677c35f694b62298e55f0e04ce84eed469 ] Currently ionic_dim_work() is incorrect when in split interrupt mode. This is because the interrupt rate is only being changed for the Rx side even for dim running on Tx. Fix this by using the qcq from the container_of macro. Also, introduce some local variables for a bit of cleanup. Fixes: a6ff85e0a2d9 ("ionic: remove intr coalesce update from napi") Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231204192234.21017-3-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 2c3e36b2dd7f..c9bd2c57a37d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -49,24 +49,24 @@ static void ionic_lif_queue_identify(struct ionic_lif *lif); static void ionic_dim_work(struct work_struct *work) { struct dim *dim = container_of(work, struct dim, work); + struct ionic_intr_info *intr; struct dim_cq_moder cur_moder; struct ionic_qcq *qcq; + struct ionic_lif *lif; u32 new_coal; cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); qcq = container_of(dim, struct ionic_qcq, dim); - new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec); + lif = qcq->q.lif; + new_coal = ionic_coal_usec_to_hw(lif->ionic, cur_moder.usec); new_coal = new_coal ? new_coal : 1; - if (qcq->intr.dim_coal_hw != new_coal) { - unsigned int qi = qcq->cq.bound_q->index; - struct ionic_lif *lif = qcq->q.lif; - - qcq->intr.dim_coal_hw = new_coal; + intr = &qcq->intr; + if (intr->dim_coal_hw != new_coal) { + intr->dim_coal_hw = new_coal; ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - lif->rxqcqs[qi]->intr.index, - qcq->intr.dim_coal_hw); + intr->index, intr->dim_coal_hw); } dim->state = DIM_START_MEASURE; From 08e3e3da6c39e74b2789e9dfb8a2d734c7885ecd Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Sun, 3 Dec 2023 01:14:41 +0900 Subject: [PATCH 0828/1397] ipv4: ip_gre: Avoid skb_pull() failure in ipgre_xmit() [ Upstream commit 80d875cfc9d3711a029f234ef7d680db79e8fa4b ] In ipgre_xmit(), skb_pull() may fail even if pskb_inet_may_pull() returns true. For example, applications can use PF_PACKET to create a malformed packet with no IP header. This type of packet causes a problem such as uninit-value access. This patch ensures that skb_pull() can pull the required size by checking the skb with pskb_network_may_pull() before skb_pull(). Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: Shigeru Yoshida Reviewed-by: Eric Dumazet Reviewed-by: Suman Ghosh Link: https://lore.kernel.org/r/20231202161441.221135-1-syoshida@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/ip_gre.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 22a26d1d29a0..5169c3c72cff 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -635,15 +635,18 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, } if (dev->header_ops) { + int pull_len = tunnel->hlen + sizeof(struct iphdr); + if (skb_cow_head(skb, 0)) goto free_skb; tnl_params = (const struct iphdr *)skb->data; - /* Pull skb since ip_tunnel_xmit() needs skb->data pointing - * to gre header. - */ - skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + if (!pskb_network_may_pull(skb, pull_len)) + goto free_skb; + + /* ip_tunnel_xmit() needs skb->data pointing to gre header. */ + skb_pull(skb, pull_len); skb_reset_mac_header(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && From e30ad4729c57b66bb2c8a73abce9618df2bd94b1 Mon Sep 17 00:00:00 2001 From: Daniil Maximov Date: Mon, 4 Dec 2023 11:58:10 +0300 Subject: [PATCH 0829/1397] net: atlantic: Fix NULL dereference of skb pointer in [ Upstream commit cbe860be36095e68e4e5561ab43610982fb429fd ] If is_ptp_ring == true in the loop of __aq_ring_xdp_clean function, then a timestamp is stored from a packet in a field of skb object, which is not allocated at the moment of the call (skb == NULL). Generalize aq_ptp_extract_ts and other affected functions so they don't work with struct sk_buff*, but with struct skb_shared_hwtstamps*. Found by Linux Verification Center (linuxtesting.org) with SVACE Fixes: 26efaef759a1 ("net: atlantic: Implement xdp data plane") Signed-off-by: Daniil Maximov Reviewed-by: Igor Russkikh Link: https://lore.kernel.org/r/20231204085810.1681386-1-daniil31415it@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../net/ethernet/aquantia/atlantic/aq_ptp.c | 10 +++++----- .../net/ethernet/aquantia/atlantic/aq_ptp.h | 4 ++-- .../net/ethernet/aquantia/atlantic/aq_ring.c | 18 ++++++++++++------ 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index 80b44043e6c5..28c9b6f1a54f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -553,17 +553,17 @@ void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp) /* aq_ptp_rx_hwtstamp - utility function which checks for RX time stamp * @adapter: pointer to adapter struct - * @skb: particular skb to send timestamp with + * @shhwtstamps: particular skb_shared_hwtstamps to save timestamp * * if the timestamp is valid, we convert it into the timecounter ns * value, then store that result into the hwtstamps structure which * is passed up the network stack */ -static void aq_ptp_rx_hwtstamp(struct aq_ptp_s *aq_ptp, struct sk_buff *skb, +static void aq_ptp_rx_hwtstamp(struct aq_ptp_s *aq_ptp, struct skb_shared_hwtstamps *shhwtstamps, u64 timestamp) { timestamp -= atomic_read(&aq_ptp->offset_ingress); - aq_ptp_convert_to_hwtstamp(aq_ptp, skb_hwtstamps(skb), timestamp); + aq_ptp_convert_to_hwtstamp(aq_ptp, shhwtstamps, timestamp); } void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp, @@ -639,7 +639,7 @@ bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring) &aq_ptp->ptp_rx == ring || &aq_ptp->hwts_rx == ring; } -u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p, +u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct skb_shared_hwtstamps *shhwtstamps, u8 *p, unsigned int len) { struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp; @@ -648,7 +648,7 @@ u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p, p, len, ×tamp); if (ret > 0) - aq_ptp_rx_hwtstamp(aq_ptp, skb, timestamp); + aq_ptp_rx_hwtstamp(aq_ptp, shhwtstamps, timestamp); return ret; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h index 28ccb7ca2df9..210b723f2207 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h @@ -67,7 +67,7 @@ int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp, /* Return either ring is belong to PTP or not*/ bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring); -u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p, +u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct skb_shared_hwtstamps *shhwtstamps, u8 *p, unsigned int len); struct ptp_clock *aq_ptp_get_ptp_clock(struct aq_ptp_s *aq_ptp); @@ -143,7 +143,7 @@ static inline bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring) } static inline u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, - struct sk_buff *skb, u8 *p, + struct skb_shared_hwtstamps *shhwtstamps, u8 *p, unsigned int len) { return 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 4de22eed099a..694daeaf3e61 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -647,7 +647,7 @@ static int __aq_ring_rx_clean(struct aq_ring_s *self, struct napi_struct *napi, } if (is_ptp_ring) buff->len -= - aq_ptp_extract_ts(self->aq_nic, skb, + aq_ptp_extract_ts(self->aq_nic, skb_hwtstamps(skb), aq_buf_vaddr(&buff->rxdata), buff->len); @@ -742,6 +742,8 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring, struct aq_ring_buff_s *buff = &rx_ring->buff_ring[rx_ring->sw_head]; bool is_ptp_ring = aq_ptp_ring(rx_ring->aq_nic, rx_ring); struct aq_ring_buff_s *buff_ = NULL; + u16 ptp_hwtstamp_len = 0; + struct skb_shared_hwtstamps shhwtstamps; struct sk_buff *skb = NULL; unsigned int next_ = 0U; struct xdp_buff xdp; @@ -810,11 +812,12 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring, hard_start = page_address(buff->rxdata.page) + buff->rxdata.pg_off - rx_ring->page_offset; - if (is_ptp_ring) - buff->len -= - aq_ptp_extract_ts(rx_ring->aq_nic, skb, - aq_buf_vaddr(&buff->rxdata), - buff->len); + if (is_ptp_ring) { + ptp_hwtstamp_len = aq_ptp_extract_ts(rx_ring->aq_nic, &shhwtstamps, + aq_buf_vaddr(&buff->rxdata), + buff->len); + buff->len -= ptp_hwtstamp_len; + } xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); xdp_prepare_buff(&xdp, hard_start, rx_ring->page_offset, @@ -834,6 +837,9 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring, if (IS_ERR(skb) || !skb) continue; + if (ptp_hwtstamp_len > 0) + *skb_hwtstamps(skb) = shhwtstamps; + if (buff->is_vlan) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), buff->vlan_rx_tag); From 613b5db4c55f68930ead209670dd53ca0c4dfaf4 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Mon, 4 Dec 2023 22:32:31 +0800 Subject: [PATCH 0830/1397] net: hns: fix wrong head when modify the tx feature when sending packets [ Upstream commit 84757d0839451b20b11e993128f0a77393ca50c1 ] Upon changing the tx feature, the hns driver will modify the maybe_stop_tx() and fill_desc() functions, if the modify happens during packet sending, will cause the hardware and software pointers do not match, and the port can not work anymore. This patch deletes the maybe_stop_tx() and fill_desc() functions modification when setting tx feature, and use the skb_is_gro() to determine which functions to use in the tx path. Fixes: 38f616da1c28 ("net:hns: Add support of ethtool TSO set option for Hip06 in HNS") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 53 +++++++++++-------- drivers/net/ethernet/hisilicon/hns/hns_enet.h | 3 +- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 7cf10d1e2b31..85722afe2177 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -142,7 +142,8 @@ MODULE_DEVICE_TABLE(acpi, hns_enet_acpi_match); static void fill_desc(struct hnae_ring *ring, void *priv, int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type, int mtu) + int buf_num, enum hns_desc_type type, int mtu, + bool is_gso) { struct hnae_desc *desc = &ring->desc[ring->next_to_use]; struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; @@ -275,6 +276,15 @@ static int hns_nic_maybe_stop_tso( return 0; } +static int hns_nic_maybe_stop_tx_v2(struct sk_buff **out_skb, int *bnum, + struct hnae_ring *ring) +{ + if (skb_is_gso(*out_skb)) + return hns_nic_maybe_stop_tso(out_skb, bnum, ring); + else + return hns_nic_maybe_stop_tx(out_skb, bnum, ring); +} + static void fill_tso_desc(struct hnae_ring *ring, void *priv, int size, dma_addr_t dma, int frag_end, int buf_num, enum hns_desc_type type, int mtu) @@ -300,6 +310,19 @@ static void fill_tso_desc(struct hnae_ring *ring, void *priv, mtu); } +static void fill_desc_v2(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu, + bool is_gso) +{ + if (is_gso) + fill_tso_desc(ring, priv, size, dma, frag_end, buf_num, type, + mtu); + else + fill_v2_desc(ring, priv, size, dma, frag_end, buf_num, type, + mtu); +} + netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, struct sk_buff *skb, struct hns_nic_ring_data *ring_data) @@ -313,6 +336,7 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, int seg_num; dma_addr_t dma; int size, next_to_use; + bool is_gso; int i; switch (priv->ops.maybe_stop_tx(&skb, &buf_num, ring)) { @@ -339,8 +363,9 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, ring->stats.sw_err_cnt++; goto out_err_tx_ok; } + is_gso = skb_is_gso(skb); priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0, - buf_num, DESC_TYPE_SKB, ndev->mtu); + buf_num, DESC_TYPE_SKB, ndev->mtu, is_gso); /* fill the fragments */ for (i = 1; i < seg_num; i++) { @@ -354,7 +379,7 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, } priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma, seg_num - 1 == i ? 1 : 0, buf_num, - DESC_TYPE_PAGE, ndev->mtu); + DESC_TYPE_PAGE, ndev->mtu, is_gso); } /*complete translate all packets*/ @@ -1776,15 +1801,6 @@ static int hns_nic_set_features(struct net_device *netdev, netdev_info(netdev, "enet v1 do not support tso!\n"); break; default: - if (features & (NETIF_F_TSO | NETIF_F_TSO6)) { - priv->ops.fill_desc = fill_tso_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso; - /* The chip only support 7*4096 */ - netif_set_tso_max_size(netdev, 7 * 4096); - } else { - priv->ops.fill_desc = fill_v2_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; - } break; } netdev->features = features; @@ -2159,16 +2175,9 @@ static void hns_nic_set_priv_ops(struct net_device *netdev) priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; } else { priv->ops.get_rxd_bnum = get_v2rx_desc_bnum; - if ((netdev->features & NETIF_F_TSO) || - (netdev->features & NETIF_F_TSO6)) { - priv->ops.fill_desc = fill_tso_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso; - /* This chip only support 7*4096 */ - netif_set_tso_max_size(netdev, 7 * 4096); - } else { - priv->ops.fill_desc = fill_v2_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; - } + priv->ops.fill_desc = fill_desc_v2; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx_v2; + netif_set_tso_max_size(netdev, 7 * 4096); /* enable tso when init * control tso on/off through TSE bit in bd */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h index ffa9d6573f54..3f3ee032f631 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h @@ -44,7 +44,8 @@ struct hns_nic_ring_data { struct hns_nic_ops { void (*fill_desc)(struct hnae_ring *ring, void *priv, int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type, int mtu); + int buf_num, enum hns_desc_type type, int mtu, + bool is_gso); int (*maybe_stop_tx)(struct sk_buff **out_skb, int *bnum, struct hnae_ring *ring); void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum); From 2daea952199e3ff0b12a23c15f8d0e5b347ab968 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Mon, 4 Dec 2023 22:32:32 +0800 Subject: [PATCH 0831/1397] net: hns: fix fake link up on xge port [ Upstream commit f708aba40f9c1eeb9c7e93ed4863b5f85b09b288 ] If a xge port just connect with an optical module and no fiber, it may have a fake link up because there may be interference on the hardware. This patch adds an anti-shake to avoid the problem. And the time of anti-shake is base on tests. Fixes: b917078c1c10 ("net: hns: Add ACPI support to check SFP present") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 928d934cb21a..f75668c47935 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -66,6 +66,27 @@ static enum mac_mode hns_get_enet_interface(const struct hns_mac_cb *mac_cb) } } +static u32 hns_mac_link_anti_shake(struct mac_driver *mac_ctrl_drv) +{ +#define HNS_MAC_LINK_WAIT_TIME 5 +#define HNS_MAC_LINK_WAIT_CNT 40 + + u32 link_status = 0; + int i; + + if (!mac_ctrl_drv->get_link_status) + return link_status; + + for (i = 0; i < HNS_MAC_LINK_WAIT_CNT; i++) { + msleep(HNS_MAC_LINK_WAIT_TIME); + mac_ctrl_drv->get_link_status(mac_ctrl_drv, &link_status); + if (!link_status) + break; + } + + return link_status; +} + void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) { struct mac_driver *mac_ctrl_drv; @@ -83,6 +104,14 @@ void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) &sfp_prsnt); if (!ret) *link_status = *link_status && sfp_prsnt; + + /* for FIBER port, it may have a fake link up. + * when the link status changes from down to up, we need to do + * anti-shake. the anti-shake time is base on tests. + * only FIBER port need to do this. + */ + if (*link_status && !mac_cb->link) + *link_status = hns_mac_link_anti_shake(mac_ctrl_drv); } mac_cb->link = *link_status; From 00efd99e36b478b012bc9dacd9e0b1e29834c443 Mon Sep 17 00:00:00 2001 From: Nithin Dabilpuram Date: Tue, 5 Dec 2023 13:34:30 +0530 Subject: [PATCH 0832/1397] octeontx2-af: Adjust Tx credits when MCS external bypass is disabled [ Upstream commit dca6fa8644b89f54345e55501b1419316ba5cb29 ] When MCS external bypass is disabled, MCS returns additional 2 credits(32B) for every packet Tx'ed on LMAC. To account for these extra credits, NIX_AF_TX_LINKX_NORM_CREDIT.CC_MCS_CNT needs to be configured as otherwise NIX Tx credits would overflow and will never be returned to idle state credit count causing issues with credit control and MTU change. This patch fixes the same by configuring CC_MCS_CNT at probe time for MCS enabled SoC's Fixes: bd69476e86fc ("octeontx2-af: cn10k: mcs: Install a default TCAM for normal traffic") Signed-off-by: Nithin Dabilpuram Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/mcs.c | 14 +++++++++++++- drivers/net/ethernet/marvell/octeontx2/af/mcs.h | 2 ++ drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 1 + .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 8 ++++++++ .../net/ethernet/marvell/octeontx2/af/rvu_reg.h | 1 + 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index c43f19dfbd74..bd87507cf8ea 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -1219,6 +1219,17 @@ struct mcs *mcs_get_pdata(int mcs_id) return NULL; } +bool is_mcs_bypass(int mcs_id) +{ + struct mcs *mcs_dev; + + list_for_each_entry(mcs_dev, &mcs_list, mcs_list) { + if (mcs_dev->mcs_id == mcs_id) + return mcs_dev->bypass; + } + return true; +} + void mcs_set_port_cfg(struct mcs *mcs, struct mcs_port_cfg_set_req *req) { u64 val = 0; @@ -1436,7 +1447,7 @@ static int mcs_x2p_calibration(struct mcs *mcs) return err; } -static void mcs_set_external_bypass(struct mcs *mcs, u8 bypass) +static void mcs_set_external_bypass(struct mcs *mcs, bool bypass) { u64 val; @@ -1447,6 +1458,7 @@ static void mcs_set_external_bypass(struct mcs *mcs, u8 bypass) else val &= ~BIT_ULL(6); mcs_reg_write(mcs, MCSX_MIL_GLOBAL, val); + mcs->bypass = bypass; } static void mcs_global_cfg(struct mcs *mcs) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.h b/drivers/net/ethernet/marvell/octeontx2/af/mcs.h index 0f89dcb76465..f927cc61dfd2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.h @@ -149,6 +149,7 @@ struct mcs { u16 num_vec; void *rvu; u16 *tx_sa_active; + bool bypass; }; struct mcs_ops { @@ -206,6 +207,7 @@ void mcs_get_custom_tag_cfg(struct mcs *mcs, struct mcs_custom_tag_cfg_get_req * int mcs_alloc_ctrlpktrule(struct rsrc_bmap *rsrc, u16 *pf_map, u16 offset, u16 pcifunc); int mcs_free_ctrlpktrule(struct mcs *mcs, struct mcs_free_ctrl_pkt_rule_req *req); int mcs_ctrlpktrule_write(struct mcs *mcs, struct mcs_ctrl_pkt_rule_write_req *req); +bool is_mcs_bypass(int mcs_id); /* CN10K-B APIs */ void cn10kb_mcs_set_hw_capabilities(struct mcs *mcs); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index c4d999ef5ab4..cce2806aaa50 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -345,6 +345,7 @@ struct nix_hw { struct nix_txvlan txvlan; struct nix_ipolicer *ipolicer; u64 *tx_credits; + u8 cc_mcs_cnt; }; /* RVU block's capabilities or functionality, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index c112c71ff576..4227ebb4a758 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -12,6 +12,7 @@ #include "rvu_reg.h" #include "rvu.h" #include "npc.h" +#include "mcs.h" #include "cgx.h" #include "lmac_common.h" #include "rvu_npc_hash.h" @@ -4389,6 +4390,12 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, SDP_HW_MAX_FRS << 16 | NIC_HW_MIN_FRS); } + /* Get MCS external bypass status for CN10K-B */ + if (mcs_get_blkcnt() == 1) { + /* Adjust for 2 credits when external bypass is disabled */ + nix_hw->cc_mcs_cnt = is_mcs_bypass(0) ? 0 : 2; + } + /* Set credits for Tx links assuming max packet length allowed. * This will be reconfigured based on MTU set for PF/VF. */ @@ -4412,6 +4419,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, tx_credits = (lmac_fifo_len - lmac_max_frs) / 16; /* Enable credits and set credit pkt count to max allowed */ cfg = (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1); + cfg |= FIELD_PREP(NIX_AF_LINKX_MCS_CNT_MASK, nix_hw->cc_mcs_cnt); link = iter + slink; nix_hw->tx_credits[link] = tx_credits; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index b42e631e52d0..18c1c9f361cc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -437,6 +437,7 @@ #define NIX_AF_LINKX_BASE_MASK GENMASK_ULL(11, 0) #define NIX_AF_LINKX_RANGE_MASK GENMASK_ULL(19, 16) +#define NIX_AF_LINKX_MCS_CNT_MASK GENMASK_ULL(33, 32) /* SSO */ #define SSO_AF_CONST (0x1000) From b783ee126c01d5254f985dced230b7a799a727e9 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 5 Dec 2023 13:34:31 +0530 Subject: [PATCH 0833/1397] octeontx2-af: Fix mcs sa cam entries size [ Upstream commit 9723b2cca1f0e980c53156b52ea73b93966b3c8a ] On latest silicon versions SA cam entries increased to 256. This patch fixes the datatype of sa_entries in mcs_hw_info struct to u16 to hold 256 entries. Fixes: 080bbd19c9dd ("octeontx2-af: cn10k: mcs: Add mailboxes for port related operations") Signed-off-by: Geetha sowjanya Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 6b5b06c2b4e9..31bd9aeb41e7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1938,7 +1938,7 @@ struct mcs_hw_info { u8 tcam_entries; /* RX/TX Tcam entries per mcs block */ u8 secy_entries; /* RX/TX SECY entries per mcs block */ u8 sc_entries; /* RX/TX SC CAM entries per mcs block */ - u8 sa_entries; /* PN table entries = SA entries */ + u16 sa_entries; /* PN table entries = SA entries */ u64 rsvd[16]; }; From 82b2c5e0b4f4b5c5df319642920bded010c42d81 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 5 Dec 2023 13:34:32 +0530 Subject: [PATCH 0834/1397] octeontx2-af: Fix mcs stats register address [ Upstream commit 3ba98a8c6f8ceb4e01a78f973d8d9017020bbd57 ] This patch adds the miss mcs stats register for mcs supported platforms. Fixes: 9312150af8da ("octeontx2-af: cn10k: mcs: Support for stats collection") Signed-off-by: Geetha sowjanya Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/octeontx2/af/mcs.c | 4 +-- .../ethernet/marvell/octeontx2/af/mcs_reg.h | 31 ++++++++++++++++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index bd87507cf8ea..c1775bd01c2b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -117,7 +117,7 @@ void mcs_get_rx_secy_stats(struct mcs *mcs, struct mcs_secy_stats *stats, int id reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYTAGGEDCTLX(id); stats->pkt_tagged_ctl_cnt = mcs_reg_read(mcs, reg); - reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDORNOTAGX(id); + reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(id); stats->pkt_untaged_cnt = mcs_reg_read(mcs, reg); reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYCTLX(id); @@ -215,7 +215,7 @@ void mcs_get_sc_stats(struct mcs *mcs, struct mcs_sc_stats *stats, reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCNOTVALIDX(id); stats->pkt_notvalid_cnt = mcs_reg_read(mcs, reg); - reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDOROKX(id); + reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDX(id); stats->pkt_unchecked_cnt = mcs_reg_read(mcs, reg); if (mcs->hw->mcs_blks > 1) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h index f3ab01fc363c..f4c6de89002c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h @@ -810,14 +810,37 @@ offset = 0x9d8ull; \ offset; }) +#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDX(a) ({ \ + u64 offset; \ + \ + offset = 0xee80ull; \ + if (mcs->hw->mcs_blks > 1) \ + offset = 0xe818ull; \ + offset += (a) * 0x8ull; \ + offset; }) + +#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(a) ({ \ + u64 offset; \ + \ + offset = 0xa680ull; \ + if (mcs->hw->mcs_blks > 1) \ + offset = 0xd018ull; \ + offset += (a) * 0x8ull; \ + offset; }) + +#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCLATEORDELAYEDX(a) ({ \ + u64 offset; \ + \ + offset = 0xf680ull; \ + if (mcs->hw->mcs_blks > 1) \ + offset = 0xe018ull; \ + offset += (a) * 0x8ull; \ + offset; }) + #define MCSX_CSE_RX_MEM_SLAVE_INOCTETSSCDECRYPTEDX(a) (0xe680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INOCTETSSCVALIDATEX(a) (0xde80ull + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDORNOTAGX(a) (0xa680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYNOTAGX(a) (0xd218 + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(a) (0xd018ull + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDOROKX(a) (0xee80ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYCTLX(a) (0xb680ull + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCLATEORDELAYEDX(a) (0xf680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSAINVALIDX(a) (0x12680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSANOTUSINGSAERRORX(a) (0x15680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSANOTVALIDX(a) (0x13680ull + (a) * 0x8ull) From cd1045acc228c92f597ade777f002432614db566 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 5 Dec 2023 13:34:33 +0530 Subject: [PATCH 0835/1397] octeontx2-af: Add missing mcs flr handler call [ Upstream commit d431abd0a9aa27be379fb5f8304062071b0f5a7e ] If mcs resources are attached to PF/VF. These resources need to be freed on FLR. This patch add missing mcs flr call on PF FLR. Fixes: bd69476e86fc ("octeontx2-af: cn10k: mcs: Install a default TCAM for normal traffic") Signed-off-by: Geetha sowjanya Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 22c395c7d040..731bb82b577c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2631,6 +2631,9 @@ static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc) rvu_npc_free_mcam_entries(rvu, pcifunc, -1); rvu_mac_reset(rvu, pcifunc); + if (rvu->mcs_blk_cnt) + rvu_mcs_flr_handler(rvu, pcifunc); + mutex_unlock(&rvu->flr_lock); } From b710b35ef66e2397550a4d8d27806964b4bb648f Mon Sep 17 00:00:00 2001 From: Rahul Bhansali Date: Tue, 5 Dec 2023 13:34:34 +0530 Subject: [PATCH 0836/1397] octeontx2-af: Update Tx link register range [ Upstream commit 7336fc196748f82646b630d5a2e9d283e200b988 ] On new silicons the TX channels for transmit level has increased. This patch fixes the respective register offset range to configure the newly added channels. Fixes: b279bbb3314e ("octeontx2-af: NIX Tx scheduler queue config support") Signed-off-by: Rahul Bhansali Signed-off-by: Geetha sowjanya Reviewed-by: Wojciech Drewek Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c index b3150f053291..d46ac29adb96 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c @@ -31,8 +31,8 @@ static struct hw_reg_map txsch_reg_map[NIX_TXSCH_LVL_CNT] = { {NIX_TXSCH_LVL_TL4, 3, 0xFFFF, {{0x0B00, 0x0B08}, {0x0B10, 0x0B18}, {0x1200, 0x12E0} } }, {NIX_TXSCH_LVL_TL3, 4, 0xFFFF, {{0x1000, 0x10E0}, {0x1600, 0x1608}, - {0x1610, 0x1618}, {0x1700, 0x17B0} } }, - {NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x17B0} } }, + {0x1610, 0x1618}, {0x1700, 0x17C8} } }, + {NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x17C8} } }, {NIX_TXSCH_LVL_TL1, 1, 0xFFFF, {{0x0C00, 0x0D98} } }, }; From a365250a4a65f27d92ad71e2028ac1333038d7e6 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 29 Nov 2023 20:12:31 +0100 Subject: [PATCH 0837/1397] dt-bindings: interrupt-controller: Allow #power-domain-cells [ Upstream commit c0a2755aced969e0125fd68ccd95269b28d8913a ] MPM provides a single genpd. Allow #power-domain-cells = <0>. Fixes: 54fc9851c0e0 ("dt-bindings: interrupt-controller: Add Qualcomm MPM support") Acked-by: Shawn Guo Acked-by: Krzysztof Kozlowski Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231129-topic-mpmbindingspd-v2-1-acbe909ceee1@linaro.org Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- .../devicetree/bindings/interrupt-controller/qcom,mpm.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml index 509d20c091af..6a206111d4e0 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml @@ -62,6 +62,9 @@ properties: - description: MPM pin number - description: GIC SPI number for the MPM pin + '#power-domain-cells': + const: 0 + required: - compatible - reg @@ -93,4 +96,5 @@ examples: <86 183>, <90 260>, <91 260>; + #power-domain-cells = <0>; }; From 83e9863785f4b7ecaf58445c95470138238412b1 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Thu, 30 Nov 2023 15:23:23 +0800 Subject: [PATCH 0838/1397] netfilter: bpf: fix bad registration on nf_defrag [ Upstream commit 1834d62ae88500f37cba4439c3237aa85242272e ] We should pass a pointer to global_hook to the get_proto_defrag_hook() instead of its value, since the passed value won't be updated even if the request module was loaded successfully. Log: [ 54.915713] nf_defrag_ipv4 has bad registration [ 54.915779] WARNING: CPU: 3 PID: 6323 at net/netfilter/nf_bpf_link.c:62 get_proto_defrag_hook+0x137/0x160 [ 54.915835] CPU: 3 PID: 6323 Comm: fentry Kdump: loaded Tainted: G E 6.7.0-rc2+ #35 [ 54.915839] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 [ 54.915841] RIP: 0010:get_proto_defrag_hook+0x137/0x160 [ 54.915844] Code: 4f 8c e8 2c cf 68 ff 80 3d db 83 9a 01 00 0f 85 74 ff ff ff 48 89 ee 48 c7 c7 8f 12 4f 8c c6 05 c4 83 9a 01 01 e8 09 ee 5f ff <0f> 0b e9 57 ff ff ff 49 8b 3c 24 4c 63 e5 e8 36 28 6c ff 4c 89 e0 [ 54.915849] RSP: 0018:ffffb676003fbdb0 EFLAGS: 00010286 [ 54.915852] RAX: 0000000000000023 RBX: ffff9596503d5600 RCX: ffff95996fce08c8 [ 54.915854] RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff95996fce08c0 [ 54.915855] RBP: ffffffff8c4f12de R08: 0000000000000000 R09: 00000000fffeffff [ 54.915859] R10: ffffb676003fbc70 R11: ffffffff8d363ae8 R12: 0000000000000000 [ 54.915861] R13: ffffffff8e1f75c0 R14: ffffb676003c9000 R15: 00007ffd15e78ef0 [ 54.915864] FS: 00007fb6e9cab740(0000) GS:ffff95996fcc0000(0000) knlGS:0000000000000000 [ 54.915867] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 54.915868] CR2: 00007ffd15e75c40 CR3: 0000000101e62006 CR4: 0000000000360ef0 [ 54.915870] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 54.915871] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 54.915873] Call Trace: [ 54.915891] [ 54.915894] ? __warn+0x84/0x140 [ 54.915905] ? get_proto_defrag_hook+0x137/0x160 [ 54.915908] ? __report_bug+0xea/0x100 [ 54.915925] ? report_bug+0x2b/0x80 [ 54.915928] ? handle_bug+0x3c/0x70 [ 54.915939] ? exc_invalid_op+0x18/0x70 [ 54.915942] ? asm_exc_invalid_op+0x1a/0x20 [ 54.915948] ? get_proto_defrag_hook+0x137/0x160 [ 54.915950] bpf_nf_link_attach+0x1eb/0x240 [ 54.915953] link_create+0x173/0x290 [ 54.915969] __sys_bpf+0x588/0x8f0 [ 54.915974] __x64_sys_bpf+0x20/0x30 [ 54.915977] do_syscall_64+0x45/0xf0 [ 54.915989] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 54.915998] RIP: 0033:0x7fb6e9daa51d [ 54.916001] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 2b 89 0c 00 f7 d8 64 89 01 48 [ 54.916003] RSP: 002b:00007ffd15e78ed8 EFLAGS: 00000246 ORIG_RAX: 0000000000000141 [ 54.916006] RAX: ffffffffffffffda RBX: 00007ffd15e78fc0 RCX: 00007fb6e9daa51d [ 54.916007] RDX: 0000000000000040 RSI: 00007ffd15e78ef0 RDI: 000000000000001c [ 54.916009] RBP: 000000000000002d R08: 00007fb6e9e73a60 R09: 0000000000000001 [ 54.916010] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000006 [ 54.916012] R13: 0000000000000006 R14: 0000000000000000 R15: 0000000000000000 [ 54.916014] [ 54.916015] ---[ end trace 0000000000000000 ]--- Fixes: 91721c2d02d3 ("netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link") Signed-off-by: D. Wythe Acked-by: Daniel Xu Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_bpf_link.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index e502ec00b2fe..0e4beae421f8 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -31,7 +31,7 @@ struct bpf_nf_link { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) static const struct nf_defrag_hook * get_proto_defrag_hook(struct bpf_nf_link *link, - const struct nf_defrag_hook __rcu *global_hook, + const struct nf_defrag_hook __rcu **ptr_global_hook, const char *mod) { const struct nf_defrag_hook *hook; @@ -39,7 +39,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link, /* RCU protects us from races against module unloading */ rcu_read_lock(); - hook = rcu_dereference(global_hook); + hook = rcu_dereference(*ptr_global_hook); if (!hook) { rcu_read_unlock(); err = request_module(mod); @@ -47,7 +47,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link, return ERR_PTR(err < 0 ? err : -EINVAL); rcu_read_lock(); - hook = rcu_dereference(global_hook); + hook = rcu_dereference(*ptr_global_hook); } if (hook && try_module_get(hook->owner)) { @@ -78,7 +78,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link) switch (link->hook_ops.pf) { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) case NFPROTO_IPV4: - hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4"); + hook = get_proto_defrag_hook(link, &nf_defrag_v4_hook, "nf_defrag_ipv4"); if (IS_ERR(hook)) return PTR_ERR(hook); @@ -87,7 +87,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link) #endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) case NFPROTO_IPV6: - hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6"); + hook = get_proto_defrag_hook(link, &nf_defrag_v6_hook, "nf_defrag_ipv6"); if (IS_ERR(hook)) return PTR_ERR(hook); From 0392d322b45bdd33e7b215caad0f0e581eeb0ee2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Dec 2023 12:29:54 +0100 Subject: [PATCH 0839/1397] netfilter: nf_tables: fix 'exist' matching on bigendian arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 63331e37fb227e796894b31d713697612c8dee7f ] Maze reports "tcp option fastopen exists" fails to match on OpenWrt 22.03.5, r20134-5f15225c1e (5.10.176) router. "tcp option fastopen exists" translates to: inet [ exthdr load tcpopt 1b @ 34 + 0 present => reg 1 ] [ cmp eq reg 1 0x00000001 ] .. but existing nft userspace generates a 1-byte compare. On LSB (x86), "*reg32 = 1" is identical to nft_reg_store8(reg32, 1), but not on MSB, which will place the 1 last. IOW, on bigendian aches the cmp8 is awalys false. Make sure we store this in a consistent fashion, so existing userspace will also work on MSB (bigendian). Regardless of this patch we can also change nft userspace to generate 'reg32 == 0' and 'reg32 != 0' instead of u8 == 0 // u8 == 1 when adding 'option x missing/exists' expressions as well. Fixes: 3c1fece8819e ("netfilter: nft_exthdr: Allow checking TCP option presence, too") Fixes: b9f9a485fb0e ("netfilter: nft_exthdr: add boolean DCCP option matching") Fixes: 055c4b34b94f ("netfilter: nft_fib: Support existence check") Reported-by: Maciej Żenczykowski Closes: https://lore.kernel.org/netfilter-devel/CAHo-OozyEqHUjL2-ntATzeZOiuftLWZ_HU6TOM_js4qLfDEAJg@mail.gmail.com/ Signed-off-by: Florian Westphal Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_exthdr.c | 4 ++-- net/netfilter/nft_fib.c | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 3fbaa7bf41f9..6eb571d0c3fd 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -214,7 +214,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr, offset = i + priv->offset; if (priv->flags & NFT_EXTHDR_F_PRESENT) { - *dest = 1; + nft_reg_store8(dest, 1); } else { if (priv->len % NFT_REG32_SIZE) dest[priv->len / NFT_REG32_SIZE] = 0; @@ -461,7 +461,7 @@ static void nft_exthdr_dccp_eval(const struct nft_expr *expr, type = bufp[0]; if (type == priv->type) { - *dest = 1; + nft_reg_store8(dest, 1); return; } diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 04b51f285332..ca905aa8227e 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -145,11 +145,15 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv, switch (priv->result) { case NFT_FIB_RESULT_OIF: index = dev ? dev->ifindex : 0; - *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; + if (priv->flags & NFTA_FIB_F_PRESENT) + nft_reg_store8(dreg, !!index); + else + *dreg = index; + break; case NFT_FIB_RESULT_OIFNAME: if (priv->flags & NFTA_FIB_F_PRESENT) - *dreg = !!dev; + nft_reg_store8(dreg, !!dev); else strscpy_pad(reg, dev ? dev->name : "", IFNAMSIZ); break; From 5eeba00a89c5c6be4e45b69bef20d1d1b9edb968 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Dec 2023 14:25:33 +0100 Subject: [PATCH 0840/1397] netfilter: nf_tables: bail out on mismatching dynset and set expressions [ Upstream commit 3701cd390fd731ee7ae8b8006246c8db82c72bea ] If dynset expressions provided by userspace is larger than the declared set expressions, then bail out. Fixes: 48b0ae046ee9 ("netfilter: nftables: netlink support for several set element expressions") Reported-by: Xingyuan Mo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_dynset.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 5c5cc01c73c5..629a91a8c614 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -279,10 +279,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, priv->expr_array[i] = dynset_expr; priv->num_exprs++; - if (set->num_exprs && - dynset_expr->ops != set->exprs[i]->ops) { - err = -EOPNOTSUPP; - goto err_expr_free; + if (set->num_exprs) { + if (i >= set->num_exprs) { + err = -EINVAL; + goto err_expr_free; + } + if (dynset_expr->ops != set->exprs[i]->ops) { + err = -EOPNOTSUPP; + goto err_expr_free; + } } i++; } From fa72b6e62fa52ce777344cf62d9852e7d938f2d2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Dec 2023 14:51:48 +0100 Subject: [PATCH 0841/1397] netfilter: nf_tables: validate family when identifying table via handle [ Upstream commit f6e1532a2697b81da00bfb184e99d15e01e9d98c ] Validate table family when looking up for it via NFTA_TABLE_HANDLE. Fixes: 3ecbfd65f50e ("netfilter: nf_tables: allocate handle and delete objects via handle") Reported-by: Xingyuan Mo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4a450f6d12a5..fb5c62aa8d9c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -805,7 +805,7 @@ static struct nft_table *nft_table_lookup(const struct net *net, static struct nft_table *nft_table_lookup_byhandle(const struct net *net, const struct nlattr *nla, - u8 genmask, u32 nlpid) + int family, u8 genmask, u32 nlpid) { struct nftables_pernet *nft_net; struct nft_table *table; @@ -813,6 +813,7 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net, nft_net = nft_pernet(net); list_for_each_entry(table, &nft_net->tables, list) { if (be64_to_cpu(nla_get_be64(nla)) == table->handle && + table->family == family && nft_active_genmask(table, genmask)) { if (nft_table_has_owner(table) && nlpid && table->nlpid != nlpid) @@ -1546,7 +1547,7 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_TABLE_HANDLE]) { attr = nla[NFTA_TABLE_HANDLE]; - table = nft_table_lookup_byhandle(net, attr, genmask, + table = nft_table_lookup_byhandle(net, attr, family, genmask, NETLINK_CB(skb).portid); } else { attr = nla[NFTA_TABLE_NAME]; From 1417b7198546edf59b061cd5953b7d8da2c03253 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 5 Dec 2023 21:58:12 +0100 Subject: [PATCH 0842/1397] netfilter: xt_owner: Fix for unsafe access of sk->sk_socket [ Upstream commit 7ae836a3d630e146b732fe8ef7d86b243748751f ] A concurrently running sock_orphan() may NULL the sk_socket pointer in between check and deref. Follow other users (like nft_meta.c for instance) and acquire sk_callback_lock before dereferencing sk_socket. Fixes: 0265ab44bacc ("[NETFILTER]: merge ipt_owner/ip6t_owner in xt_owner") Reported-by: Jann Horn Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_owner.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index e85ce69924ae..50332888c8d2 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -76,18 +76,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) */ return false; - filp = sk->sk_socket->file; - if (filp == NULL) + read_lock_bh(&sk->sk_callback_lock); + filp = sk->sk_socket ? sk->sk_socket->file : NULL; + if (filp == NULL) { + read_unlock_bh(&sk->sk_callback_lock); return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; + } if (info->match & XT_OWNER_UID) { kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ - !(info->invert & XT_OWNER_UID)) + !(info->invert & XT_OWNER_UID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } if (info->match & XT_OWNER_GID) { @@ -112,10 +117,13 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) } } - if (match ^ !(info->invert & XT_OWNER_GID)) + if (match ^ !(info->invert & XT_OWNER_GID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } + read_unlock_bh(&sk->sk_callback_lock); return true; } From 2087d53a66e97a5eb5d1bf558d5bef9e5f891757 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Dec 2023 16:18:41 +0000 Subject: [PATCH 0843/1397] tcp: do not accept ACK of bytes we never sent [ Upstream commit 3d501dd326fb1c73f1b8206d4c6e1d7b15c07e27 ] This patch is based on a detailed report and ideas from Yepeng Pan and Christian Rossow. ACK seq validation is currently following RFC 5961 5.2 guidelines: The ACK value is considered acceptable only if it is in the range of ((SND.UNA - MAX.SND.WND) <= SEG.ACK <= SND.NXT). All incoming segments whose ACK value doesn't satisfy the above condition MUST be discarded and an ACK sent back. It needs to be noted that RFC 793 on page 72 (fifth check) says: "If the ACK is a duplicate (SEG.ACK < SND.UNA), it can be ignored. If the ACK acknowledges something not yet sent (SEG.ACK > SND.NXT) then send an ACK, drop the segment, and return". The "ignored" above implies that the processing of the incoming data segment continues, which means the ACK value is treated as acceptable. This mitigation makes the ACK check more stringent since any ACK < SND.UNA wouldn't be accepted, instead only ACKs that are in the range ((SND.UNA - MAX.SND.WND) <= SEG.ACK <= SND.NXT) get through. This can be refined for new (and possibly spoofed) flows, by not accepting ACK for bytes that were never sent. This greatly improves TCP security at a little cost. I added a Fixes: tag to make sure this patch will reach stable trees, even if the 'blamed' patch was adhering to the RFC. tp->bytes_acked was added in linux-4.2 Following packetdrill test (courtesy of Yepeng Pan) shows the issue at hand: 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1024) = 0 // ---------------- Handshake ------------------- // // when window scale is set to 14 the window size can be extended to // 65535 * (2^14) = 1073725440. Linux would accept an ACK packet // with ack number in (Server_ISN+1-1073725440. Server_ISN+1) // ,though this ack number acknowledges some data never // sent by the server. +0 < S 0:0(0) win 65535 +0 > S. 0:0(0) ack 1 <...> +0 < . 1:1(0) ack 1 win 65535 +0 accept(3, ..., ...) = 4 // For the established connection, we send an ACK packet, // the ack packet uses ack number 1 - 1073725300 + 2^32, // where 2^32 is used to wrap around. // Note: we used 1073725300 instead of 1073725440 to avoid possible // edge cases. // 1 - 1073725300 + 2^32 = 3221241997 // Oops, old kernels happily accept this packet. +0 < . 1:1001(1000) ack 3221241997 win 65535 // After the kernel fix the following will be replaced by a challenge ACK, // and prior malicious frame would be dropped. +0 > . 1:1(0) ack 1001 Fixes: 354e4aa391ed ("tcp: RFC 5961 5.2 Blind Data Injection Attack Mitigation") Signed-off-by: Eric Dumazet Reported-by: Yepeng Pan Reported-by: Christian Rossow Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20231205161841.2702925-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1f9d1d445fb3..e6c492954942 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3809,8 +3809,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * then we can probably ignore it. */ if (before(ack, prior_snd_una)) { + u32 max_window; + + /* do not accept ACK for bytes we never sent. */ + max_window = min_t(u64, tp->max_window, tp->bytes_acked); /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ - if (before(ack, prior_snd_una - tp->max_window)) { + if (before(ack, prior_snd_una - max_window)) { if (!(flag & FLAG_NO_CHALLENGE_ACK)) tcp_send_challenge_ack(sk); return -SKB_DROP_REASON_TCP_TOO_OLD_ACK; From 29561ef098a0f6ca5b98f7513580e9f6dfd810fb Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Tue, 5 Dec 2023 23:13:59 +0100 Subject: [PATCH 0844/1397] net: dsa: mv88e6xxx: Restore USXGMII support for 6393X [ Upstream commit 0c7ed1f9197aecada33a08b022e484a97bf584ba ] In 4a56212774ac, USXGMII support was added for 6393X, but this was lost in the PCS conversion (the blamed commit), most likely because these efforts where more or less done in parallel. Restore this feature by porting Michal's patch to fit the new implementation. Reviewed-by: Florian Fainelli Tested-by: Michal Smulski Reviewed-by: Vladimir Oltean Fixes: e5b732a275f5 ("net: dsa: mv88e6xxx: convert 88e639x to phylink_pcs") Signed-off-by: Tobias Waldekranz Link: https://lore.kernel.org/r/20231205221359.3926018-1-tobias@waldekranz.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/pcs-639x.c | 31 ++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c index ba373656bfe1..c31f0e54f1e6 100644 --- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c +++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c @@ -465,6 +465,7 @@ mv88e639x_pcs_select(struct mv88e6xxx_chip *chip, int port, case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_XAUI: case PHY_INTERFACE_MODE_RXAUI: + case PHY_INTERFACE_MODE_USXGMII: return &mpcs->xg_pcs; default: @@ -873,7 +874,8 @@ static int mv88e6393x_xg_pcs_post_config(struct phylink_pcs *pcs, struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs); int err; - if (interface == PHY_INTERFACE_MODE_10GBASER) { + if (interface == PHY_INTERFACE_MODE_10GBASER || + interface == PHY_INTERFACE_MODE_USXGMII) { err = mv88e6393x_erratum_5_2(mpcs); if (err) return err; @@ -886,12 +888,37 @@ static int mv88e6393x_xg_pcs_post_config(struct phylink_pcs *pcs, return mv88e639x_xg_pcs_enable(mpcs); } +static void mv88e6393x_xg_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) +{ + struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs); + u16 status, lp_status; + int err; + + if (state->interface != PHY_INTERFACE_MODE_USXGMII) + return mv88e639x_xg_pcs_get_state(pcs, state); + + state->link = false; + + err = mv88e639x_read(mpcs, MV88E6390_USXGMII_PHY_STATUS, &status); + err = err ? : mv88e639x_read(mpcs, MV88E6390_USXGMII_LP_STATUS, &lp_status); + if (err) { + dev_err(mpcs->mdio.dev.parent, + "can't read USXGMII status: %pe\n", ERR_PTR(err)); + return; + } + + state->link = !!(status & MDIO_USXGMII_LINK); + state->an_complete = state->link; + phylink_decode_usxgmii_word(state, lp_status); +} + static const struct phylink_pcs_ops mv88e6393x_xg_pcs_ops = { .pcs_enable = mv88e6393x_xg_pcs_enable, .pcs_disable = mv88e6393x_xg_pcs_disable, .pcs_pre_config = mv88e6393x_xg_pcs_pre_config, .pcs_post_config = mv88e6393x_xg_pcs_post_config, - .pcs_get_state = mv88e639x_xg_pcs_get_state, + .pcs_get_state = mv88e6393x_xg_pcs_get_state, .pcs_config = mv88e639x_xg_pcs_config, }; From eb30a025982f61910f32e99778db3c0ceceaff5c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 6 Dec 2023 15:27:05 -0800 Subject: [PATCH 0845/1397] net: tls, update curr on splice as well [ Upstream commit c5a595000e2677e865a39f249c056bc05d6e55fd ] The curr pointer must also be updated on the splice similar to how we do this for other copy types. Fixes: d829e9c4112b ("tls: convert to generic sk_msg interface") Signed-off-by: John Fastabend Reported-by: Jann Horn Link: https://lore.kernel.org/r/20231206232706.374377-2-john.fastabend@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 779815b885e9..27cc0f0a90e1 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -952,6 +952,8 @@ static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg, } sk_msg_page_add(msg_pl, page, part, off); + msg_pl->sg.copybreak = 0; + msg_pl->sg.curr = msg_pl->sg.end; sk_mem_charge(sk, part); *copied += part; try_to_copy -= part; From 94e81d1a4bd9cd516da84d060536226a6578565c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 6 Dec 2023 15:27:06 -0800 Subject: [PATCH 0846/1397] bpf: sockmap, updating the sg structure should also update curr [ Upstream commit bb9aefde5bbaf6c168c77ba635c155b4980c2287 ] Curr pointer should be updated when the sg structure is shifted. Fixes: 7246d8ed4dcce ("bpf: helper to pop data from messages") Signed-off-by: John Fastabend Link: https://lore.kernel.org/r/20231206232706.374377-3-john.fastabend@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/filter.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index b149a165c405..90fe3e754383 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2591,6 +2591,22 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes) return 0; } +static void sk_msg_reset_curr(struct sk_msg *msg) +{ + u32 i = msg->sg.start; + u32 len = 0; + + do { + len += sk_msg_elem(msg, i)->length; + sk_msg_iter_var_next(i); + if (len >= msg->sg.size) + break; + } while (i != msg->sg.end); + + msg->sg.curr = i; + msg->sg.copybreak = 0; +} + static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { .func = bpf_msg_cork_bytes, .gpl_only = false, @@ -2710,6 +2726,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, msg->sg.end - shift + NR_MSG_FRAG_IDS : msg->sg.end - shift; out: + sk_msg_reset_curr(msg); msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset; msg->data_end = msg->data + bytes; return 0; @@ -2846,6 +2863,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, msg->sg.data[new] = rsge; } + sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } @@ -3014,6 +3032,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, sk_mem_uncharge(msg->sk, len - pop); msg->sg.size -= (len - pop); + sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } From 580c80b6f8814e0a2e6f9948c91fb439fc28aeea Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Dec 2023 23:31:01 +0200 Subject: [PATCH 0847/1397] psample: Require 'CAP_NET_ADMIN' when joining "packets" group [ Upstream commit 44ec98ea5ea9cfecd31a5c4cc124703cb5442832 ] The "psample" generic netlink family notifies sampled packets over the "packets" multicast group. This is problematic since by default generic netlink allows non-root users to listen to these notifications. Fix by marking the group with the 'GENL_UNS_ADMIN_PERM' flag. This will prevent non-root users or root without the 'CAP_NET_ADMIN' capability (in the user namespace owning the network namespace) from joining the group. Tested using [1]. Before: # capsh -- -c ./psample_repo # capsh --drop=cap_net_admin -- -c ./psample_repo After: # capsh -- -c ./psample_repo # capsh --drop=cap_net_admin -- -c ./psample_repo Failed to join "packets" multicast group [1] $ cat psample.c #include #include #include #include int join_grp(struct nl_sock *sk, const char *grp_name) { int grp, err; grp = genl_ctrl_resolve_grp(sk, "psample", grp_name); if (grp < 0) { fprintf(stderr, "Failed to resolve \"%s\" multicast group\n", grp_name); return grp; } err = nl_socket_add_memberships(sk, grp, NFNLGRP_NONE); if (err) { fprintf(stderr, "Failed to join \"%s\" multicast group\n", grp_name); return err; } return 0; } int main(int argc, char **argv) { struct nl_sock *sk; int err; sk = nl_socket_alloc(); if (!sk) { fprintf(stderr, "Failed to allocate socket\n"); return -1; } err = genl_connect(sk); if (err) { fprintf(stderr, "Failed to connect socket\n"); return err; } err = join_grp(sk, "config"); if (err) return err; err = join_grp(sk, "packets"); if (err) return err; return 0; } $ gcc -I/usr/include/libnl3 -lnl-3 -lnl-genl-3 -o psample_repo psample.c Fixes: 6ae0a6286171 ("net: Introduce psample, a new genetlink channel for packet sampling") Reported-by: "The UK's National Cyber Security Centre (NCSC)" Signed-off-by: Ido Schimmel Reviewed-by: Jacob Keller Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231206213102.1824398-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/psample/psample.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/psample/psample.c b/net/psample/psample.c index 81a794e36f53..c34e902855db 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -31,7 +31,8 @@ enum psample_nl_multicast_groups { static const struct genl_multicast_group psample_nl_mcgrps[] = { [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME }, - [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME }, + [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME, + .flags = GENL_UNS_ADMIN_PERM }, }; static struct genl_family psample_nl_family __ro_after_init; From e036a325a97743e2c7252f9aada5427dba4331e8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Dec 2023 23:31:02 +0200 Subject: [PATCH 0848/1397] drop_monitor: Require 'CAP_SYS_ADMIN' when joining "events" group [ Upstream commit e03781879a0d524ce3126678d50a80484a513c4b ] The "NET_DM" generic netlink family notifies drop locations over the "events" multicast group. This is problematic since by default generic netlink allows non-root users to listen to these notifications. Fix by adding a new field to the generic netlink multicast group structure that when set prevents non-root users or root without the 'CAP_SYS_ADMIN' capability (in the user namespace owning the network namespace) from joining the group. Set this field for the "events" group. Use 'CAP_SYS_ADMIN' rather than 'CAP_NET_ADMIN' because of the nature of the information that is shared over this group. Note that the capability check in this case will always be performed against the initial user namespace since the family is not netns aware and only operates in the initial network namespace. A new field is added to the structure rather than using the "flags" field because the existing field uses uAPI flags and it is inappropriate to add a new uAPI flag for an internal kernel check. In net-next we can rework the "flags" field to use internal flags and fold the new field into it. But for now, in order to reduce the amount of changes, add a new field. Since the information can only be consumed by root, mark the control plane operations that start and stop the tracing as root-only using the 'GENL_ADMIN_PERM' flag. Tested using [1]. Before: # capsh -- -c ./dm_repo # capsh --drop=cap_sys_admin -- -c ./dm_repo After: # capsh -- -c ./dm_repo # capsh --drop=cap_sys_admin -- -c ./dm_repo Failed to join "events" multicast group [1] $ cat dm.c #include #include #include #include int main(int argc, char **argv) { struct nl_sock *sk; int grp, err; sk = nl_socket_alloc(); if (!sk) { fprintf(stderr, "Failed to allocate socket\n"); return -1; } err = genl_connect(sk); if (err) { fprintf(stderr, "Failed to connect socket\n"); return err; } grp = genl_ctrl_resolve_grp(sk, "NET_DM", "events"); if (grp < 0) { fprintf(stderr, "Failed to resolve \"events\" multicast group\n"); return grp; } err = nl_socket_add_memberships(sk, grp, NFNLGRP_NONE); if (err) { fprintf(stderr, "Failed to join \"events\" multicast group\n"); return err; } return 0; } $ gcc -I/usr/include/libnl3 -lnl-3 -lnl-genl-3 -o dm_repo dm.c Fixes: 9a8afc8d3962 ("Network Drop Monitor: Adding drop monitor implementation & Netlink protocol") Reported-by: "The UK's National Cyber Security Centre (NCSC)" Signed-off-by: Ido Schimmel Reviewed-by: Jacob Keller Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231206213102.1824398-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/genetlink.h | 2 ++ net/core/drop_monitor.c | 4 +++- net/netlink/genetlink.c | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index e18a4c0d69ee..c53244f20437 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -12,10 +12,12 @@ * struct genl_multicast_group - generic netlink multicast group * @name: name of the multicast group, names are per-family * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) + * @cap_sys_admin: whether %CAP_SYS_ADMIN is required for binding */ struct genl_multicast_group { char name[GENL_NAMSIZ]; u8 flags; + u8 cap_sys_admin:1; }; struct genl_split_ops; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index aff31cd944c2..b240d9aae4a6 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -183,7 +183,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) } static const struct genl_multicast_group dropmon_mcgrps[] = { - { .name = "events", }, + { .name = "events", .cap_sys_admin = 1 }, }; static void send_dm_alert(struct work_struct *work) @@ -1619,11 +1619,13 @@ static const struct genl_small_ops dropmon_ops[] = { .cmd = NET_DM_CMD_START, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_STOP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_CONFIG_GET, diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 8315d31b53db..d41c4a936ad0 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1690,6 +1690,9 @@ static int genl_bind(struct net *net, int group) if ((grp->flags & GENL_UNS_ADMIN_PERM) && !ns_capable(net->user_ns, CAP_NET_ADMIN)) ret = -EPERM; + if (grp->cap_sys_admin && + !ns_capable(net->user_ns, CAP_SYS_ADMIN)) + ret = -EPERM; break; } From b03b2ddf0e9060c327bdfc7141293d16ed454e16 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Wed, 6 Dec 2023 08:16:54 +0100 Subject: [PATCH 0849/1397] net: dsa: microchip: provide a list of valid protocols for xmit handler [ Upstream commit 1499b89289bf272fd83cb296c82fb5519d0fe93f ] Provide a list of valid protocols for which the driver will provide it's deferred xmit handler. When using DSA_TAG_PROTO_KSZ8795 protocol, it does not provide a "connect" method, therefor ksz_connect() is not allocating ksz_tagger_data. This avoids the following null pointer dereference: ksz_connect_tag_protocol from dsa_register_switch+0x9ac/0xee0 dsa_register_switch from ksz_switch_register+0x65c/0x828 ksz_switch_register from ksz_spi_probe+0x11c/0x168 ksz_spi_probe from spi_probe+0x84/0xa8 spi_probe from really_probe+0xc8/0x2d8 Fixes: ab32f56a4100 ("net: dsa: microchip: ptp: add packet transmission timestamping") Signed-off-by: Sean Nyekjaer Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20231206071655.1626479-1-sean@geanix.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/microchip/ksz_common.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 42db7679c360..286e20f340e5 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2624,10 +2624,18 @@ static int ksz_connect_tag_protocol(struct dsa_switch *ds, { struct ksz_tagger_data *tagger_data; - tagger_data = ksz_tagger_data(ds); - tagger_data->xmit_work_fn = ksz_port_deferred_xmit; - - return 0; + switch (proto) { + case DSA_TAG_PROTO_KSZ8795: + return 0; + case DSA_TAG_PROTO_KSZ9893: + case DSA_TAG_PROTO_KSZ9477: + case DSA_TAG_PROTO_LAN937X: + tagger_data = ksz_tagger_data(ds); + tagger_data->xmit_work_fn = ksz_port_deferred_xmit; + return 0; + default: + return -EPROTONOSUPPORT; + } } static int ksz_port_vlan_filtering(struct dsa_switch *ds, int port, From f9eef2563234b3f77df279d95673cb6293f2aab1 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 7 Dec 2023 01:02:37 +0800 Subject: [PATCH 0850/1397] net/smc: fix missing byte order conversion in CLC handshake [ Upstream commit c5a10397d4571bcfd4bd7ca211ee47bcb6792ec3 ] The byte order conversions of ISM GID and DMB token are missing in process of CLC accept and confirm. So fix it. Fixes: 3d9725a6a133 ("net/smc: common routine for CLC accept and confirm") Signed-off-by: Wen Gu Reviewed-by: Tony Lu Reviewed-by: Alexandra Winter Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/1701882157-87956-1-git-send-email-guwen@linux.alibaba.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/smc/af_smc.c | 4 ++-- net/smc/smc_clc.c | 9 ++++----- net/smc/smc_clc.h | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 741339ac9483..ef5b5d498ef3 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -723,7 +723,7 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc, int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size); smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx; - smc->conn.peer_token = clc->d0.token; + smc->conn.peer_token = ntohll(clc->d0.token); /* msg header takes up space in the buffer */ smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); @@ -1415,7 +1415,7 @@ static int smc_connect_ism(struct smc_sock *smc, if (rc) return rc; } - ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid; + ini->ism_peer_gid[ini->ism_selected] = ntohll(aclc->d0.gid); /* there is only one lgr role for SMC-D; use server lock */ mutex_lock(&smc_server_lgr_pending); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 8deb46c28f1d..72f4d81a3f41 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -1004,6 +1004,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, { struct smc_connection *conn = &smc->conn; struct smc_clc_first_contact_ext_v2x fce; + struct smcd_dev *smcd = conn->lgr->smcd; struct smc_clc_msg_accept_confirm *clc; struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; @@ -1021,17 +1022,15 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)); clc->hdr.typev1 = SMC_TYPE_D; - clc->d0.gid = - conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd); - clc->d0.token = conn->rmb_desc->token; + clc->d0.gid = htonll(smcd->ops->get_local_gid(smcd)); + clc->d0.token = htonll(conn->rmb_desc->token); clc->d0.dmbe_size = conn->rmbe_size_comp; clc->d0.dmbe_idx = 0; memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); if (version == SMC_V1) { clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); } else { - clc_v2->d1.chid = - htons(smc_ism_get_chid(conn->lgr->smcd)); + clc_v2->d1.chid = htons(smc_ism_get_chid(smcd)); if (eid && eid[0]) memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN); len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index c5c8e7db775a..08155a96a02a 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -204,8 +204,8 @@ struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */ } __packed; struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */ - u64 gid; /* Sender GID */ - u64 token; /* DMB token */ + __be64 gid; /* Sender GID */ + __be64 token; /* DMB token */ u8 dmbe_idx; /* DMBE index */ #if defined(__BIG_ENDIAN_BITFIELD) u8 dmbe_size : 4, /* buf size (compressed) */ From ffc6425512621b1040596fee6ebedb734251cdb0 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 24 Aug 2023 08:36:56 +0100 Subject: [PATCH 0851/1397] drm/amd/amdgpu/amdgpu_doorbell_mgr: Correct misdocumented param 'doorbell_index' [ Upstream commit 04cef5f58395806294a64118cf8a39534bd032a2 ] Fixes the following W=1 kernel build warning(s): drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c:123: warning: Function parameter or member 'doorbell_index' not described in 'amdgpu_doorbell_index_on_bar' drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c:123: warning: Excess function parameter 'db_index' description in 'amdgpu_doorbell_index_on_bar' Reviewed-by: Shashank Sharma Signed-off-by: Lee Jones Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c index 8eee5d783a92..599aece42017 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c @@ -113,7 +113,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) * * @adev: amdgpu_device pointer * @db_bo: doorbell object's bo - * @db_index: doorbell relative index in this doorbell object + * @doorbell_index: doorbell relative index in this doorbell object * * returns doorbell's absolute index in BAR */ From 45d72eadf21ab807f477e7fffd5767b17c7a2aaf Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 9 Oct 2023 22:43:16 +0530 Subject: [PATCH 0852/1397] drm/amdkfd: get doorbell's absolute offset based on the db_size [ Upstream commit 367a0af43373d4f791cc8b466a659ecf5aa52377 ] Here, Adding db_size in byte to find the doorbell's absolute offset for both 32-bit and 64-bit doorbell sizes. So that doorbell offset will be aligned based on the doorbell size. v2: - Addressed the review comment from Felix. v3: - Adding doorbell_size as parameter to get db absolute offset. v4: Squash the two patches into one. Cc: Christian Koenig Cc: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c | 13 +++++++++---- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 10 ++++++++-- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 ++- 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 09f6727e7c73..4a8b33f55f6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -357,8 +357,9 @@ int amdgpu_doorbell_init(struct amdgpu_device *adev); void amdgpu_doorbell_fini(struct amdgpu_device *adev); int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev); uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev, - struct amdgpu_bo *db_bo, - uint32_t doorbell_index); + struct amdgpu_bo *db_bo, + uint32_t doorbell_index, + uint32_t db_size); #define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index)) #define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c index 599aece42017..3f3662e8b871 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c @@ -114,19 +114,24 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) * @adev: amdgpu_device pointer * @db_bo: doorbell object's bo * @doorbell_index: doorbell relative index in this doorbell object + * @db_size: doorbell size is in byte * * returns doorbell's absolute index in BAR */ uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev, - struct amdgpu_bo *db_bo, - uint32_t doorbell_index) + struct amdgpu_bo *db_bo, + uint32_t doorbell_index, + uint32_t db_size) { int db_bo_offset; db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo); - /* doorbell index is 32 bit but doorbell's size is 64-bit, so *2 */ - return db_bo_offset / sizeof(u32) + doorbell_index * 2; + /* doorbell index is 32 bit but doorbell's size can be 32 bit + * or 64 bit, so *db_size(in byte)/4 for alignment. + */ + return db_bo_offset / sizeof(u32) + doorbell_index * + DIV_ROUND_UP(db_size, 4); } /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 0d3d538b64eb..e07652e72496 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -407,7 +407,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev, qpd->proc_doorbells, - q->doorbell_id); + q->doorbell_id, + dev->kfd->device_info.doorbell_size); return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 7b38537c7c99..05c74887fd6f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -161,7 +161,10 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) return NULL; - *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx); + *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, + kfd->doorbells, + inx, + kfd->device_info.doorbell_size); inx *= 2; pr_debug("Get kernel queue doorbell\n" @@ -240,7 +243,10 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd) return 0; } - first_db_index = amdgpu_doorbell_index_on_bar(adev, pdd->qpd.proc_doorbells, 0); + first_db_index = amdgpu_doorbell_index_on_bar(adev, + pdd->qpd.proc_doorbells, + 0, + pdd->dev->kfd->device_info.doorbell_size); return adev->doorbell.base + first_db_index * sizeof(uint32_t); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index adb5e4bdc0b2..77649392e233 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -377,7 +377,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, */ uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev, pdd->qpd.proc_doorbells, - 0); + 0, + pdd->dev->kfd->device_info.doorbell_size); *p_doorbell_offset_in_process = (q->properties.doorbell_off - first_db_index) * sizeof(uint32_t); From 577209e844bf0e879f6bff128d4d80f0a5fd8ace Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 6 Nov 2023 17:07:40 +0300 Subject: [PATCH 0853/1397] mm/damon/sysfs: eliminate potential uninitialized variable warning [ Upstream commit 85c2ceaafbd306814a3a4740bf4d95ac26a8b36a ] The "err" variable is not initialized if damon_target_has_pid(ctx) is false and sys_target->regions->nr is zero. Link: https://lkml.kernel.org/r/739e6aaf-a634-4e33-98a8-16546379ec9f@moroto.mountain Fixes: 0bcd216c4741 ("mm/damon/sysfs: update monitoring target regions for online input commit") Signed-off-by: Dan Carpenter Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/damon/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index faaef5098e26..b317f51dcc98 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1172,7 +1172,7 @@ static int damon_sysfs_update_target(struct damon_target *target, struct damon_ctx *ctx, struct damon_sysfs_target *sys_target) { - int err; + int err = 0; if (damon_target_has_pid(ctx)) { err = damon_sysfs_update_target_pid(target, sys_target->pid); From d3c4786b01aad8c377718f92d6d9b15906ee0a2a Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 2 Nov 2023 13:00:55 +0530 Subject: [PATCH 0854/1397] tee: optee: Fix supplicant based device enumeration [ Upstream commit 7269cba53d906cf257c139d3b3a53ad272176bca ] Currently supplicant dependent optee device enumeration only registers devices whenever tee-supplicant is invoked for the first time. But it forgets to remove devices when tee-supplicant daemon stops running and closes its context gracefully. This leads to following error for fTPM driver during reboot/shutdown: [ 73.466791] tpm tpm0: ftpm_tee_tpm_op_send: SUBMIT_COMMAND invoke error: 0xffff3024 Fix this by adding an attribute for supplicant dependent devices so that the user-space service can detect and detach supplicant devices before closing the supplicant: $ for dev in /sys/bus/tee/devices/*; do if [[ -f "$dev/need_supplicant" && -f "$dev/driver/unbind" ]]; \ then echo $(basename "$dev") > $dev/driver/unbind; fi done Reported-by: Jan Kiszka Closes: https://github.com/OP-TEE/optee_os/issues/6094 Fixes: 5f178bb71e3a ("optee: enable support for multi-stage bus enumeration") Signed-off-by: Sumit Garg Reviewed-by: Ilias Apalodimas Acked-by: Jerome Forissier [jw: fixed up Date documentation] Signed-off-by: Jens Wiklander Signed-off-by: Sasha Levin --- .../ABI/testing/sysfs-bus-optee-devices | 9 +++++++++ drivers/tee/optee/device.c | 17 +++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-optee-devices b/Documentation/ABI/testing/sysfs-bus-optee-devices index 0f58701367b6..af31e5a22d89 100644 --- a/Documentation/ABI/testing/sysfs-bus-optee-devices +++ b/Documentation/ABI/testing/sysfs-bus-optee-devices @@ -6,3 +6,12 @@ Description: OP-TEE bus provides reference to registered drivers under this directory. The matches Trusted Application (TA) driver and corresponding TA in secure OS. Drivers are free to create needed API under optee-ta- directory. + +What: /sys/bus/tee/devices/optee-ta-/need_supplicant +Date: November 2023 +KernelVersion: 6.7 +Contact: op-tee@lists.trustedfirmware.org +Description: + Allows to distinguish whether an OP-TEE based TA/device requires user-space + tee-supplicant to function properly or not. This attribute will be present for + devices which depend on tee-supplicant to be running. diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index 64f0e047c23d..4b1092127694 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -60,7 +60,16 @@ static void optee_release_device(struct device *dev) kfree(optee_device); } -static int optee_register_device(const uuid_t *device_uuid) +static ssize_t need_supplicant_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return 0; +} + +static DEVICE_ATTR_RO(need_supplicant); + +static int optee_register_device(const uuid_t *device_uuid, u32 func) { struct tee_client_device *optee_device = NULL; int rc; @@ -83,6 +92,10 @@ static int optee_register_device(const uuid_t *device_uuid) put_device(&optee_device->dev); } + if (func == PTA_CMD_GET_DEVICES_SUPP) + device_create_file(&optee_device->dev, + &dev_attr_need_supplicant); + return rc; } @@ -142,7 +155,7 @@ static int __optee_enumerate_devices(u32 func) num_devices = shm_size / sizeof(uuid_t); for (idx = 0; idx < num_devices; idx++) { - rc = optee_register_device(&device_uuid[idx]); + rc = optee_register_device(&device_uuid[idx], func); if (rc) goto out_shm; } From 24436fe77efef4b16c45d84a10980927b117ebb1 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Wed, 8 Nov 2023 23:31:13 +0900 Subject: [PATCH 0855/1397] RDMA/core: Fix uninit-value access in ib_get_eth_speed() [ Upstream commit 0550d4604e2ca4e653dc13f0c009fc42106b6bfc ] KMSAN reported the following uninit-value access issue: lo speed is unknown, defaulting to 1000 ===================================================== BUG: KMSAN: uninit-value in ib_get_width_and_speed drivers/infiniband/core/verbs.c:1889 [inline] BUG: KMSAN: uninit-value in ib_get_eth_speed+0x546/0xaf0 drivers/infiniband/core/verbs.c:1998 ib_get_width_and_speed drivers/infiniband/core/verbs.c:1889 [inline] ib_get_eth_speed+0x546/0xaf0 drivers/infiniband/core/verbs.c:1998 siw_query_port drivers/infiniband/sw/siw/siw_verbs.c:173 [inline] siw_get_port_immutable+0x6f/0x120 drivers/infiniband/sw/siw/siw_verbs.c:203 setup_port_data drivers/infiniband/core/device.c:848 [inline] setup_device drivers/infiniband/core/device.c:1244 [inline] ib_register_device+0x1589/0x1df0 drivers/infiniband/core/device.c:1383 siw_device_register drivers/infiniband/sw/siw/siw_main.c:72 [inline] siw_newlink+0x129e/0x13d0 drivers/infiniband/sw/siw/siw_main.c:490 nldev_newlink+0x8fd/0xa60 drivers/infiniband/core/nldev.c:1763 rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline] rdma_nl_rcv+0xe8a/0x1120 drivers/infiniband/core/netlink.c:259 netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] netlink_unicast+0xf4b/0x1230 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x1242/0x1420 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x997/0xd60 net/socket.c:2588 ___sys_sendmsg+0x271/0x3b0 net/socket.c:2642 __sys_sendmsg net/socket.c:2671 [inline] __do_sys_sendmsg net/socket.c:2680 [inline] __se_sys_sendmsg net/socket.c:2678 [inline] __x64_sys_sendmsg+0x2fa/0x4a0 net/socket.c:2678 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Local variable lksettings created at: ib_get_eth_speed+0x4b/0xaf0 drivers/infiniband/core/verbs.c:1974 siw_query_port drivers/infiniband/sw/siw/siw_verbs.c:173 [inline] siw_get_port_immutable+0x6f/0x120 drivers/infiniband/sw/siw/siw_verbs.c:203 CPU: 0 PID: 11257 Comm: syz-executor.1 Not tainted 6.6.0-14500-g1c41041124bd #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 ===================================================== If __ethtool_get_link_ksettings() fails, `netdev_speed` is set to the default value, SPEED_1000. In this case, if `lanes` field of struct ethtool_link_ksettings is not initialized, an uninitialized value is passed to ib_get_width_and_speed(). This causes the above issue. This patch resolves the issue by initializing `lanes` to 0. Fixes: cb06b6b3f6cb ("RDMA/core: Get IB width and speed from netdev") Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/r/20231108143113.1360567-1-syoshida@redhat.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 41ff5595c860..186ed3c22ec9 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1968,7 +1968,7 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) int rc; u32 netdev_speed; struct net_device *netdev; - struct ethtool_link_ksettings lksettings; + struct ethtool_link_ksettings lksettings = {}; if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) return -EINVAL; From 38772f66723386797c085587e16ae738f849b6c9 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Sat, 28 Oct 2023 17:32:42 +0800 Subject: [PATCH 0856/1397] RDMA/hns: Fix unnecessary err return when using invalid congest control algorithm [ Upstream commit efb9cbf66440482ceaa90493d648226ab7ec2ebf ] Add a default congest control algorithm so that driver won't return an error when the configured algorithm is invalid. Fixes: f91696f2f053 ("RDMA/hns: Support congestion control type selection according to the FW") Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231028093242.670325-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 58d14f1562b9..486d635b6e3a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4755,10 +4755,15 @@ static int check_cong_type(struct ib_qp *ibqp, cong_alg->wnd_mode_sel = WND_LIMIT; break; default: - ibdev_err(&hr_dev->ib_dev, - "error type(%u) for congestion selection.\n", - hr_dev->caps.cong_type); - return -EINVAL; + ibdev_warn(&hr_dev->ib_dev, + "invalid type(%u) for congestion selection.\n", + hr_dev->caps.cong_type); + hr_dev->caps.cong_type = CONG_TYPE_DCQCN; + cong_alg->alg_sel = CONG_DCQCN; + cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; + cong_alg->dip_vld = DIP_INVALID; + cong_alg->wnd_mode_sel = WND_LIMIT; + break; } return 0; From 951c6d336ebe26988de0c414a1780a0974b9c1ae Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 14 Nov 2023 11:02:45 -0600 Subject: [PATCH 0857/1397] RDMA/irdma: Do not modify to SQD on error [ Upstream commit ba12ab66aa83a2340a51ad6e74b284269745138c ] Remove the modify to SQD before going to ERROR state. It is not needed. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20231114170246.238-2-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 3eb7a7a3a975..895799cbc4fd 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1424,13 +1424,6 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, case IB_QPS_SQE: case IB_QPS_ERR: case IB_QPS_RESET: - if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) { - spin_unlock_irqrestore(&iwqp->lock, flags); - info.next_iwarp_state = IRDMA_QP_STATE_SQD; - irdma_hw_modify_qp(iwdev, iwqp, &info, true); - spin_lock_irqsave(&iwqp->lock, flags); - } - if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { From 9b1b8ab2bd73f73366817572bca22ad3536eeca8 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 14 Nov 2023 11:02:46 -0600 Subject: [PATCH 0858/1397] RDMA/irdma: Add wait for suspend on SQD [ Upstream commit bd6da690c27d75cae432c09162d054b34fa2156f ] Currently, there is no wait for the QP suspend to complete on a modify to SQD state. Add a wait, after the modify to SQD state, for the Suspend Complete AE. While we are at it, update the suspend timeout value in irdma_prep_tc_change to use IRDMA_EVENT_TIMEOUT_MS too. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20231114170246.238-3-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/hw.c | 6 +++++- drivers/infiniband/hw/irdma/main.c | 2 +- drivers/infiniband/hw/irdma/main.h | 2 +- drivers/infiniband/hw/irdma/verbs.c | 21 +++++++++++++++++++++ drivers/infiniband/hw/irdma/verbs.h | 1 + 5 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 7cbdd5433dba..1fc4966cf115 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -321,7 +321,11 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) break; case IRDMA_AE_QP_SUSPEND_COMPLETE: if (iwqp->iwdev->vsi.tc_change_pending) { - atomic_dec(&iwqp->sc_qp.vsi->qp_suspend_reqs); + if (!atomic_dec_return(&qp->vsi->qp_suspend_reqs)) + wake_up(&iwqp->iwdev->suspend_wq); + } + if (iwqp->suspend_pending) { + iwqp->suspend_pending = false; wake_up(&iwqp->iwdev->suspend_wq); } break; diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 514453777e07..be1030d1adfa 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -48,7 +48,7 @@ static void irdma_prep_tc_change(struct irdma_device *iwdev) /* Wait for all qp's to suspend */ wait_event_timeout(iwdev->suspend_wq, !atomic_read(&iwdev->vsi.qp_suspend_reqs), - IRDMA_EVENT_TIMEOUT); + msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS)); irdma_ws_reset(&iwdev->vsi); } diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 82fc5f5b002c..cbf0db72e108 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -78,7 +78,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv; #define MAX_DPC_ITERATIONS 128 -#define IRDMA_EVENT_TIMEOUT 50000 +#define IRDMA_EVENT_TIMEOUT_MS 5000 #define IRDMA_VCHNL_EVENT_TIMEOUT 100000 #define IRDMA_RST_TIMEOUT_HZ 4 diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 895799cbc4fd..4fef576e9994 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1157,6 +1157,21 @@ static u8 irdma_roce_get_vlan_prio(const struct ib_gid_attr *attr, u8 prio) return prio; } +static int irdma_wait_for_suspend(struct irdma_qp *iwqp) +{ + if (!wait_event_timeout(iwqp->iwdev->suspend_wq, + !iwqp->suspend_pending, + msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) { + iwqp->suspend_pending = false; + ibdev_warn(&iwqp->iwdev->ibdev, + "modify_qp timed out waiting for suspend. qp_id = %d, last_ae = 0x%x\n", + iwqp->ibqp.qp_num, iwqp->last_aeq); + return -EBUSY; + } + + return 0; +} + /** * irdma_modify_qp_roce - modify qp request * @ibqp: qp's pointer for modify @@ -1420,6 +1435,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, info.next_iwarp_state = IRDMA_QP_STATE_SQD; issue_modify_qp = 1; + iwqp->suspend_pending = true; break; case IB_QPS_SQE: case IB_QPS_ERR: @@ -1460,6 +1476,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, ctx_info->rem_endpoint_idx = udp_info->arp_idx; if (irdma_hw_modify_qp(iwdev, iwqp, &info, true)) return -EINVAL; + if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) { + ret = irdma_wait_for_suspend(iwqp); + if (ret) + return ret; + } spin_lock_irqsave(&iwqp->lock, flags); if (iwqp->iwarp_state == info.curr_iwarp_state) { iwqp->iwarp_state = info.next_iwarp_state; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index 5d7b983f47a2..20297a14c9a6 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -196,6 +196,7 @@ struct irdma_qp { u8 flush_issued : 1; u8 sig_all : 1; u8 pau_mode : 1; + u8 suspend_pending : 1; u8 rsvd : 1; u8 iwarp_state; u16 term_sq_flush_code; From 4192a26f673afd7072c2c087ae7f6d1278485bd6 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Sun, 5 Nov 2023 23:36:15 +0000 Subject: [PATCH 0859/1397] arm64: dts: rockchip: Expand reg size of vdec node for RK3328 [ Upstream commit 0b6240d697a96eaa45a2a5503a274ebb4f162fa3 ] Expand the reg size for the vdec node to include cache/performance registers the rkvdec driver writes to. Fixes: 17408c9b119d ("arm64: dts: rockchip: Add vdec support for RK3328") Signed-off-by: Jonas Karlman Link: https://lore.kernel.org/r/20231105233630.3927502-9-jonas@kwiboo.se Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index e729e7a22b23..cc8209795c3e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -668,7 +668,7 @@ vdec: video-codec@ff360000 { compatible = "rockchip,rk3328-vdec", "rockchip,rk3399-vdec"; - reg = <0x0 0xff360000 0x0 0x400>; + reg = <0x0 0xff360000 0x0 0x480>; interrupts = ; clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>, <&cru SCLK_VDEC_CABAC>, <&cru SCLK_VDEC_CORE>; From bdcedad6b951d632bbd7dfc80332bdcf9a6abbef Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Sun, 5 Nov 2023 23:36:16 +0000 Subject: [PATCH 0860/1397] arm64: dts: rockchip: Expand reg size of vdec node for RK3399 [ Upstream commit 35938c18291b5da7422b2fac6dac0af11aa8d0d7 ] Expand the reg size for the vdec node to include cache/performance registers the rkvdec driver writes to. Also add missing clocks to the related power-domain. Fixes: cbd7214402ec ("arm64: dts: rockchip: Define the rockchip Video Decoder node on rk3399") Signed-off-by: Alex Bee Signed-off-by: Jonas Karlman Link: https://lore.kernel.org/r/20231105233630.3927502-10-jonas@kwiboo.se Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 5bc2d4faeea6..4a3d0af5ecfe 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1109,7 +1109,9 @@ power-domain@RK3399_PD_VDU { reg = ; clocks = <&cru ACLK_VDU>, - <&cru HCLK_VDU>; + <&cru HCLK_VDU>, + <&cru SCLK_VDU_CA>, + <&cru SCLK_VDU_CORE>; pm_qos = <&qos_video_m1_r>, <&qos_video_m1_w>; #power-domain-cells = <0>; @@ -1385,7 +1387,7 @@ vdec: video-codec@ff660000 { compatible = "rockchip,rk3399-vdec"; - reg = <0x0 0xff660000 0x0 0x400>; + reg = <0x0 0xff660000 0x0 0x480>; interrupts = ; clocks = <&cru ACLK_VDU>, <&cru HCLK_VDU>, <&cru SCLK_VDU_CA>, <&cru SCLK_VDU_CORE>; From e1e4a5cbb0a16a9d738db6f82b54aef52b56cff6 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Mon, 20 Nov 2023 18:05:35 +0800 Subject: [PATCH 0861/1397] ASoC: fsl_sai: Fix no frame sync clock issue on i.MX8MP [ Upstream commit 14e8442e0789598514f3c9de014950de9feda7a4 ] On i.MX8MP, when the TERE and FSD_MSTR enabled before configuring the word width, there will be no frame sync clock issue, because old word width impact the generation of frame sync. TERE enabled earlier only for i.MX8MP case for the hardware limitation, So need to disable FSD_MSTR before configuring word width, then enable FSD_MSTR bit for this specific case. Fixes: 3e4a82612998 ("ASoC: fsl_sai: MCLK bind with TX/RX enable bit") Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1700474735-3863-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl_sai.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 8a9a30dd31e2..3252eefc4bc0 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -674,6 +674,20 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, FSL_SAI_CR3_TRCE_MASK, FSL_SAI_CR3_TRCE((dl_cfg[dl_cfg_idx].mask[tx] & trce_mask))); + /* + * When the TERE and FSD_MSTR enabled before configuring the word width + * There will be no frame sync clock issue, because word width impact + * the generation of frame sync clock. + * + * TERE enabled earlier only for i.MX8MP case for the hardware limitation, + * We need to disable FSD_MSTR before configuring word width, then enable + * FSD_MSTR bit for this specific case. + */ + if (sai->soc_data->mclk_with_tere && sai->mclk_direction_output && + !sai->is_consumer_mode) + regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs), + FSL_SAI_CR4_FSD_MSTR, 0); + regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs), FSL_SAI_CR4_SYWD_MASK | FSL_SAI_CR4_FRSZ_MASK | FSL_SAI_CR4_CHMOD_MASK, @@ -681,6 +695,13 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, regmap_update_bits(sai->regmap, FSL_SAI_xCR5(tx, ofs), FSL_SAI_CR5_WNW_MASK | FSL_SAI_CR5_W0W_MASK | FSL_SAI_CR5_FBT_MASK, val_cr5); + + /* Enable FSD_MSTR after configuring word width */ + if (sai->soc_data->mclk_with_tere && sai->mclk_direction_output && + !sai->is_consumer_mode) + regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs), + FSL_SAI_CR4_FSD_MSTR, FSL_SAI_CR4_FSD_MSTR); + regmap_write(sai->regmap, FSL_SAI_xMR(tx), ~0UL - ((1 << min(channels, slots)) - 1)); From b659d48fc42608fa700548dbf20c9d871a048146 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:38 +0100 Subject: [PATCH 0862/1397] RDMA/rtrs-srv: Do not unconditionally enable irq [ Upstream commit 3ee7ecd712048ade6482bea4b2f3dcaf039c0348 ] When IO is completed, rtrs can be called in softirq context, unconditionally enabling irq could cause panic. To be on safe side, use spin_lock_irqsave and spin_unlock_irqrestore instread. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Jack Wang Signed-off-by: Florian-Ewald Mueller Signed-off-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-2-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 75e56604e462..ab4200041fd3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -65,8 +65,9 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path, { enum rtrs_srv_state old_state; bool changed = false; + unsigned long flags; - spin_lock_irq(&srv_path->state_lock); + spin_lock_irqsave(&srv_path->state_lock, flags); old_state = srv_path->state; switch (new_state) { case RTRS_SRV_CONNECTED: @@ -87,7 +88,7 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path, } if (changed) srv_path->state = new_state; - spin_unlock_irq(&srv_path->state_lock); + spin_unlock_irqrestore(&srv_path->state_lock, flags); return changed; } From 30614ed295bc55295ebe0a60abb484cc8cee0cd0 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:39 +0100 Subject: [PATCH 0863/1397] RDMA/rtrs-clt: Start hb after path_up [ Upstream commit 3e44a61b5db873612e20e7b7922468d7d1ac2d22 ] If we start hb too early, it will confuse server side to close the session. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-3-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index b6ee801fd0ff..dc482d1872fb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2345,8 +2345,6 @@ static int init_conns(struct rtrs_clt_path *clt_path) if (err) goto destroy; - rtrs_start_hb(&clt_path->s); - return 0; destroy: @@ -2620,6 +2618,7 @@ static int init_path(struct rtrs_clt_path *clt_path) goto out; } rtrs_clt_path_up(clt_path); + rtrs_start_hb(&clt_path->s); out: mutex_unlock(&clt_path->init_mutex); From dfb398fdcb7f0e029d14fd0c29e6247ee8b07c98 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Mon, 20 Nov 2023 16:41:40 +0100 Subject: [PATCH 0864/1397] RDMA/rtrs-srv: Check return values while processing info request [ Upstream commit ed1e52aefa16f15dc2f04054a3baf11726a7460e ] While processing info request, it could so happen that the srv_path goes to CLOSING state, cause of any of the error events from RDMA. That state change should be picked up while trying to change the state in process_info_req, by checking the return value. In case the state change call in process_info_req fails, we fail the processing. We should also check the return value for rtrs_srv_path_up, since it sends a link event to the client above, and the client can fail for any reason. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-4-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index ab4200041fd3..4be0e5b132d4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -710,20 +710,23 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(wc->opcode != IB_WC_SEND); } -static void rtrs_srv_path_up(struct rtrs_srv_path *srv_path) +static int rtrs_srv_path_up(struct rtrs_srv_path *srv_path) { struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_ctx *ctx = srv->ctx; - int up; + int up, ret = 0; mutex_lock(&srv->paths_ev_mutex); up = ++srv->paths_up; if (up == 1) - ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); + ret = ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); mutex_unlock(&srv->paths_ev_mutex); /* Mark session as established */ - srv_path->established = true; + if (!ret) + srv_path->established = true; + + return ret; } static void rtrs_srv_path_down(struct rtrs_srv_path *srv_path) @@ -852,7 +855,12 @@ static int process_info_req(struct rtrs_srv_con *con, goto iu_free; kobject_get(&srv_path->kobj); get_device(&srv_path->srv->dev); - rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED); + err = rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED); + if (!err) { + rtrs_err(s, "rtrs_srv_change_state(), err: %d\n", err); + goto iu_free; + } + rtrs_srv_start_hb(srv_path); /* @@ -861,7 +869,11 @@ static int process_info_req(struct rtrs_srv_con *con, * all connections are successfully established. Thus, simply notify * listener with a proper event if we are the first path. */ - rtrs_srv_path_up(srv_path); + err = rtrs_srv_path_up(srv_path); + if (err) { + rtrs_err(s, "rtrs_srv_path_up(), err: %d\n", err); + goto iu_free; + } ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, tx_iu->dma_addr, From 7bb1941e7f71664f37eae7762d8c24457f65f3de Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Mon, 20 Nov 2023 16:41:41 +0100 Subject: [PATCH 0865/1397] RDMA/rtrs-srv: Free srv_mr iu only when always_invalidate is true [ Upstream commit 3a71cd6ca0ce33d1af019ecf1d7167406fa54400 ] Since srv_mr->iu is allocated and used only when always_invalidate is true, free it only when always_invalidate is true. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-5-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 4be0e5b132d4..925b71481c62 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -551,7 +551,10 @@ static void unmap_cont_bufs(struct rtrs_srv_path *srv_path) struct rtrs_srv_mr *srv_mr; srv_mr = &srv_path->mrs[i]; - rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); + + if (always_invalidate) + rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); + ib_dereg_mr(srv_mr->mr); ib_dma_unmap_sg(srv_path->s.dev->ib_dev, srv_mr->sgt.sgl, srv_mr->sgt.nents, DMA_BIDIRECTIONAL); From 5aafdb5e9324c240941c805be36f2a8aa932c2c6 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Mon, 20 Nov 2023 16:41:42 +0100 Subject: [PATCH 0866/1397] RDMA/rtrs-srv: Destroy path files after making sure no IOs in-flight [ Upstream commit c4d32e77fc1006f99eeb78417efc3d81a384072a ] Destroying path files may lead to the freeing of rdma_stats. This creates the following race. An IO is in-flight, or has just passed the session state check in process_read/process_write. The close_work gets triggered and the function rtrs_srv_close_work() starts and does destroy path which frees the rdma_stats. After this the function process_read/process_write resumes and tries to update the stats through the function rtrs_srv_update_rdma_stats This commit solves the problem by moving the destroy path function to a later point. This point makes sure any inflights are completed. This is done by qp drain, and waiting for all in-flights through ops_id. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Md Haris Iqbal Signed-off-by: Santosh Kumar Pradhan Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-6-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 925b71481c62..1d33efb8fb03 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1532,7 +1532,6 @@ static void rtrs_srv_close_work(struct work_struct *work) srv_path = container_of(work, typeof(*srv_path), close_work); - rtrs_srv_destroy_path_files(srv_path); rtrs_srv_stop_hb(srv_path); for (i = 0; i < srv_path->s.con_num; i++) { @@ -1552,6 +1551,8 @@ static void rtrs_srv_close_work(struct work_struct *work) /* Wait for all completion */ wait_for_completion(&srv_path->complete_done); + rtrs_srv_destroy_path_files(srv_path); + /* Notify upper layer if we are the last path */ rtrs_srv_path_down(srv_path); From 4b13d47c9018d643126d5a38793d957b7e63e9dc Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:43 +0100 Subject: [PATCH 0867/1397] RDMA/rtrs-clt: Fix the max_send_wr setting [ Upstream commit 6d09f6f7d7584e099633282ea915988914f86529 ] For each write request, we need Request, Response Memory Registration, Local Invalidate. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-7-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index dc482d1872fb..b020b97dbe67 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1694,7 +1694,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) clt_path->s.dev_ref++; max_send_wr = min_t(int, wr_limit, /* QD * (REQ + RSP + FR REGS or INVS) + drain */ - clt_path->queue_depth * 3 + 1); + clt_path->queue_depth * 4 + 1); max_recv_wr = min_t(int, wr_limit, clt_path->queue_depth * 3 + 1); max_send_sge = 2; From ca19116cca9d9427c816043e05429b8b6a7d95bd Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:44 +0100 Subject: [PATCH 0868/1397] RDMA/rtrs-clt: Remove the warnings for req in_use check [ Upstream commit 0c8bb6eb70ca41031f663b4481aac9ac78b53bc6 ] As we chain the WR during write request: memory registration, rdma write, local invalidate, if only the last WR fail to send due to send queue overrun, the server can send back the reply, while client mark the req->in_use to false in case of error in rtrs_clt_req when error out from rtrs_post_rdma_write_sg. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-8-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index b020b97dbe67..1aee62aa1515 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -384,7 +384,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, struct rtrs_clt_path *clt_path; int err; - if (WARN_ON(!req->in_use)) + if (!req->in_use) return; if (WARN_ON(!req->con)) return; From cfaede20f5a542a6e4ddf3e9fd4d7f7f1eb4a156 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 21 Nov 2023 00:29:47 -0800 Subject: [PATCH 0869/1397] RDMA/bnxt_re: Correct module description string [ Upstream commit 422b19f7f006e813ee0865aadce6a62b3c263c42 ] The word "Driver" is repeated twice in the "modinfo bnxt_re" output description. Fix it. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1700555387-6277-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index c9066aade412..039801d93ed8 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -71,7 +71,7 @@ static char version[] = BNXT_RE_DESC "\n"; MODULE_AUTHOR("Eddie Wai "); -MODULE_DESCRIPTION(BNXT_RE_DESC " Driver"); +MODULE_DESCRIPTION(BNXT_RE_DESC); MODULE_LICENSE("Dual BSD/GPL"); /* globals */ From 55b6b95737e3e5575c9b683cdfa70c260a5528ae Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Tue, 21 Nov 2023 02:12:36 -0800 Subject: [PATCH 0870/1397] RDMA/irdma: Fix UAF in irdma_sc_ccq_get_cqe_info() [ Upstream commit 2b78832f50c4d711e161b166d7d8790968051546 ] When removing the irdma driver or unplugging its aux device, the ccq queue is released before destorying the cqp_cmpl_wq queue. But in the window, there may still be completion events for wqes. That will cause a UAF in irdma_sc_ccq_get_cqe_info(). [34693.333191] BUG: KASAN: use-after-free in irdma_sc_ccq_get_cqe_info+0x82f/0x8c0 [irdma] [34693.333194] Read of size 8 at addr ffff889097f80818 by task kworker/u67:1/26327 [34693.333194] [34693.333199] CPU: 9 PID: 26327 Comm: kworker/u67:1 Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 [34693.333200] Hardware name: SANGFOR Inspur/NULL, BIOS 4.1.13 08/01/2016 [34693.333211] Workqueue: cqp_cmpl_wq cqp_compl_worker [irdma] [34693.333213] Call Trace: [34693.333220] dump_stack+0x71/0xab [34693.333226] print_address_description+0x6b/0x290 [34693.333238] ? irdma_sc_ccq_get_cqe_info+0x82f/0x8c0 [irdma] [34693.333240] kasan_report+0x14a/0x2b0 [34693.333251] irdma_sc_ccq_get_cqe_info+0x82f/0x8c0 [irdma] [34693.333264] ? irdma_free_cqp_request+0x151/0x1e0 [irdma] [34693.333274] irdma_cqp_ce_handler+0x1fb/0x3b0 [irdma] [34693.333285] ? irdma_ctrl_init_hw+0x2c20/0x2c20 [irdma] [34693.333290] ? __schedule+0x836/0x1570 [34693.333293] ? strscpy+0x83/0x180 [34693.333296] process_one_work+0x56a/0x11f0 [34693.333298] worker_thread+0x8f/0xf40 [34693.333301] ? __kthread_parkme+0x78/0xf0 [34693.333303] ? rescuer_thread+0xc50/0xc50 [34693.333305] kthread+0x2a0/0x390 [34693.333308] ? kthread_destroy_worker+0x90/0x90 [34693.333310] ret_from_fork+0x1f/0x40 Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Shifeng Li Link: https://lore.kernel.org/r/20231121101236.581694-1-lishifeng1992@126.com Acked-by: Shiraz Saleem Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/hw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 1fc4966cf115..db3829ff922b 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -585,9 +585,6 @@ static void irdma_destroy_cqp(struct irdma_pci_f *rf) struct irdma_cqp *cqp = &rf->cqp; int status = 0; - if (rf->cqp_cmpl_wq) - destroy_workqueue(rf->cqp_cmpl_wq); - status = irdma_sc_cqp_destroy(dev->cqp); if (status) ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status); @@ -752,6 +749,9 @@ static void irdma_destroy_ccq(struct irdma_pci_f *rf) struct irdma_ccq *ccq = &rf->ccq; int status = 0; + if (rf->cqp_cmpl_wq) + destroy_workqueue(rf->cqp_cmpl_wq); + if (!rf->reset) status = irdma_sc_ccq_destroy(dev->ccq, 0, true); if (status) From 231ab2ff6039d250c7622e499cededd38fa62f92 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 19 Oct 2023 08:32:17 +0200 Subject: [PATCH 0871/1397] arm64: dt: imx93: tqma9352-mba93xxla: Fix LPUART2 pad config [ Upstream commit 2bfba37b3d90d6d2d499d5b0dfe99c05c38b1b54 ] LPUART2_RTS# has an external pull-down, so do not enable the internal pull-up at the same time, use a pull-down instead. Fixes: c982ecfa7992a ("arm64: dts: freescale: add initial device tree for MBa93xxLA SBC board") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts index f06139bdff97..3c5c67ebee5d 100644 --- a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts +++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts @@ -577,7 +577,7 @@ fsl,pins = < MX93_PAD_UART2_TXD__LPUART2_TX 0x31e MX93_PAD_UART2_RXD__LPUART2_RX 0x31e - MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x31e + MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x51e >; }; From 03766ae3467d18c515c9f3a90f1404854deac2de Mon Sep 17 00:00:00 2001 From: Stefan Kerkmann Date: Wed, 1 Nov 2023 12:03:37 +0100 Subject: [PATCH 0872/1397] ARM: dts: imx6q: skov: fix ethernet clock regression [ Upstream commit 6552218f4dc47ba3c6c5b58cc1e9eb208a2b438b ] A regression was introduced in the Skov specific i.MX6 flavor reve-mi1010ait-1cp1 device tree causing the external ethernet controller to not being selected as the clock source for the i.MX6 ethernet MAC, resulting in a none functional ethernet interface. The root cause is that the ethernet clock selection is now part of the clocks node, which is overwritten in the specific device tree and wasn't updated to contain these ethernet clocks. Fixes: c89614079e44 ("ARM: dts: imx6qdl-skov-cpu: configure ethernet reference clock parent") Signed-off-by: Stefan Kerkmann Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts index a3f247c722b4..0342a79ccd5d 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts @@ -37,9 +37,9 @@ &clks { assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; + <&clks IMX6QDL_CLK_LDB_DI1_SEL>, <&clks IMX6QDL_CLK_ENET_REF_SEL>; assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, - <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>; + <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, <&clk50m_phy>; }; &hdmi { From a7fb9f15fedce217ee6f8921f726d335b7a16037 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Mon, 27 Nov 2023 19:46:44 +0100 Subject: [PATCH 0873/1397] ARM: dts: rockchip: Fix sdmmc_pwren's pinmux setting for RK3128 [ Upstream commit 0c349b5001f8bdcead844484c15a0c4dfb341157 ] RK3128's reference design uses sdmmc_pwren pincontrol as GPIO - see [0]. Let's change it in the SoC DT as well. [0] https://github.com/rockchip-linux/kernel/commit/8c62deaf6025 Fixes: a0201bff6259 ("ARM: dts: rockchip: add rk3128 soc dtsi") Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20231127184643.13314-2-knaerzche@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm/boot/dts/rockchip/rk3128.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi index 88a4b0d6d928..80d81af5fe0e 100644 --- a/arch/arm/boot/dts/rockchip/rk3128.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi @@ -795,7 +795,7 @@ }; sdmmc_pwren: sdmmc-pwren { - rockchip,pins = <1 RK_PB6 1 &pcfg_pull_default>; + rockchip,pins = <1 RK_PB6 RK_FUNC_GPIO &pcfg_pull_default>; }; sdmmc_bus4: sdmmc-bus4 { From 2e594ee138c91ded4223e7c705920108d511877b Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 18 Nov 2023 13:42:52 +0100 Subject: [PATCH 0874/1397] ARM: dts: bcm2711-rpi-400: Fix delete-node of led_act [ Upstream commit dc761f11af2e39119d3a7942e3d10615f3d900e7 ] The LED ACT which is included from bcm2711-rpi-4-b doesn't exists on the Raspberry Pi 400. So the bcm2711-rpi-400.dts tries to use the delete-node directive in order to remove the complete node. Unfortunately the usage get broken in commit 1156e3a78bcc ("ARM: dts: bcm283x: Move ACT LED into separate dtsi") and now ACT and PWR LED using the same GPIO and this prevent probing of led-gpios on Raspberry Pi 400: leds-gpio: probe of leds failed with error -16 So fix the delete-node directive. Fixes: 1156e3a78bcc ("ARM: dts: bcm283x: Move ACT LED into separate dtsi") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20231118124252.14838-3-wahrenst@gmx.net Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts index 1ab8184302db..5a2869a18bd5 100644 --- a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts +++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts @@ -36,9 +36,7 @@ gpios = <&gpio 42 GPIO_ACTIVE_HIGH>; }; -&leds { - /delete-node/ led_act; -}; +/delete-node/ &led_act; &pm { /delete-property/ system-power-controller; From 8ebf05908f0e8ab5d2c50555ed059fa239b31072 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 24 Nov 2023 19:27:47 +0100 Subject: [PATCH 0875/1397] hwmon: (acpi_power_meter) Fix 4.29 MW bug [ Upstream commit 1fefca6c57fb928d2131ff365270cbf863d89c88 ] The ACPI specification says: "If an error occurs while obtaining the meter reading or if the value is not available then an Integer with all bits set is returned" Since the "integer" is 32 bits in case of the ACPI power meter, userspace will get a power reading of 2^32 * 1000 miliwatts (~4.29 MW) in case of such an error. This was discovered due to a lm_sensors bugreport (https://github.com/lm-sensors/lm-sensors/issues/460). Fix this by returning -ENODATA instead. Tested-by: Fixes: de584afa5e18 ("hwmon driver for ACPI 4.0 power meters") Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20231124182747.13956-1-W_Armin@gmx.de Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/acpi_power_meter.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index fa28d447f0df..b772c076a5ae 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -31,6 +31,7 @@ #define POWER_METER_CAN_NOTIFY (1 << 3) #define POWER_METER_IS_BATTERY (1 << 8) #define UNKNOWN_HYSTERESIS 0xFFFFFFFF +#define UNKNOWN_POWER 0xFFFFFFFF #define METER_NOTIFY_CONFIG 0x80 #define METER_NOTIFY_TRIP 0x81 @@ -348,6 +349,9 @@ static ssize_t show_power(struct device *dev, update_meter(resource); mutex_unlock(&resource->lock); + if (resource->power == UNKNOWN_POWER) + return -ENODATA; + return sprintf(buf, "%llu\n", resource->power * 1000); } From 8d73500fcf861c5c56475a5923980371f09f2886 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 16 Nov 2023 17:44:21 +0100 Subject: [PATCH 0876/1397] ASoC: codecs: lpass-tx-macro: set active_decimator correct default value [ Upstream commit a2f35ed1d237c459100adb0c39bb811d7f170977 ] The -1 value for active_decimator[dai_id] is considered as "not set", but at probe the table is initialized a 0, this prevents enabling the DEC0 Mixer since it will be considered as already set. Initialize the table entries as -1 to fix tx_macro_tx_mixer_put(). Fixes: 1c6a7f5250ce ("ASoC: codecs: tx-macro: fix active_decimator array") Fixes: c1057a08af43 ("ASoC: codecs: tx-macro: fix kcontrol put") Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20231116-topic-sm8x50-upstream-tx-macro-fix-active-decimator-set-v1-1-6edf402f4b6f@linaro.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/lpass-tx-macro.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index 3e33418898e8..ebddfa74ce0a 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -2021,6 +2021,11 @@ static int tx_macro_probe(struct platform_device *pdev) tx->dev = dev; + /* Set active_decimator default value */ + tx->active_decimator[TX_MACRO_AIF1_CAP] = -1; + tx->active_decimator[TX_MACRO_AIF2_CAP] = -1; + tx->active_decimator[TX_MACRO_AIF3_CAP] = -1; + /* set MCLK and NPL rates */ clk_set_rate(tx->mclk, MCLK_FREQ); clk_set_rate(tx->npl, MCLK_FREQ); From 7cd68b364f833fdf1438e5d2c591bd650bb89bdc Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Dec 2023 16:24:05 +0100 Subject: [PATCH 0877/1397] hwmon: (nzxt-kraken2) Fix error handling path in kraken2_probe() [ Upstream commit 35fe2ad259a3bfca15ab78c8ffb5278cb6149c89 ] There is no point in calling hid_hw_stop() if hid_hw_start() has failed. There is no point in calling hid_hw_close() if hid_hw_open() has failed. Update the error handling path accordingly. Fixes: 82e3430dfa8c ("hwmon: add driver for NZXT Kraken X42/X52/X62/X72") Reported-by: Aleksa Savic Closes: https://lore.kernel.org/all/121470f0-6c1f-418a-844c-7ec2e8a54b8e@gmail.com/ Signed-off-by: Christophe JAILLET Reviewed-by: Jonas Malaco Link: https://lore.kernel.org/r/a768e69851a07a1f4e29f270f4e2559063f07343.1701617030.git.christophe.jaillet@wanadoo.fr Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/nzxt-kraken2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/nzxt-kraken2.c b/drivers/hwmon/nzxt-kraken2.c index 428c77b5fce5..7caf387eb144 100644 --- a/drivers/hwmon/nzxt-kraken2.c +++ b/drivers/hwmon/nzxt-kraken2.c @@ -161,13 +161,13 @@ static int kraken2_probe(struct hid_device *hdev, ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); if (ret) { hid_err(hdev, "hid hw start failed with %d\n", ret); - goto fail_and_stop; + return ret; } ret = hid_hw_open(hdev); if (ret) { hid_err(hdev, "hid hw open failed with %d\n", ret); - goto fail_and_close; + goto fail_and_stop; } priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "kraken2", From 9c78a21a5a528af4e0167c9dfa5d10ba41515328 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:21 +0200 Subject: [PATCH 0878/1397] firmware: arm_scmi: Extend perf protocol ops to get number of domains [ Upstream commit e9090e70e618cd62ab7bf2914511e5eea31a2535 ] Similar to other protocol ops, it's useful for an scmi module driver to get the number of supported performance domains, hence let's make this available by adding a new perf protocol callback. Note that, a user is being added from subsequent changes. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20230825112633.236607-2-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla Stable-dep-of: 8e3c98d9187e ("firmware: arm_scmi: Fix frequency truncation by promoting multiplier type") Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/perf.c | 8 ++++++++ include/linux/scmi_protocol.h | 2 ++ 2 files changed, 10 insertions(+) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 30dedd6ebfde..da901c65388a 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -423,6 +423,13 @@ scmi_perf_describe_levels_get(const struct scmi_protocol_handle *ph, return ret; } +static int scmi_perf_num_domains_get(const struct scmi_protocol_handle *ph) +{ + struct scmi_perf_info *pi = ph->get_priv(ph); + + return pi->num_domains; +} + static int scmi_perf_msg_limits_set(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf) { @@ -948,6 +955,7 @@ scmi_power_scale_get(const struct scmi_protocol_handle *ph) } static const struct scmi_perf_proto_ops perf_proto_ops = { + .num_domains_get = scmi_perf_num_domains_get, .limits_set = scmi_perf_limits_set, .limits_get = scmi_perf_limits_get, .level_set = scmi_perf_level_set, diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index e6fe4f73ffe6..71b39cbbdace 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -101,6 +101,7 @@ struct scmi_clk_proto_ops { * struct scmi_perf_proto_ops - represents the various operations provided * by SCMI Performance Protocol * + * @num_domains_get: gets the number of supported performance domains * @limits_set: sets limits on the performance level of a domain * @limits_get: gets limits on the performance level of a domain * @level_set: sets the performance level of a domain @@ -120,6 +121,7 @@ struct scmi_clk_proto_ops { * or in some other (abstract) scale */ struct scmi_perf_proto_ops { + int (*num_domains_get)(const struct scmi_protocol_handle *ph); int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf); int (*limits_get)(const struct scmi_protocol_handle *ph, u32 domain, From c3af26f536161a34b26cabc0228a8d8ab50bf4f3 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 25 Aug 2023 13:26:22 +0200 Subject: [PATCH 0879/1397] firmware: arm_scmi: Extend perf protocol ops to get information of a domain [ Upstream commit 3d99ed60721bf2e108c8fc660775766057689a92 ] Similar to other protocol ops, it's useful for an scmi module driver to get some generic information of a performance domain. Therefore, let's add a new callback to provide this information. The information is currently limited to the name of the performance domain and whether the set-level operation is supported, although this can easily be extended if we find the need for it. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20230825112633.236607-3-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla Stable-dep-of: 8e3c98d9187e ("firmware: arm_scmi: Fix frequency truncation by promoting multiplier type") Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/perf.c | 47 ++++++++++++++++++++------------ include/linux/scmi_protocol.h | 8 ++++++ 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index da901c65388a..6677caefb36b 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -145,7 +145,6 @@ struct scmi_msg_resp_perf_describe_levels_v4 { struct perf_dom_info { u32 id; bool set_limits; - bool set_perf; bool perf_limit_notify; bool perf_level_notify; bool perf_fastchannels; @@ -154,7 +153,7 @@ struct perf_dom_info { u32 sustained_freq_khz; u32 sustained_perf_level; u32 mult_factor; - char name[SCMI_MAX_STR_SIZE]; + struct scmi_perf_domain_info info; struct scmi_opp opp[MAX_OPPS]; struct scmi_fc_info *fc_info; struct xarray opps_by_idx; @@ -257,7 +256,7 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, flags = le32_to_cpu(attr->flags); dom_info->set_limits = SUPPORTS_SET_LIMITS(flags); - dom_info->set_perf = SUPPORTS_SET_PERF_LVL(flags); + dom_info->info.set_perf = SUPPORTS_SET_PERF_LVL(flags); dom_info->perf_limit_notify = SUPPORTS_PERF_LIMIT_NOTIFY(flags); dom_info->perf_level_notify = SUPPORTS_PERF_LEVEL_NOTIFY(flags); dom_info->perf_fastchannels = SUPPORTS_PERF_FASTCHANNELS(flags); @@ -276,7 +275,8 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->mult_factor = (dom_info->sustained_freq_khz * 1000) / dom_info->sustained_perf_level; - strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); + strscpy(dom_info->info.name, attr->name, + SCMI_SHORT_NAME_MAX_SIZE); } ph->xops->xfer_put(ph, t); @@ -288,7 +288,7 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, if (!ret && PROTOCOL_REV_MAJOR(version) >= 0x3 && SUPPORTS_EXTENDED_NAMES(flags)) ph->hops->extended_name_get(ph, PERF_DOMAIN_NAME_GET, - dom_info->id, dom_info->name, + dom_info->id, dom_info->info.name, SCMI_MAX_STR_SIZE); if (dom_info->level_indexing_mode) { @@ -430,6 +430,29 @@ static int scmi_perf_num_domains_get(const struct scmi_protocol_handle *ph) return pi->num_domains; } +static inline struct perf_dom_info * +scmi_perf_domain_lookup(const struct scmi_protocol_handle *ph, u32 domain) +{ + struct scmi_perf_info *pi = ph->get_priv(ph); + + if (domain >= pi->num_domains) + return ERR_PTR(-EINVAL); + + return pi->dom_info + domain; +} + +static const struct scmi_perf_domain_info * +scmi_perf_info_get(const struct scmi_protocol_handle *ph, u32 domain) +{ + struct perf_dom_info *dom; + + dom = scmi_perf_domain_lookup(ph, domain); + if (IS_ERR(dom)) + return ERR_PTR(-EINVAL); + + return &dom->info; +} + static int scmi_perf_msg_limits_set(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf) { @@ -453,17 +476,6 @@ static int scmi_perf_msg_limits_set(const struct scmi_protocol_handle *ph, return ret; } -static inline struct perf_dom_info * -scmi_perf_domain_lookup(const struct scmi_protocol_handle *ph, u32 domain) -{ - struct scmi_perf_info *pi = ph->get_priv(ph); - - if (domain >= pi->num_domains) - return ERR_PTR(-EINVAL); - - return pi->dom_info + domain; -} - static int __scmi_perf_limits_set(const struct scmi_protocol_handle *ph, struct perf_dom_info *dom, u32 max_perf, u32 min_perf) @@ -819,7 +831,7 @@ static int scmi_dvfs_device_opps_add(const struct scmi_protocol_handle *ph, } dev_dbg(dev, "[%d][%s]:: Registered OPP[%d] %lu\n", - domain, dom->name, idx, freq); + domain, dom->info.name, idx, freq); } return 0; } @@ -956,6 +968,7 @@ scmi_power_scale_get(const struct scmi_protocol_handle *ph) static const struct scmi_perf_proto_ops perf_proto_ops = { .num_domains_get = scmi_perf_num_domains_get, + .info_get = scmi_perf_info_get, .limits_set = scmi_perf_limits_set, .limits_get = scmi_perf_limits_get, .level_set = scmi_perf_level_set, diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 71b39cbbdace..71923ae63b01 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -97,11 +97,17 @@ struct scmi_clk_proto_ops { u32 clk_id); }; +struct scmi_perf_domain_info { + char name[SCMI_MAX_STR_SIZE]; + bool set_perf; +}; + /** * struct scmi_perf_proto_ops - represents the various operations provided * by SCMI Performance Protocol * * @num_domains_get: gets the number of supported performance domains + * @info_get: get the information of a performance domain * @limits_set: sets limits on the performance level of a domain * @limits_get: gets limits on the performance level of a domain * @level_set: sets the performance level of a domain @@ -122,6 +128,8 @@ struct scmi_clk_proto_ops { */ struct scmi_perf_proto_ops { int (*num_domains_get)(const struct scmi_protocol_handle *ph); + const struct scmi_perf_domain_info __must_check *(*info_get) + (const struct scmi_protocol_handle *ph, u32 domain); int (*limits_set)(const struct scmi_protocol_handle *ph, u32 domain, u32 max_perf, u32 min_perf); int (*limits_get)(const struct scmi_protocol_handle *ph, u32 domain, From 1ea9f8abe364e41dc0d942baf01b5020f5b343ae Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 30 Nov 2023 20:43:42 +0000 Subject: [PATCH 0880/1397] firmware: arm_scmi: Fix frequency truncation by promoting multiplier type [ Upstream commit 8e3c98d9187e09274fc000a7d1a77b070a42d259 ] Fix the possible frequency truncation for all values equal to or greater 4GHz on 64bit machines by updating the multiplier 'mult_factor' to 'unsigned long' type. It is also possible that the multiplier itself can be greater than or equal to 2^32. So we need to also fix the equation computing the value of the multiplier. Fixes: a9e3fbfaa0ff ("firmware: arm_scmi: add initial support for performance protocol") Reported-by: Sibi Sankar Closes: https://lore.kernel.org/all/20231129065748.19871-3-quic_sibis@quicinc.com/ Cc: Cristian Marussi Link: https://lore.kernel.org/r/20231130204343.503076-1-sudeep.holla@arm.com Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 6677caefb36b..7aa9b4ff48d6 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -152,7 +152,7 @@ struct perf_dom_info { u32 opp_count; u32 sustained_freq_khz; u32 sustained_perf_level; - u32 mult_factor; + unsigned long mult_factor; struct scmi_perf_domain_info info; struct scmi_opp opp[MAX_OPPS]; struct scmi_fc_info *fc_info; @@ -273,8 +273,8 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->mult_factor = 1000; else dom_info->mult_factor = - (dom_info->sustained_freq_khz * 1000) / - dom_info->sustained_perf_level; + (dom_info->sustained_freq_khz * 1000UL) + / dom_info->sustained_perf_level; strscpy(dom_info->info.name, attr->name, SCMI_SHORT_NAME_MAX_SIZE); } From 4893588806552fa7b1e44f3f1549598d06d5bdd2 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 25 Sep 2023 15:17:13 +0200 Subject: [PATCH 0881/1397] firmware: arm_scmi: Simplify error path in scmi_dvfs_device_opps_add() [ Upstream commit 033ca4de129646e9969a6838b44cca0fac38e219 ] Let's simplify the code in scmi_dvfs_device_opps_add() by using dev_pm_opp_remove_all_dynamic() in the error path. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20230925131715.138411-8-ulf.hansson@linaro.org Signed-off-by: Sudeep Holla Stable-dep-of: 77f5032e94f2 ("firmware: arm_scmi: Fix possible frequency truncation when using level indexing mode") Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/perf.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 7aa9b4ff48d6..596316e6e1ba 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -799,7 +799,6 @@ static int scmi_dvfs_device_opps_add(const struct scmi_protocol_handle *ph, { int idx, ret, domain; unsigned long freq; - struct scmi_opp *opp; struct perf_dom_info *dom; domain = scmi_dev_domain_id(dev); @@ -810,23 +809,16 @@ static int scmi_dvfs_device_opps_add(const struct scmi_protocol_handle *ph, if (IS_ERR(dom)) return PTR_ERR(dom); - for (opp = dom->opp, idx = 0; idx < dom->opp_count; idx++, opp++) { + for (idx = 0; idx < dom->opp_count; idx++) { if (!dom->level_indexing_mode) - freq = opp->perf * dom->mult_factor; + freq = dom->opp[idx].perf * dom->mult_factor; else - freq = opp->indicative_freq * 1000; + freq = dom->opp[idx].indicative_freq * 1000; ret = dev_pm_opp_add(dev, freq, 0); if (ret) { dev_warn(dev, "failed to add opp %luHz\n", freq); - - while (idx-- > 0) { - if (!dom->level_indexing_mode) - freq = (--opp)->perf * dom->mult_factor; - else - freq = (--opp)->indicative_freq * 1000; - dev_pm_opp_remove(dev, freq); - } + dev_pm_opp_remove_all_dynamic(dev); return ret; } From 0ca497a90ef353bf59f9a07108f608bc6be32e47 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 30 Nov 2023 20:43:43 +0000 Subject: [PATCH 0882/1397] firmware: arm_scmi: Fix possible frequency truncation when using level indexing mode [ Upstream commit 77f5032e94f244ba08db51e17ca8f37bd7ff9acb ] The multiplier is already promoted to unsigned long, however the frequency calculations done when using level indexing mode doesn't use the multiplier computed. It instead hardcodes the multiplier value of 1000 at all the usage sites. Clean that up by assigning the multiplier value of 1000 when using the perf level indexing mode and update the frequency calculations to use the multiplier instead. It should fix the possible frequency truncation for all the values greater than or equal to 4GHz on 64-bit machines. Fixes: 31c7c1397a33 ("firmware: arm_scmi: Add v3.2 perf level indexing mode support") Reported-by: Sibi Sankar Closes: https://lore.kernel.org/all/20231129065748.19871-3-quic_sibis@quicinc.com/ Cc: Cristian Marussi Link: https://lore.kernel.org/r/20231130204343.503076-2-sudeep.holla@arm.com Reviewed-by: Cristian Marussi Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/perf.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 596316e6e1ba..e887fd169043 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -268,7 +268,8 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, dom_info->sustained_perf_level = le32_to_cpu(attr->sustained_perf_level); if (!dom_info->sustained_freq_khz || - !dom_info->sustained_perf_level) + !dom_info->sustained_perf_level || + dom_info->level_indexing_mode) /* CPUFreq converts to kHz, hence default 1000 */ dom_info->mult_factor = 1000; else @@ -813,7 +814,7 @@ static int scmi_dvfs_device_opps_add(const struct scmi_protocol_handle *ph, if (!dom->level_indexing_mode) freq = dom->opp[idx].perf * dom->mult_factor; else - freq = dom->opp[idx].indicative_freq * 1000; + freq = dom->opp[idx].indicative_freq * dom->mult_factor; ret = dev_pm_opp_add(dev, freq, 0); if (ret) { @@ -862,7 +863,8 @@ static int scmi_dvfs_freq_set(const struct scmi_protocol_handle *ph, u32 domain, } else { struct scmi_opp *opp; - opp = LOOKUP_BY_FREQ(dom->opps_by_freq, freq / 1000); + opp = LOOKUP_BY_FREQ(dom->opps_by_freq, + freq / dom->mult_factor); if (!opp) return -EIO; @@ -896,7 +898,7 @@ static int scmi_dvfs_freq_get(const struct scmi_protocol_handle *ph, u32 domain, if (!opp) return -EIO; - *freq = opp->indicative_freq * 1000; + *freq = opp->indicative_freq * dom->mult_factor; } return ret; @@ -919,7 +921,7 @@ static int scmi_dvfs_est_power_get(const struct scmi_protocol_handle *ph, if (!dom->level_indexing_mode) opp_freq = opp->perf * dom->mult_factor; else - opp_freq = opp->indicative_freq * 1000; + opp_freq = opp->indicative_freq * dom->mult_factor; if (opp_freq < *freq) continue; From b8668fe7a5e88e859f7506a1b5ed58e6ce474f9c Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 4 Dec 2023 15:41:56 +0800 Subject: [PATCH 0883/1397] ASoC: wm_adsp: fix memleak in wm_adsp_buffer_populate [ Upstream commit 29046a78a3c0a1f8fa0427f164caa222f003cf5b ] When wm_adsp_buffer_read() fails, we should free buf->regions. Otherwise, the callers of wm_adsp_buffer_populate() will directly free buf on failure, which makes buf->regions a leaked memory. Fixes: a792af69b08f ("ASoC: wm_adsp: Refactor compress stream initialisation") Signed-off-by: Dinghao Liu Reviewed-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20231204074158.12026-1-dinghao.liu@zju.edu.cn Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm_adsp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index d1b9238d391e..cb654f1b09f1 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1451,12 +1451,12 @@ static int wm_adsp_buffer_populate(struct wm_adsp_compr_buf *buf) ret = wm_adsp_buffer_read(buf, caps->region_defs[i].base_offset, ®ion->base_addr); if (ret < 0) - return ret; + goto err; ret = wm_adsp_buffer_read(buf, caps->region_defs[i].size_offset, &offset); if (ret < 0) - return ret; + goto err; region->cumulative_size = offset; @@ -1467,6 +1467,10 @@ static int wm_adsp_buffer_populate(struct wm_adsp_compr_buf *buf) } return 0; + +err: + kfree(buf->regions); + return ret; } static void wm_adsp_buffer_clear(struct wm_adsp_compr_buf *buf) From d103c131ef3be6ed6f4d779fc7b1fff7d01368f6 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:41 -0600 Subject: [PATCH 0884/1397] RDMA/core: Fix umem iterator when PAGE_SIZE is greater then HCA pgsz [ Upstream commit 4fbc3a52cd4d14de3793f4b2c721d7306ea84cf9 ] 64k pages introduce the situation in this diagram when the HCA 4k page size is being used: +-------------------------------------------+ <--- 64k aligned VA | | | HCA 4k page | | | +-------------------------------------------+ | o | | | | o | | | | o | +-------------------------------------------+ | | | HCA 4k page | | | +-------------------------------------------+ <--- Live HCA page |OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO| <--- offset | | <--- VA | MR data | +-------------------------------------------+ | | | HCA 4k page | | | +-------------------------------------------+ | o | | | | o | | | | o | +-------------------------------------------+ | | | HCA 4k page | | | +-------------------------------------------+ The VA addresses are coming from rdma-core in this diagram can be arbitrary, but for 64k pages, the VA may be offset by some number of HCA 4k pages and followed by some number of HCA 4k pages. The current iterator doesn't account for either the preceding 4k pages or the following 4k pages. Fix the issue by extending the ib_block_iter to contain the number of DMA pages like comment [1] says and by using __sg_advance to start the iterator at the first live HCA page. The changes are contained in a parallel set of iterator start and next functions that are umem aware and specific to umem since there is one user of the rdma_for_each_block() without umem. These two fixes prevents the extra pages before and after the user MR data. Fix the preceding pages by using the __sq_advance field to start at the first 4k page containing MR data. Fix the following pages by saving the number of pgsz blocks in the iterator state and downcounting on each next. This fix allows for the elimination of the small page crutch noted in the Fixes. Fixes: 10c75ccb54e4 ("RDMA/umem: Prevent small pages from being returned by ib_umem_find_best_pgsz()") Link: https://lore.kernel.org/r/20231129202143.1434-2-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/umem.c | 6 ------ include/rdma/ib_umem.h | 9 ++++++++- include/rdma/ib_verbs.h | 1 + 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index f9ab671c8eda..07c571c7b699 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -96,12 +96,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, return page_size; } - /* rdma_for_each_block() has a bug if the page size is smaller than the - * page size used to build the umem. For now prevent smaller page sizes - * from being returned. - */ - pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT); - /* The best result is the smallest page size that results in the minimum * number of required pages. Compute the largest page size that could * work based on VA address bits that don't change. diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 95896472a82b..565a85044541 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -77,6 +77,13 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, { __rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl, umem->sgt_append.sgt.nents, pgsz); + biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1); + biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz); +} + +static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter) +{ + return __rdma_block_iter_next(biter) && biter->__sg_numblocks--; } /** @@ -92,7 +99,7 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, */ #define rdma_umem_for_each_dma_block(umem, biter, pgsz) \ for (__rdma_umem_block_iter_start(biter, umem, pgsz); \ - __rdma_block_iter_next(biter);) + __rdma_umem_block_iter_next(biter);) #ifdef CONFIG_INFINIBAND_USER_MEM diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 533ab92684d8..62f9d126a71a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2846,6 +2846,7 @@ struct ib_block_iter { /* internal states */ struct scatterlist *__sg; /* sg holding the current aligned block */ dma_addr_t __dma_addr; /* unaligned DMA address of this block */ + size_t __sg_numblocks; /* ib_umem_num_dma_blocks() */ unsigned int __sg_nents; /* number of SG entries */ unsigned int __sg_advance; /* number of bytes to advance in sg in next step */ unsigned int __pg_bit; /* alignment of current block */ From 12a77574f0901d3ee9a20c2ef45d53f18cd7d887 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:42 -0600 Subject: [PATCH 0885/1397] RDMA/irdma: Ensure iWarp QP queue memory is OS paged aligned [ Upstream commit 0a5ec366de7e94192669ba08de6ed336607fd282 ] The SQ is shared for between kernel and used by storing the kernel page pointer and passing that to a kmap_atomic(). This then requires that the alignment is PAGE_SIZE aligned. Fix by adding an iWarp specific alignment check. Fixes: e965ef0e7b2c ("RDMA/irdma: Split QP handler into irdma_reg_user_mr_type_qp") Link: https://lore.kernel.org/r/20231129202143.1434-3-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 4fef576e9994..c7cb328b90f9 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2935,6 +2935,11 @@ static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req, int err; u8 lvl; + /* iWarp: Catch page not starting on OS page boundary */ + if (!rdma_protocol_roce(&iwdev->ibdev, 1) && + ib_umem_offset(iwmr->region)) + return -EINVAL; + total = req.sq_pages + req.rq_pages + 1; if (total > iwmr->page_cnt) return -EINVAL; From 926c1c7a8f2ec08a0d100ed48a845a76271192da Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:43 -0600 Subject: [PATCH 0886/1397] RDMA/irdma: Fix support for 64k pages [ Upstream commit 03769f72d66edab82484449ed594cb6b00ae0223 ] Virtual QP and CQ require a 4K HW page size but the driver passes PAGE_SIZE to ib_umem_find_best_pgsz() instead. Fix this by using the appropriate 4k value in the bitmap passed to ib_umem_find_best_pgsz(). Fixes: 693a5386eff0 ("RDMA/irdma: Split mr alloc and free into new functions") Link: https://lore.kernel.org/r/20231129202143.1434-4-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index c7cb328b90f9..2f1bedd3a520 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2903,7 +2903,7 @@ static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region, iwmr->type = reg_type; pgsz_bitmap = (reg_type == IRDMA_MEMREG_TYPE_MEM) ? - iwdev->rf->sc_dev.hw_attrs.page_size_cap : PAGE_SIZE; + iwdev->rf->sc_dev.hw_attrs.page_size_cap : SZ_4K; iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt); if (unlikely(!iwmr->page_size)) { From fc054130cdfbb91ddb0449d5587b2b00d4c90ce6 Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Thu, 30 Nov 2023 00:14:15 -0800 Subject: [PATCH 0887/1397] RDMA/irdma: Avoid free the non-cqp_request scratch [ Upstream commit e3e82fcb79eeb3f1a88a89f676831773caff514a ] When creating ceq_0 during probing irdma, cqp.sc_cqp will be sent as a cqp_request to cqp->sc_cqp.sq_ring. If the request is pending when removing the irdma driver or unplugging its aux device, cqp.sc_cqp will be dereferenced as wrong struct in irdma_free_pending_cqp_request(). PID: 3669 TASK: ffff88aef892c000 CPU: 28 COMMAND: "kworker/28:0" #0 [fffffe0000549e38] crash_nmi_callback at ffffffff810e3a34 #1 [fffffe0000549e40] nmi_handle at ffffffff810788b2 #2 [fffffe0000549ea0] default_do_nmi at ffffffff8107938f #3 [fffffe0000549eb8] do_nmi at ffffffff81079582 #4 [fffffe0000549ef0] end_repeat_nmi at ffffffff82e016b4 [exception RIP: native_queued_spin_lock_slowpath+1291] RIP: ffffffff8127e72b RSP: ffff88aa841ef778 RFLAGS: 00000046 RAX: 0000000000000000 RBX: ffff88b01f849700 RCX: ffffffff8127e47e RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff83857ec0 RBP: ffff88afe3e4efc8 R8: ffffed15fc7c9dfa R9: ffffed15fc7c9dfa R10: 0000000000000001 R11: ffffed15fc7c9df9 R12: 0000000000740000 R13: ffff88b01f849708 R14: 0000000000000003 R15: ffffed1603f092e1 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0000 -- -- #5 [ffff88aa841ef778] native_queued_spin_lock_slowpath at ffffffff8127e72b #6 [ffff88aa841ef7b0] _raw_spin_lock_irqsave at ffffffff82c22aa4 #7 [ffff88aa841ef7c8] __wake_up_common_lock at ffffffff81257363 #8 [ffff88aa841ef888] irdma_free_pending_cqp_request at ffffffffa0ba12cc [irdma] #9 [ffff88aa841ef958] irdma_cleanup_pending_cqp_op at ffffffffa0ba1469 [irdma] #10 [ffff88aa841ef9c0] irdma_ctrl_deinit_hw at ffffffffa0b2989f [irdma] #11 [ffff88aa841efa28] irdma_remove at ffffffffa0b252df [irdma] #12 [ffff88aa841efae8] auxiliary_bus_remove at ffffffff8219afdb #13 [ffff88aa841efb00] device_release_driver_internal at ffffffff821882e6 #14 [ffff88aa841efb38] bus_remove_device at ffffffff82184278 #15 [ffff88aa841efb88] device_del at ffffffff82179d23 #16 [ffff88aa841efc48] ice_unplug_aux_dev at ffffffffa0eb1c14 [ice] #17 [ffff88aa841efc68] ice_service_task at ffffffffa0d88201 [ice] #18 [ffff88aa841efde8] process_one_work at ffffffff811c589a #19 [ffff88aa841efe60] worker_thread at ffffffff811c71ff #20 [ffff88aa841eff10] kthread at ffffffff811d87a0 #21 [ffff88aa841eff50] ret_from_fork at ffffffff82e0022f Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Link: https://lore.kernel.org/r/20231130081415.891006-1-lishifeng@sangfor.com.cn Suggested-by: "Ismail, Mustafa" Signed-off-by: Shifeng Li Reviewed-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/hw.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index db3829ff922b..564c9188e1f8 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1184,7 +1184,6 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, int status; struct irdma_ceq_init_info info = {}; struct irdma_sc_dev *dev = &rf->sc_dev; - u64 scratch; u32 ceq_size; info.ceq_id = ceq_id; @@ -1205,14 +1204,13 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, iwceq->sc_ceq.ceq_id = ceq_id; info.dev = dev; info.vsi = vsi; - scratch = (uintptr_t)&rf->cqp.sc_cqp; status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info); if (!status) { if (dev->ceq_valid) status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq, IRDMA_OP_CEQ_CREATE); else - status = irdma_sc_cceq_create(&iwceq->sc_ceq, scratch); + status = irdma_sc_cceq_create(&iwceq->sc_ceq, 0); } if (status) { From 584bdbca913ed4e6b580f5cf7527bf5b9a731746 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Dec 2023 08:27:36 +0100 Subject: [PATCH 0888/1397] drm/bridge: tc358768: select CONFIG_VIDEOMODE_HELPERS [ Upstream commit 26513300978f7285c3e776c144f27ef71be61f57 ] A dependency on this feature was recently introduced: x86_64-linux-ld: vmlinux.o: in function `tc358768_bridge_pre_enable': tc358768.c:(.text+0xbe3dae): undefined reference to `drm_display_mode_to_videomode' Make sure this is always enabled. Fixes: e5fb21678136 ("drm/bridge: tc358768: Use struct videomode") Signed-off-by: Arnd Bergmann Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231204072814.968816-1-arnd@kernel.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20231204072814.968816-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index ba82a1142adf..3e6a4e2044c0 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -313,6 +313,7 @@ config DRM_TOSHIBA_TC358768 select REGMAP_I2C select DRM_PANEL select DRM_MIPI_DSI + select VIDEOMODE_HELPERS help Toshiba TC358768AXBG/TC358778XBG DSI bridge chip driver. From c6a952d44a4148149ddbde169945876c25fac585 Mon Sep 17 00:00:00 2001 From: Nathan Rossi Date: Mon, 6 Nov 2023 02:14:36 +0000 Subject: [PATCH 0889/1397] arm64: dts: imx8mp: imx8mq: Add parkmode-disable-ss-quirk on DWC3 [ Upstream commit 209043cf092d7b0d4739921b3f11d6d0b451eabf ] The i.MX8MP and i.MX8MQ devices both use the same DWC3 controller and are both affected by a known issue with the controller due to specific behaviour when park mode is enabled in SuperSpeed host mode operation. Under heavy USB traffic from multiple endpoints the controller will sometimes incorrectly process transactions such that some transactions are lost, or the controller may hang when processing transactions. When the controller hangs it does not recover. This issue is documented partially within the linux-imx vendor kernel which references a Synopsys STAR number 9001415732 in commits [1] and additional details in [2]. Those commits provide some additional controller internal implementation specifics around the incorrect behaviour of the SuperSpeed host controller operation when park mode is enabled. The summary of this issue is that the host controller can incorrectly enter/exit park mode such that part of the controller is in a state which behaves as if in park mode even though it is not. In this state the controller incorrectly calculates the number of TRBs available which results in incorrect access of the internal caches causing the overwrite of pending requests in the cache which should have been processed but are ignored. This can cause the controller to drop the requests or hang waiting for the pending state of the dropped requests. The workaround for this issue is to disable park mode for SuperSpeed operation of the controller through the GUCTL1[17] bit. This is already available as a quirk for the DWC3 controller and can be enabled via the 'snps,parkmode-disable-ss-quirk' device tree property. It is possible to replicate this failure on an i.MX8MP EVK with a USB Hub connecting 4 SuperSpeed USB flash drives. Performing continuous small read operations (dd if=/dev/sd... of=/dev/null bs=16) on the block devices will result in device errors initially and will eventually result in the controller hanging. [13240.896936] xhci-hcd xhci-hcd.0.auto: WARN Event TRB for slot 4 ep 2 with no TDs queued? [13240.990708] usb 2-1.3: reset SuperSpeed USB device number 5 using xhci-hcd [13241.015582] sd 2:0:0:0: [sdc] tag#0 UNKNOWN(0x2003) Result: hostbyte=0x07 driverbyte=DRIVER_OK cmd_age=0s [13241.025198] sd 2:0:0:0: [sdc] tag#0 CDB: opcode=0x28 28 00 00 00 03 e0 00 01 00 00 [13241.032949] I/O error, dev sdc, sector 992 op 0x0:(READ) flags 0x80700 phys_seg 25 prio class 2 [13272.150710] usb 2-1.2: reset SuperSpeed USB device number 4 using xhci-hcd [13272.175469] sd 1:0:0:0: [sdb] tag#0 UNKNOWN(0x2003) Result: hostbyte=0x03 driverbyte=DRIVER_OK cmd_age=31s [13272.185365] sd 1:0:0:0: [sdb] tag#0 CDB: opcode=0x28 28 00 00 00 03 e0 00 01 00 00 [13272.193385] I/O error, dev sdb, sector 992 op 0x0:(READ) flags 0x80700 phys_seg 18 prio class 2 [13434.846556] xhci-hcd xhci-hcd.0.auto: xHCI host not responding to stop endpoint command [13434.854592] xhci-hcd xhci-hcd.0.auto: xHCI host controller not responding, assume dead [13434.862553] xhci-hcd xhci-hcd.0.auto: HC died; cleaning up [1] https://github.com/nxp-imx/linux-imx/commit/97a5349d936b08cf301730b59e4e8855283f815c [2] https://github.com/nxp-imx/linux-imx/commit/b4b5cbc5a12d7c3b920d1d7cba0ada3379e4e42b Fixes: fb8587a2c165 ("arm64: dtsi: imx8mp: add usb nodes") Fixes: ad37549cb5dc ("arm64: dts: imx8mq: add USB nodes") Signed-off-by: Nathan Rossi Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2 ++ arch/arm64/boot/dts/freescale/imx8mq.dtsi | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 83d907294fbc..4b50920ac204 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -2030,6 +2030,7 @@ phys = <&usb3_phy0>, <&usb3_phy0>; phy-names = "usb2-phy", "usb3-phy"; snps,gfladj-refclk-lpm-sel-quirk; + snps,parkmode-disable-ss-quirk; }; }; @@ -2072,6 +2073,7 @@ phys = <&usb3_phy1>, <&usb3_phy1>; phy-names = "usb2-phy", "usb3-phy"; snps,gfladj-refclk-lpm-sel-quirk; + snps,parkmode-disable-ss-quirk; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 35f07dfb4ca8..052ba9baa400 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -1649,6 +1649,7 @@ phys = <&usb3_phy0>, <&usb3_phy0>; phy-names = "usb2-phy", "usb3-phy"; power-domains = <&pgc_otg1>; + snps,parkmode-disable-ss-quirk; status = "disabled"; }; @@ -1680,6 +1681,7 @@ phys = <&usb3_phy1>, <&usb3_phy1>; phy-names = "usb2-phy", "usb3-phy"; power-domains = <&pgc_otg2>; + snps,parkmode-disable-ss-quirk; status = "disabled"; }; From fe145559a91f731c7039158e37eb38b1773ecd41 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sun, 5 Nov 2023 10:32:19 -0300 Subject: [PATCH 0890/1397] ARM: dts: imx6ul-pico: Describe the Ethernet PHY clock [ Upstream commit d951f8f5f23a9417b7952f22b33784c73caa1ebb ] Since commit c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup")thet Ethernet PHY is no longer configured via code in board file. This caused Ethernet to stop working. Fix this problem by describing the clocks and clock-names to the Ethernet PHY node so that the KSZ8081 chip can be clocked correctly. Fixes: c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi index 4ffe99ed55ca..07dcecbe485d 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi @@ -121,6 +121,8 @@ max-speed = <100>; interrupt-parent = <&gpio5>; interrupts = <6 IRQ_TYPE_LEVEL_LOW>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; }; }; }; From b2173a8b64507f5be03c195614a491778e6963cc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 5 Dec 2023 15:37:17 +0300 Subject: [PATCH 0891/1397] io_uring/kbuf: Fix an NULL vs IS_ERR() bug in io_alloc_pbuf_ring() [ Upstream commit e53f7b54b1fdecae897f25002ff0cff04faab228 ] The io_mem_alloc() function returns error pointers, not NULL. Update the check accordingly. Fixes: b10b73c102a2 ("io_uring/kbuf: recycle freed mapped buffer ring entries") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/5ed268d3-a997-4f64-bd71-47faa92101ab@moroto.mountain Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/kbuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 012f62203604..12eec4778c5b 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -623,8 +623,8 @@ static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx, ibf = io_lookup_buf_free_entry(ctx, ring_size); if (!ibf) { ptr = io_mem_alloc(ring_size); - if (!ptr) - return -ENOMEM; + if (IS_ERR(ptr)) + return PTR_ERR(ptr); /* Allocate and store deferred free entry */ ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT); From 7e6621b99d20f26f7355a7f266bebd6f5ce88693 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 5 Dec 2023 07:02:13 -0700 Subject: [PATCH 0892/1397] io_uring/kbuf: check for buffer list readiness after NULL check [ Upstream commit 9865346b7e8374b57f1c3ccacdc77846c6352ff4 ] Move the buffer list 'is_ready' check below the validity check for the buffer list for a given group. Fixes: 5cf4f52e6d8a ("io_uring: free io_buffer_list entries via RCU") Reported-by: Dan Carpenter Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/kbuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 12eec4778c5b..e8516f3bbbaa 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -743,6 +743,8 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); + if (!bl || !bl->is_mmap) + return NULL; /* * Ensure the list is fully setup. Only strictly needed for RCU lookup * via mmap, and in that case only for the array indexed groups. For @@ -750,8 +752,6 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) */ if (!smp_load_acquire(&bl->is_ready)) return NULL; - if (!bl || !bl->is_mmap) - return NULL; return bl->buf_ring; } From 6eec904d95a5ae18839a6102e45b0fbfa1b6d4d4 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 5 Dec 2023 17:17:35 +0100 Subject: [PATCH 0893/1397] tracing: Fix a warning when allocating buffered events fails [ Upstream commit 34209fe83ef8404353f91ab4ea4035dbc9922d04 ] Function trace_buffered_event_disable() produces an unexpected warning when the previous call to trace_buffered_event_enable() fails to allocate pages for buffered events. The situation can occur as follows: * The counter trace_buffered_event_ref is at 0. * The soft mode gets enabled for some event and trace_buffered_event_enable() is called. The function increments trace_buffered_event_ref to 1 and starts allocating event pages. * The allocation fails for some page and trace_buffered_event_disable() is called for cleanup. * Function trace_buffered_event_disable() decrements trace_buffered_event_ref back to 0, recognizes that it was the last use of buffered events and frees all allocated pages. * The control goes back to trace_buffered_event_enable() which returns. The caller of trace_buffered_event_enable() has no information that the function actually failed. * Some time later, the soft mode is disabled for the same event. Function trace_buffered_event_disable() is called. It warns on "WARN_ON_ONCE(!trace_buffered_event_ref)" and returns. Buffered events are just an optimization and can handle failures. Make trace_buffered_event_enable() exit on the first failure and left any cleanup later to when trace_buffered_event_disable() is called. Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/ Link: https://lkml.kernel.org/r/20231205161736.19663-3-petr.pavlu@suse.com Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") Signed-off-by: Petr Pavlu Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a40d6baf101f..689d063dd206 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2769,8 +2769,11 @@ void trace_buffered_event_enable(void) for_each_tracing_cpu(cpu) { page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_NORETRY, 0); - if (!page) - goto failed; + /* This is just an optimization and can handle failures */ + if (!page) { + pr_err("Failed to allocate event buffer\n"); + break; + } event = page_address(page); memset(event, 0, sizeof(*event)); @@ -2784,10 +2787,6 @@ void trace_buffered_event_enable(void) WARN_ON_ONCE(1); preempt_enable(); } - - return; - failed: - trace_buffered_event_disable(); } static void enable_trace_buffered_event(void *data) From ddc5ad21da0a8ae953c5ea3b81a4d95365f6a2ac Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 23 Nov 2023 16:19:41 +0800 Subject: [PATCH 0894/1397] scsi: be2iscsi: Fix a memleak in beiscsi_init_wrb_handle() [ Upstream commit 235f2b548d7f4ac5931d834f05d3f7f5166a2e72 ] When an error occurs in the for loop of beiscsi_init_wrb_handle(), we should free phwi_ctxt->be_wrbq before returning an error code to prevent potential memleak. Fixes: a7909b396ba7 ("[SCSI] be2iscsi: Fix dynamic CID allocation Mechanism in driver") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20231123081941.24854-1-dinghao.liu@zju.edu.cn Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/be2iscsi/be_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index e48f14ad6dfd..06acb5ff609e 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -2710,6 +2710,7 @@ static int beiscsi_init_wrb_handle(struct beiscsi_hba *phba) kfree(pwrb_context->pwrb_handle_base); kfree(pwrb_context->pwrb_handle_basestd); } + kfree(phwi_ctxt->be_wrbq); return -ENOMEM; } From d338395bc5afcce9160cf7795c8a6d995fae72aa Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 8 Sep 2023 13:47:35 -0300 Subject: [PATCH 0895/1397] arm64: dts: imx8-ss-lsio: Add PWM interrupts [ Upstream commit 6c32f75d67a8c1ea94295234db7c11a29c189e6f ] The PWM interrupt is mandatory per imx-pwm.yaml. Add them. This also fixes the followig schema warning: imx8qm-apalis-v1.1-ixora-v1.2.dtb: pwm@5d000000: 'oneOf' conditional failed, one must be fixed: 'interrupts' is a required property 'interrupts-extended' is a required property from schema $id: http://devicetree.org/schemas/pwm/imx-pwm.yaml# Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo Stable-dep-of: d863a2f4f475 ("arm64: dts: freescale: imx8-ss-lsio: Fix #pwm-cells") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi index ea8c93757521..7b8bbf5e6a86 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi @@ -37,6 +37,7 @@ lsio_subsys: bus@5d000000 { assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <2>; + interrupts = ; status = "disabled"; }; @@ -49,6 +50,7 @@ lsio_subsys: bus@5d000000 { assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <2>; + interrupts = ; status = "disabled"; }; @@ -61,6 +63,7 @@ lsio_subsys: bus@5d000000 { assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <2>; + interrupts = ; status = "disabled"; }; @@ -73,6 +76,7 @@ lsio_subsys: bus@5d000000 { assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <2>; + interrupts = ; status = "disabled"; }; From 8b2c35e5e678a005f942c77cece9a5168662616d Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 6 Nov 2023 16:13:25 +0100 Subject: [PATCH 0896/1397] arm64: dts: freescale: imx8-ss-lsio: Fix #pwm-cells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d863a2f4f47560d71447650822857fc3d2aea715 ] i.MX8QM/QXP supports inverted PWM output, thus #pwm-cells needs to be set to 3. Fixes: 23fa99b205ea ("arm64: dts: freescale: imx8-ss-lsio: add support for lsio_pwm0-3") Signed-off-by: Alexander Stein Reviewed-by: Uwe Kleine-König Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi index 7b8bbf5e6a86..133f2b1ce1d2 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi @@ -36,7 +36,7 @@ lsio_subsys: bus@5d000000 { <&pwm0_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; @@ -49,7 +49,7 @@ lsio_subsys: bus@5d000000 { <&pwm1_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; @@ -62,7 +62,7 @@ lsio_subsys: bus@5d000000 { <&pwm2_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; @@ -75,7 +75,7 @@ lsio_subsys: bus@5d000000 { <&pwm3_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; From 3836f1f246e276d8ba296b28ec92c5764b2f3776 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 13 Nov 2023 18:02:29 +0800 Subject: [PATCH 0897/1397] arm64: dts: imx93: correct mediamix power [ Upstream commit d4cb68a5d3a1ed30ecaf1591eb901523faa13496 ] "nic_media" clock should be enabled when power on/off mediamix, otherwise power on/off will fail. Because "media_axi_root" clock is the parent of "nic_media" clock, so replace "media_axi_clock" clock with "nic_media" clock in mediamix node. Link: https://github.com/nxp-imx/linux-imx/commit/ce18e6d0071ae9df5486af8613708ebe920484be Fixes: f2d03ba997cb ("arm64: dts: imx93: reorder device nodes") Fixes: e85d3458a804 ("arm64: dts: imx93: add src node") Reviewed-by: Jacky Bai Signed-off-by: Peng Fan Signed-off-by: Liu Ying Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx93.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index dcf6e4846ac9..943b7e665563 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -373,7 +373,7 @@ compatible = "fsl,imx93-src-slice"; reg = <0x44462400 0x400>, <0x44465800 0x400>; #power-domain-cells = <0>; - clocks = <&clk IMX93_CLK_MEDIA_AXI>, + clocks = <&clk IMX93_CLK_NIC_MEDIA_GATE>, <&clk IMX93_CLK_MEDIA_APB>; }; }; From 12deaef0c8670064022819ef346ad05a692f5c69 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 22 Nov 2023 14:46:36 +0800 Subject: [PATCH 0898/1397] ARM: imx: Check return value of devm_kasprintf in imx_mmdc_perf_init [ Upstream commit 1c2b1049af3f86545fcc5fae0fc725fb64b3a09e ] devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Release the id allocated in 'mmdc_pmu_init' when 'devm_kasprintf' return NULL Suggested-by: Ahmad Fatoum Fixes: e76bdfd7403a ("ARM: imx: Added perf functionality to mmdc driver") Signed-off-by: Kunwu Chan Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/mach-imx/mmdc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 2157493b78a9..df69af932375 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -501,6 +501,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "mmdc%d", ret); + if (!name) { + ret = -ENOMEM; + goto pmu_release_id; + } pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk; pmu_mmdc->devtype_data = (struct fsl_mmdc_devtype_data *)of_id->data; @@ -523,9 +527,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b pmu_register_err: pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret); - ida_simple_remove(&mmdc_ida, pmu_mmdc->id); cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); hrtimer_cancel(&pmu_mmdc->hrtimer); +pmu_release_id: + ida_simple_remove(&mmdc_ida, pmu_mmdc->id); pmu_free: kfree(pmu_mmdc); return ret; From ee007123af6d5a532358dd1bc16ce60946f357d2 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Thu, 23 Nov 2023 11:48:12 +0100 Subject: [PATCH 0899/1397] arm64: dts: imx8-apalis: set wifi regulator to always-on [ Upstream commit 04179605ab604dba32571a05cd06423afc9eca19 ] Make sure that the wifi regulator is always on. The wifi driver itself puts the wifi module into suspend mode. If we cut the power the driver will crash when resuming from suspend. Signed-off-by: Stefan Eichenberger Signed-off-by: Francesco Dolcini Fixes: ad0de4ceb706 ("arm64: dts: freescale: add initial apalis imx8 aka quadmax module support") Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi index 9b1b522517f8..0878a15acc1b 100644 --- a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi @@ -82,12 +82,9 @@ pinctrl-0 = <&pinctrl_wifi_pdn>; gpio = <&lsio_gpio1 28 GPIO_ACTIVE_HIGH>; enable-active-high; + regulator-always-on; regulator-name = "wifi_pwrdn_fake_regulator"; regulator-settling-time-us = <100>; - - regulator-state-mem { - regulator-off-in-suspend; - }; }; reg_pcie_switch: regulator-pcie-switch { From 5af668cd9ee5d6b5ce396257a4df48c598646cc8 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 27 Nov 2023 17:05:01 +0100 Subject: [PATCH 0900/1397] ARM: dts: imx7: Declare timers compatible with fsl,imx6dl-gpt [ Upstream commit 397caf68e2d36532054cb14ae8995537f27f8b61 ] The timer nodes declare compatibility with "fsl,imx6sx-gpt", which itself is compatible with "fsl,imx6dl-gpt". Switch the fallback compatible from "fsl,imx6sx-gpt" to "fsl,imx6dl-gpt". Fixes: 949673450291 ("ARM: dts: add imx7d soc dtsi file") Signed-off-by: Philipp Zabel Signed-off-by: Roland Hieber Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/nxp/imx/imx7s.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi index e152d08f27d4..bc79163c49b5 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi @@ -454,7 +454,7 @@ }; gpt1: timer@302d0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302d0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT1_ROOT_CLK>, @@ -463,7 +463,7 @@ }; gpt2: timer@302e0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302e0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT2_ROOT_CLK>, @@ -473,7 +473,7 @@ }; gpt3: timer@302f0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302f0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT3_ROOT_CLK>, @@ -483,7 +483,7 @@ }; gpt4: timer@30300000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x30300000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT4_ROOT_CLK>, From 15eb6859de685ce0235db22f061d23bd8e71dc3b Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 5 Dec 2023 10:27:35 -0300 Subject: [PATCH 0901/1397] ARM: dts: imx28-xea: Pass the 'model' property [ Upstream commit 63ef8fc9bcee6b73ca445a19a7ac6bd544723c9f ] Per root-node.yaml, 'model' is a required property. Pass it to fix the following dt-schema warning: imx28-xea.dtb: /: 'model' is a required property from schema $id: http://devicetree.org/schemas/root-node.yaml# Signed-off-by: Fabio Estevam Fixes: 445ae16ac1c5 ("ARM: dts: imx28: Add DTS description of imx28 based XEA board") Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/nxp/mxs/imx28-xea.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts index a400c108f66a..6c5e6856648a 100644 --- a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts +++ b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts @@ -8,6 +8,7 @@ #include "imx28-lwe.dtsi" / { + model = "Liebherr XEA board"; compatible = "lwn,imx28-xea", "fsl,imx28"; }; From 92f095553ae58733962aa5ef5ba9882bda105b06 Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Tue, 5 Dec 2023 12:29:00 -0800 Subject: [PATCH 0902/1397] arm64: dts: rockchip: Fix eMMC Data Strobe PD on rk3588 [ Upstream commit 37f3d6108730713c411827ab4af764909f4dfc78 ] JEDEC standard JESD84-B51 defines the eMMC Data Strobe line, which is currently used only in HS400 mode, as a device->host clock signal that "is used only in read operation. The Data Strobe is always High-Z (not driven by the device and pulled down by RDS) or Driven Low in write operation, except during CRC status response." RDS is a pull-down resistor specified in the 10K-100K ohm range. Thus per the standard, the Data Strobe is always pulled to ground (by the eMMC and/or RDS) during write operations. Evidently, the eMMC host controller in the RK3588 considers an active voltage on the eMMC-DS line during a write to be an error. The default (i.e. hardware reset, and Rockchip BSP) behavior for the RK3588 is to activate the eMMC-DS pin's builtin pull-down. As a result, many RK3588 board designers do not bother adding a dedicated RDS resistor, instead relying on the RK3588's internal bias. The current devicetree, however, disables this bias (`pcfg_pull_none`), breaking HS400-mode writes for boards without a dedicated RDS, but with an eMMC chip that chooses to High-Z (instead of drive-low) the eMMC-DS line. (The Turing RK1 is one such board.) Fix this by changing the bias in the (common) emmc_data_strobe case to reflect the expected hardware/BSP behavior. This is unlikely to cause regressions elsewhere: the pull-down is only relevant for High-Z eMMCs, and if this is redundant with a (dedicated) RDS resistor, the effective result is only a lower resistance to ground -- where the range of tolerance is quite high. If it does, it's better fixed in the specific devicetrees. Fixes: d85f8a5c798d5 ("arm64: dts: rockchip: Add rk3588 pinctrl data") Signed-off-by: Sam Edwards Link: https://lore.kernel.org/r/20231205202900.4617-2-CFSworks@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi index 48181671eacb..0933652bafc3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi @@ -369,7 +369,7 @@ emmc_data_strobe: emmc-data-strobe { rockchip,pins = /* emmc_data_strobe */ - <2 RK_PA2 1 &pcfg_pull_none>; + <2 RK_PA2 1 &pcfg_pull_down>; }; }; From b12ccda0d469f70e8dfb9dfd2bfd1f1298910f34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 3 Nov 2023 10:02:23 +0100 Subject: [PATCH 0903/1397] riscv: fix misaligned access handling of C.SWSP and C.SDSP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 22e0eb04837a63af111fae35a92f7577676b9bc8 ] This is a backport of a fix that was done in OpenSBI: ec0559eb315b ("lib: sbi_misaligned_ldst: Fix handling of C.SWSP and C.SDSP"). Unlike C.LWSP/C.LDSP, these encodings can be used with the zero register, so checking that the rs2 field is non-zero is unnecessary. Additionally, the previous check was incorrect since it was checking the immediate field of the instruction instead of the rs2 field. Fixes: 956d705dd279 ("riscv: Unaligned load/store handling for M_MODE") Signed-off-by: Clément Léger Link: https://lore.kernel.org/r/20231103090223.702340-1-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/traps_misaligned.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 378f5b151443..5348d842c745 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -342,16 +342,14 @@ int handle_misaligned_store(struct pt_regs *regs) } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) { len = 8; val.data_ulong = GET_RS2S(insn, regs); - } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP && - ((insn >> SH_RD) & 0x1f)) { + } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP) { len = 8; val.data_ulong = GET_RS2C(insn, regs); #endif } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) { len = 4; val.data_ulong = GET_RS2S(insn, regs); - } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP && - ((insn >> SH_RD) & 0x1f)) { + } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP) { len = 4; val.data_ulong = GET_RS2C(insn, regs); } else { From 7442310ee4f0ecdc0a4187bd6adf034547f5ac75 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 30 Nov 2023 21:26:47 +0000 Subject: [PATCH 0904/1397] riscv: errata: andes: Probe for IOCP only once in boot stage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ed5b7cfd7839f9280a63365c1133482b42d0981f ] We need to probe for IOCP only once during boot stage, as we were probing for IOCP for all the stages this caused the below issue during module-init stage, [9.019104] Unable to handle kernel paging request at virtual address ffffffff8100d3a0 [9.027153] Oops [#1] [9.029421] Modules linked in: rcar_canfd renesas_usbhs i2c_riic can_dev spi_rspi i2c_core [9.037686] CPU: 0 PID: 90 Comm: udevd Not tainted 6.7.0-rc1+ #57 [9.043756] Hardware name: Renesas SMARC EVK based on r9a07g043f01 (DT) [9.050339] epc : riscv_noncoherent_supported+0x10/0x3e [9.055558]  ra : andes_errata_patch_func+0x4a/0x52 [9.060418] epc : ffffffff8000d8c2 ra : ffffffff8000d95c sp : ffffffc8003abb00 [9.067607]  gp : ffffffff814e25a0 tp : ffffffd80361e540 t0 : 0000000000000000 [9.074795]  t1 : 000000000900031e t2 : 0000000000000001 s0 : ffffffc8003abb20 [9.081984]  s1 : ffffffff015b57c7 a0 : 0000000000000000 a1 : 0000000000000001 [9.089172]  a2 : 0000000000000000 a3 : 0000000000000000 a4 : ffffffff8100d8be [9.096360]  a5 : 0000000000000001 a6 : 0000000000000001 a7 : 000000000900031e [9.103548]  s2 : ffffffff015b57d7 s3 : 0000000000000001 s4 : 000000000000031e [9.110736]  s5 : 8000000000008a45 s6 : 0000000000000500 s7 : 000000000000003f [9.117924]  s8 : ffffffc8003abd48 s9 : ffffffff015b1140 s10: ffffffff8151a1b0 [9.125113]  s11: ffffffff015b1000 t3 : 0000000000000001 t4 : fefefefefefefeff [9.132301]  t5 : ffffffff015b57c7 t6 : ffffffd8b63a6000 [9.137587] status: 0000000200000120 badaddr: ffffffff8100d3a0 cause: 000000000000000f [9.145468] [] riscv_noncoherent_supported+0x10/0x3e [9.151972] [] _apply_alternatives+0x84/0x86 [9.157784] [] apply_module_alternatives+0x10/0x1a [9.164113] [] module_finalize+0x5e/0x7a [9.169583] [] load_module+0xfd8/0x179c [9.174965] [] init_module_from_file+0x76/0xaa [9.180948] [] __riscv_sys_finit_module+0x176/0x2a8 [9.187365] [] do_trap_ecall_u+0xbe/0x130 [9.192922] [] ret_from_exception+0x0/0x64 [9.198573] Code: 0009 b7e9 6797 014d a783 85a7 c799 4785 0717 0100 (0123) aef7 [9.205994] ---[ end trace 0000000000000000 ]--- This is because we called riscv_noncoherent_supported() for all the stages during IOCP probe. riscv_noncoherent_supported() function sets noncoherent_supported variable to true which has an annotation set to "__ro_after_init" due to which we were seeing the above splat. Fix this by probing for IOCP only once in boot stage by having a boolean variable "done" which will be set to true upon IOCP probe in errata_probe_iocp() and we bail out early if "done" is set to true. While at it make return type of errata_probe_iocp() to void as we were not checking the return value in andes_errata_patch_func(). Fixes: e021ae7f5145 ("riscv: errata: Add Andes alternative ports") Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Reviewed-by: Yu Chien Peter Lin Link: https://lore.kernel.org/r/20231130212647.108746-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/errata/andes/errata.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c index 197db68cc8da..17a904869724 100644 --- a/arch/riscv/errata/andes/errata.c +++ b/arch/riscv/errata/andes/errata.c @@ -38,29 +38,35 @@ static long ax45mp_iocp_sw_workaround(void) return ret.error ? 0 : ret.value; } -static bool errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid) +static void errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid) { + static bool done; + if (!IS_ENABLED(CONFIG_ERRATA_ANDES_CMO)) - return false; + return; + + if (done) + return; + + done = true; if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID) - return false; + return; if (!ax45mp_iocp_sw_workaround()) - return false; + return; /* Set this just to make core cbo code happy */ riscv_cbom_block_size = 1; riscv_noncoherent_supported(); - - return true; } void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned long archid, unsigned long impid, unsigned int stage) { - errata_probe_iocp(stage, archid, impid); + if (stage == RISCV_ALTERNATIVES_BOOT) + errata_probe_iocp(stage, archid, impid); /* we have nothing to patch here ATM so just return back */ } From 49b79af00d24e5b27049725aa3bc11f4175c3c2c Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 5 Dec 2023 17:42:14 +0800 Subject: [PATCH 0905/1397] md: don't leave 'MD_RECOVERY_FROZEN' in error path of md_set_readonly() [ Upstream commit c9f7cb5b2bc968adcdc686c197ed108f47fd8eb0 ] If md_set_readonly() failed, the array could still be read-write, however 'MD_RECOVERY_FROZEN' could still be set, which leave the array in an abnormal state that sync or recovery can't continue anymore. Hence make sure the flag is cleared after md_set_readonly() returns. Fixes: 88724bfa68be ("md: wait for pending superblock updates before switching to read-only") Signed-off-by: Yu Kuai Acked-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231205094215.1824240-3-yukuai1@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/md.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 2748b0b424cf..b2ef6af8376a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6316,6 +6316,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) int err = 0; int did_freeze = 0; + if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) + return -EBUSY; + if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) { did_freeze = 1; set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); @@ -6330,8 +6333,6 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) */ md_wakeup_thread_directly(mddev->sync_thread); - if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) - return -EBUSY; mddev_unlock(mddev); wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); @@ -6344,29 +6345,30 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) mddev->sync_thread || test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { pr_warn("md: %s still in use.\n",mdname(mddev)); - if (did_freeze) { - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); - } err = -EBUSY; goto out; } + if (mddev->pers) { __md_stop_writes(mddev); - err = -ENXIO; - if (mddev->ro == MD_RDONLY) + if (mddev->ro == MD_RDONLY) { + err = -ENXIO; goto out; + } + mddev->ro = MD_RDONLY; set_disk_ro(mddev->gendisk, 1); + } + +out: + if ((mddev->pers && !err) || did_freeze) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); sysfs_notify_dirent_safe(mddev->sysfs_state); - err = 0; } -out: + mutex_unlock(&mddev->open_mutex); return err; } From 2d16a9f778f7af55d8c488c7ad8e58c2577016fd Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 28 Nov 2023 13:44:57 +0800 Subject: [PATCH 0906/1397] kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP [ Upstream commit dccf78d39f1069a5ddf4328bf0c97aa5f2f4296e ] Ignat Korchagin complained that a potential config regression was introduced by commit 89cde455915f ("kexec: consolidate kexec and crash options into kernel/Kconfig.kexec"). Before the commit, CONFIG_CRASH_DUMP has no dependency on CONFIG_KEXEC. After the commit, CRASH_DUMP selects KEXEC. That enforces system to have CONFIG_KEXEC=y as long as CONFIG_CRASH_DUMP=Y which people may not want. In Ignat's case, he sets CONFIG_CRASH_DUMP=y, CONFIG_KEXEC_FILE=y and CONFIG_KEXEC=n because kexec_load interface could have security issue if kernel/initrd has no chance to be signed and verified. CRASH_DUMP has select of KEXEC because Eric, author of above commit, met a LKP report of build failure when posting patch of earlier version. Please see below link to get detail of the LKP report: https://lore.kernel.org/all/3e8eecd1-a277-2cfb-690e-5de2eb7b988e@oracle.com/T/#u In fact, that LKP report is triggered because arm's is wrapped in CONFIG_KEXEC ifdeffery scope. That is wrong. CONFIG_KEXEC controls the enabling/disabling of kexec_load interface, but not kexec feature. Removing the wrongly added CONFIG_KEXEC ifdeffery scope in of arm allows us to drop the select KEXEC for CRASH_DUMP. Meanwhile, change arch/arm/kernel/Makefile to let machine_kexec.o relocate_kernel.o depend on KEXEC_CORE. Link: https://lkml.kernel.org/r/20231128054457.659452-1-bhe@redhat.com Fixes: 89cde455915f ("kexec: consolidate kexec and crash options into kernel/Kconfig.kexec") Signed-off-by: Baoquan He Reported-by: Ignat Korchagin Tested-by: Ignat Korchagin [compile-time only] Tested-by: Alexander Gordeev Reviewed-by: Eric DeVolder Tested-by: Eric DeVolder Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- arch/arm/include/asm/kexec.h | 4 ---- arch/arm/kernel/Makefile | 2 +- kernel/Kconfig.kexec | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index e62832dcba76..a8287e7ab9d4 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -2,8 +2,6 @@ #ifndef _ARM_KEXEC_H #define _ARM_KEXEC_H -#ifdef CONFIG_KEXEC - /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) /* Maximum address we can reach in physical address mode */ @@ -82,6 +80,4 @@ static inline struct page *boot_pfn_to_page(unsigned long boot_pfn) #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ - #endif /* _ARM_KEXEC_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index d53f56d6f840..771264d4726a 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -59,7 +59,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += entry-ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o patch.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o patch.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o # Main staffs in KPROBES are in arch/arm/probes/ . obj-$(CONFIG_KPROBES) += patch.o insn.o obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 9bfe68fe9676..143f4d8eab7c 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -97,7 +97,6 @@ config CRASH_DUMP depends on ARCH_SUPPORTS_KEXEC select CRASH_CORE select KEXEC_CORE - select KEXEC help Generate crash dump after being started by kexec. This should be normally only set in special crash dump kernels From af448bb2eaba0d069f66b069a15440fcd36a21ca Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 29 Nov 2023 20:33:16 -0800 Subject: [PATCH 0907/1397] scripts/gdb: fix lx-device-list-bus and lx-device-list-class [ Upstream commit 801a2b1b49f4dcf06703130922806e9c639c2ca8 ] After the conversion to bus_to_subsys() and class_to_subsys(), the gdb scripts listing the system buses and classes respectively was broken, fix those by returning the subsys_priv pointer and have the various caller de-reference either the 'bus' or 'class' structure members accordingly. Link: https://lkml.kernel.org/r/20231130043317.174188-1-florian.fainelli@broadcom.com Fixes: 7b884b7f24b4 ("driver core: class.c: convert to only use class_to_subsys") Signed-off-by: Florian Fainelli Tested-by: Kuan-Ying Lee Cc: Greg Kroah-Hartman Cc: Jan Kiszka Cc: Kieran Bingham Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- scripts/gdb/linux/device.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/gdb/linux/device.py b/scripts/gdb/linux/device.py index 16376c5cfec6..0eabc5f4f8ca 100644 --- a/scripts/gdb/linux/device.py +++ b/scripts/gdb/linux/device.py @@ -36,26 +36,26 @@ def for_each_bus(): for kobj in kset_for_each_object(gdb.parse_and_eval('bus_kset')): subsys = container_of(kobj, kset_type.get_type().pointer(), 'kobj') subsys_priv = container_of(subsys, subsys_private_type.get_type().pointer(), 'subsys') - yield subsys_priv['bus'] + yield subsys_priv def for_each_class(): for kobj in kset_for_each_object(gdb.parse_and_eval('class_kset')): subsys = container_of(kobj, kset_type.get_type().pointer(), 'kobj') subsys_priv = container_of(subsys, subsys_private_type.get_type().pointer(), 'subsys') - yield subsys_priv['class'] + yield subsys_priv def get_bus_by_name(name): for item in for_each_bus(): - if item['name'].string() == name: + if item['bus']['name'].string() == name: return item raise gdb.GdbError("Can't find bus type {!r}".format(name)) def get_class_by_name(name): for item in for_each_class(): - if item['name'].string() == name: + if item['class']['name'].string() == name: return item raise gdb.GdbError("Can't find device class {!r}".format(name)) @@ -70,13 +70,13 @@ def klist_for_each(klist): def bus_for_each_device(bus): - for kn in klist_for_each(bus['p']['klist_devices']): + for kn in klist_for_each(bus['klist_devices']): dp = container_of(kn, device_private_type.get_type().pointer(), 'knode_bus') yield dp['device'] def class_for_each_device(cls): - for kn in klist_for_each(cls['p']['klist_devices']): + for kn in klist_for_each(cls['klist_devices']): dp = container_of(kn, device_private_type.get_type().pointer(), 'knode_class') yield dp['device'] @@ -103,7 +103,7 @@ def __init__(self): def invoke(self, arg, from_tty): if not arg: for bus in for_each_bus(): - gdb.write('bus {}:\t{}\n'.format(bus['name'].string(), bus)) + gdb.write('bus {}:\t{}\n'.format(bus['bus']['name'].string(), bus)) for dev in bus_for_each_device(bus): _show_device(dev, level=1) else: @@ -123,7 +123,7 @@ def __init__(self): def invoke(self, arg, from_tty): if not arg: for cls in for_each_class(): - gdb.write("class {}:\t{}\n".format(cls['name'].string(), cls)) + gdb.write("class {}:\t{}\n".format(cls['class']['name'].string(), cls)) for dev in class_for_each_device(cls): _show_device(dev, level=1) else: From 29b9ebc89106dbb6ffd0004b9a50f21874a540af Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 1 Dec 2023 14:53:56 +0900 Subject: [PATCH 0908/1397] rethook: Use __rcu pointer for rethook::handler commit a1461f1fd6cfdc4b8917c9d4a91e92605d1f28dc upstream. Since the rethook::handler is an RCU-maganged pointer so that it will notice readers the rethook is stopped (unregistered) or not, it should be an __rcu pointer and use appropriate functions to be accessed. This will use appropriate memory barrier when accessing it. OTOH, rethook::data is never changed, so we don't need to check it in get_kretprobe(). NOTE: To avoid sparse warning, rethook::handler is defined by a raw function pointer type with __rcu instead of rethook_handler_t. Link: https://lore.kernel.org/all/170126066201.398836.837498688669005979.stgit@devnote2/ Fixes: 54ecbe6f1ed5 ("rethook: Add a generic return hook") Cc: stable@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311241808.rv9ceuAh-lkp@intel.com/ Tested-by: JP Kobryn Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- include/linux/kprobes.h | 6 ++---- include/linux/rethook.h | 7 ++++++- kernel/trace/rethook.c | 23 ++++++++++++++--------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 85a64cb95d75..38a774287bde 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -202,10 +202,8 @@ extern int arch_trampoline_kprobe(struct kprobe *p); #ifdef CONFIG_KRETPROBE_ON_RETHOOK static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) { - RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), - "Kretprobe is accessed from instance under preemptive context"); - - return (struct kretprobe *)READ_ONCE(ri->node.rethook->data); + /* rethook::data is non-changed field, so that you can access it freely. */ + return (struct kretprobe *)ri->node.rethook->data; } static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) { diff --git a/include/linux/rethook.h b/include/linux/rethook.h index 26b6f3c81a76..544e1bbfad28 100644 --- a/include/linux/rethook.h +++ b/include/linux/rethook.h @@ -29,7 +29,12 @@ typedef void (*rethook_handler_t) (struct rethook_node *, void *, unsigned long, */ struct rethook { void *data; - rethook_handler_t handler; + /* + * To avoid sparse warnings, this uses a raw function pointer with + * __rcu, instead of rethook_handler_t. But this must be same as + * rethook_handler_t. + */ + void (__rcu *handler) (struct rethook_node *, void *, unsigned long, struct pt_regs *); struct freelist_head pool; refcount_t ref; struct rcu_head rcu; diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c index 5eb9b598f4e9..3cebcbaf35a4 100644 --- a/kernel/trace/rethook.c +++ b/kernel/trace/rethook.c @@ -63,7 +63,7 @@ static void rethook_free_rcu(struct rcu_head *head) */ void rethook_stop(struct rethook *rh) { - WRITE_ONCE(rh->handler, NULL); + rcu_assign_pointer(rh->handler, NULL); } /** @@ -78,11 +78,17 @@ void rethook_stop(struct rethook *rh) */ void rethook_free(struct rethook *rh) { - WRITE_ONCE(rh->handler, NULL); + rethook_stop(rh); call_rcu(&rh->rcu, rethook_free_rcu); } +static inline rethook_handler_t rethook_get_handler(struct rethook *rh) +{ + return (rethook_handler_t)rcu_dereference_check(rh->handler, + rcu_read_lock_any_held()); +} + /** * rethook_alloc() - Allocate struct rethook. * @data: a data to pass the @handler when hooking the return. @@ -102,7 +108,7 @@ struct rethook *rethook_alloc(void *data, rethook_handler_t handler) } rh->data = data; - rh->handler = handler; + rcu_assign_pointer(rh->handler, handler); rh->pool.head = NULL; refcount_set(&rh->ref, 1); @@ -142,9 +148,10 @@ static void free_rethook_node_rcu(struct rcu_head *head) */ void rethook_recycle(struct rethook_node *node) { - lockdep_assert_preemption_disabled(); + rethook_handler_t handler; - if (likely(READ_ONCE(node->rethook->handler))) + handler = rethook_get_handler(node->rethook); + if (likely(handler)) freelist_add(&node->freelist, &node->rethook->pool); else call_rcu(&node->rcu, free_rethook_node_rcu); @@ -160,11 +167,9 @@ NOKPROBE_SYMBOL(rethook_recycle); */ struct rethook_node *rethook_try_get(struct rethook *rh) { - rethook_handler_t handler = READ_ONCE(rh->handler); + rethook_handler_t handler = rethook_get_handler(rh); struct freelist_node *fn; - lockdep_assert_preemption_disabled(); - /* Check whether @rh is going to be freed. */ if (unlikely(!handler)) return NULL; @@ -312,7 +317,7 @@ unsigned long rethook_trampoline_handler(struct pt_regs *regs, rhn = container_of(first, struct rethook_node, llist); if (WARN_ON_ONCE(rhn->frame != frame)) break; - handler = READ_ONCE(rhn->rethook->handler); + handler = rethook_get_handler(rhn->rethook); if (handler) handler(rhn, rhn->rethook->data, correct_ret_addr, regs); From 127fcf79662d217ddbd2bc53de261ec0c7eb1bbb Mon Sep 17 00:00:00 2001 From: Malcolm Hart Date: Mon, 27 Nov 2023 20:36:00 +0000 Subject: [PATCH 0909/1397] ASoC: amd: yc: Fix non-functional mic on ASUS E1504FA commit b24e3590c94ab0aba6e455996b502a83baa5c31c upstream. This patch adds ASUSTeK COMPUTER INC "E1504FA" to the quirks file acp6x-mach.c to enable microphone array on ASUS Vivobook GO 15. I have this laptop and can confirm that the patch succeeds in enabling the microphone array. Signed-off-by: Malcolm Hart Cc: stable@vger.kernel.org Rule: add Link: https://lore.kernel.org/stable/875y1nt1bx.fsf%405harts.com Link: https://lore.kernel.org/r/871qcbszh0.fsf@5harts.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 15a864dcd7bd..3babb17a56bb 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -283,6 +283,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "M6500RC"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "E1504FA"), + } + }, { .driver_data = &acp6x_card, .matches = { From 5a33d385eb36991a91e3dddb189d8679e2aac2be Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 6 Dec 2023 13:26:47 +0000 Subject: [PATCH 0910/1397] io_uring/af_unix: disable sending io_uring over sockets commit 705318a99a138c29a512a72c3e0043b3cd7f55f4 upstream. File reference cycles have caused lots of problems for io_uring in the past, and it still doesn't work exactly right and races with unix_stream_read_generic(). The safest fix would be to completely disallow sending io_uring files via sockets via SCM_RIGHT, so there are no possible cycles invloving registered files and thus rendering SCM accounting on the io_uring side unnecessary. Cc: Fixes: 0091bfc81741b ("io_uring/af_unix: defer registered files gc to io_uring release") Reported-and-suggested-by: Jann Horn Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/c716c88321939156909cfa1bd8b0faaf1c804103.1701868795.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/rsrc.h | 7 ------- net/core/scm.c | 6 ++++++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 8625181fb87a..08ac0d8e07ef 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -77,17 +77,10 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file); -#if defined(CONFIG_UNIX) -static inline bool io_file_need_scm(struct file *filp) -{ - return !!unix_get_socket(filp); -} -#else static inline bool io_file_need_scm(struct file *filp) { return false; } -#endif static inline int io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) diff --git a/net/core/scm.c b/net/core/scm.c index 880027ecf516..7dc47c17d863 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -103,6 +104,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) if (fd < 0 || !(file = fget_raw(fd))) return -EBADF; + /* don't allow io_uring files */ + if (io_uring_get_socket(file)) { + fput(file); + return -EINVAL; + } *fpp++ = file; fpl->count++; } From dd864f6ee04e82667bce667ef896a4f25a59db07 Mon Sep 17 00:00:00 2001 From: Georg Gottleuber Date: Wed, 20 Sep 2023 10:52:10 +0200 Subject: [PATCH 0911/1397] nvme-pci: Add sleep quirk for Kingston drives commit 107b4e063d78c300b21e2d5291b1aa94c514ea5b upstream. Some Kingston NV1 and A2000 are wasting a lot of power on specific TUXEDO platforms in s2idle sleep if 'Simple Suspend' is used. This patch applies a new quirk 'Force No Simple Suspend' to achieve a low power sleep without 'Simple Suspend'. Signed-off-by: Werner Sembach Signed-off-by: Georg Gottleuber Cc: Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f35647c470af..74f74b459f5f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -156,6 +156,11 @@ enum nvme_quirks { * No temperature thresholds for channels other than 0 (Composite). */ NVME_QUIRK_NO_SECONDARY_TEMP_THRESH = (1 << 19), + + /* + * Disables simple suspend/resume path. + */ + NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND = (1 << 20), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3f0c9ee09a12..5c2a3af26d4d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2903,6 +2903,18 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) && dmi_match(DMI_BOARD_NAME, "LNVNB161216")) return NVME_QUIRK_SIMPLE_SUSPEND; + } else if (pdev->vendor == 0x2646 && (pdev->device == 0x2263 || + pdev->device == 0x500f)) { + /* + * Exclude some Kingston NV1 and A2000 devices from + * NVME_QUIRK_SIMPLE_SUSPEND. Do a full suspend to save a + * lot fo energy with s2idle sleep on some TUXEDO platforms. + */ + if (dmi_match(DMI_BOARD_NAME, "NS5X_NS7XAU") || + dmi_match(DMI_BOARD_NAME, "NS5x_7xAU") || + dmi_match(DMI_BOARD_NAME, "NS5x_7xPU") || + dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1")) + return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND; } return 0; @@ -2933,7 +2945,9 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, dev->dev = get_device(&pdev->dev); quirks |= check_vendor_combination_bug(pdev); - if (!noacpi && acpi_storage_d3(&pdev->dev)) { + if (!noacpi && + !(quirks & NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND) && + acpi_storage_d3(&pdev->dev)) { /* * Some systems use a bios work around to ask for D3 on * platforms that support kernel managed suspend. From 30df2901c4aca373c1a5e38f2b59bb5a1bd38674 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 3 Dec 2023 15:37:53 +0000 Subject: [PATCH 0912/1397] io_uring: fix mutex_unlock with unreferenced ctx commit f7b32e785042d2357c5abc23ca6db1b92c91a070 upstream. Callers of mutex_unlock() have to make sure that the mutex stays alive for the whole duration of the function call. For io_uring that means that the following pattern is not valid unless we ensure that the context outlives the mutex_unlock() call. mutex_lock(&ctx->uring_lock); req_put(req); // typically via io_req_task_submit() mutex_unlock(&ctx->uring_lock); Most contexts are fine: io-wq pins requests, syscalls hold the file, task works are taking ctx references and so on. However, the task work fallback path doesn't follow the rule. Cc: Fixes: 04fc6c802d ("io_uring: save ctx put/get for task_work submit") Reported-by: Jann Horn Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/io-uring/CAG48ez3xSoYb+45f1RLtktROJrpiDQ1otNvdR+YLQf7m+Krj5Q@mail.gmail.com/ Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index f09e3ee11229..bb8880d1e084 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -269,6 +269,7 @@ static __cold void io_fallback_req_func(struct work_struct *work) struct io_kiocb *req, *tmp; struct io_tw_state ts = { .locked = true, }; + percpu_ref_get(&ctx->refs); mutex_lock(&ctx->uring_lock); llist_for_each_entry_safe(req, tmp, node, io_task_work.node) req->io_task_work.func(req, &ts); @@ -276,6 +277,7 @@ static __cold void io_fallback_req_func(struct work_struct *work) return; io_submit_flush_completions(ctx); mutex_unlock(&ctx->uring_lock); + percpu_ref_put(&ctx->refs); } static int io_alloc_hash_table(struct io_hash_table *table, unsigned bits) @@ -3138,12 +3140,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) init_completion(&exit.completion); init_task_work(&exit.task_work, io_tctx_exit_cb); exit.ctx = ctx; - /* - * Some may use context even when all refs and requests have been put, - * and they are free to do so while still holding uring_lock or - * completion_lock, see io_req_task_submit(). Apart from other work, - * this lock/unlock section also waits them to finish. - */ + mutex_lock(&ctx->uring_lock); while (!list_empty(&ctx->tctx_list)) { WARN_ON_ONCE(time_after(jiffies, timeout)); From 5ae225bbf3581dd07f8a3198836ccd84aa948db1 Mon Sep 17 00:00:00 2001 From: Sarah Grant Date: Fri, 1 Dec 2023 18:16:54 +0000 Subject: [PATCH 0913/1397] ALSA: usb-audio: Add Pioneer DJM-450 mixer controls commit bbb8e71965c3737bdc691afd803a34bfd61cfbeb upstream. These values mirror those of the Pioneer DJM-250MK2 as the channel layout appears identical based on my observations. This duplication could be removed in later contributions if desired. Signed-off-by: Sarah Grant Cc: Link: https://lore.kernel.org/r/20231201181654.5058-1-s@srd.tw Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_quirks.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index ab0d459f4271..1f32e3ae3aa3 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -2978,6 +2978,7 @@ static int snd_bbfpro_controls_create(struct usb_mixer_interface *mixer) #define SND_DJM_850_IDX 0x2 #define SND_DJM_900NXS2_IDX 0x3 #define SND_DJM_750MK2_IDX 0x4 +#define SND_DJM_450_IDX 0x5 #define SND_DJM_CTL(_name, suffix, _default_value, _windex) { \ @@ -3108,6 +3109,31 @@ static const struct snd_djm_ctl snd_djm_ctls_250mk2[] = { }; +// DJM-450 +static const u16 snd_djm_opts_450_cap1[] = { + 0x0103, 0x0100, 0x0106, 0x0107, 0x0108, 0x0109, 0x010d, 0x010a }; + +static const u16 snd_djm_opts_450_cap2[] = { + 0x0203, 0x0200, 0x0206, 0x0207, 0x0208, 0x0209, 0x020d, 0x020a }; + +static const u16 snd_djm_opts_450_cap3[] = { + 0x030a, 0x0311, 0x0312, 0x0307, 0x0308, 0x0309, 0x030d }; + +static const u16 snd_djm_opts_450_pb1[] = { 0x0100, 0x0101, 0x0104 }; +static const u16 snd_djm_opts_450_pb2[] = { 0x0200, 0x0201, 0x0204 }; +static const u16 snd_djm_opts_450_pb3[] = { 0x0300, 0x0301, 0x0304 }; + +static const struct snd_djm_ctl snd_djm_ctls_450[] = { + SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL), + SND_DJM_CTL("Ch1 Input", 450_cap1, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch2 Input", 450_cap2, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch3 Input", 450_cap3, 0, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch1 Output", 450_pb1, 0, SND_DJM_WINDEX_PB), + SND_DJM_CTL("Ch2 Output", 450_pb2, 1, SND_DJM_WINDEX_PB), + SND_DJM_CTL("Ch3 Output", 450_pb3, 2, SND_DJM_WINDEX_PB) +}; + + // DJM-750 static const u16 snd_djm_opts_750_cap1[] = { 0x0101, 0x0103, 0x0106, 0x0107, 0x0108, 0x0109, 0x010a, 0x010f }; @@ -3203,6 +3229,7 @@ static const struct snd_djm_device snd_djm_devices[] = { [SND_DJM_850_IDX] = SND_DJM_DEVICE(850), [SND_DJM_900NXS2_IDX] = SND_DJM_DEVICE(900nxs2), [SND_DJM_750MK2_IDX] = SND_DJM_DEVICE(750mk2), + [SND_DJM_450_IDX] = SND_DJM_DEVICE(450), }; @@ -3449,6 +3476,9 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x2b73, 0x0017): /* Pioneer DJ DJM-250MK2 */ err = snd_djm_controls_create(mixer, SND_DJM_250MK2_IDX); break; + case USB_ID(0x2b73, 0x0013): /* Pioneer DJ DJM-450 */ + err = snd_djm_controls_create(mixer, SND_DJM_450_IDX); + break; case USB_ID(0x08e4, 0x017f): /* Pioneer DJ DJM-750 */ err = snd_djm_controls_create(mixer, SND_DJM_750_IDX); break; From 8e6ac8c6bae1b5e9c40ccd6a5abd23d47c8c5a54 Mon Sep 17 00:00:00 2001 From: Jason Zhang Date: Wed, 6 Dec 2023 09:31:39 +0800 Subject: [PATCH 0914/1397] ALSA: pcm: fix out-of-bounds in snd_pcm_state_names commit 2b3a7a302c9804e463f2ea5b54dc3a6ad106a344 upstream. The pcm state can be SNDRV_PCM_STATE_DISCONNECTED at disconnect callback, and there is not an entry of SNDRV_PCM_STATE_DISCONNECTED in snd_pcm_state_names. This patch adds the missing entry to resolve this issue. cat /proc/asound/card2/pcm0p/sub0/status That results in stack traces like the following: [ 99.702732][ T5171] Unexpected kernel BRK exception at EL1 [ 99.702774][ T5171] Internal error: BRK handler: f2005512 [#1] PREEMPT SMP [ 99.703858][ T5171] Modules linked in: bcmdhd(E) (...) [ 99.747425][ T5171] CPU: 3 PID: 5171 Comm: cat Tainted: G C OE 5.10.189-android13-4-00003-g4a17384380d8-ab11086999 #1 [ 99.748447][ T5171] Hardware name: Rockchip RK3588 CVTE V10 Board (DT) [ 99.749024][ T5171] pstate: 60400005 (nZCv daif +PAN -UAO -TCO BTYPE=--) [ 99.749616][ T5171] pc : snd_pcm_substream_proc_status_read+0x264/0x2bc [ 99.750204][ T5171] lr : snd_pcm_substream_proc_status_read+0xa4/0x2bc [ 99.750778][ T5171] sp : ffffffc0175abae0 [ 99.751132][ T5171] x29: ffffffc0175abb80 x28: ffffffc009a2c498 [ 99.751665][ T5171] x27: 0000000000000001 x26: ffffff810cbae6e8 [ 99.752199][ T5171] x25: 0000000000400cc0 x24: ffffffc0175abc60 [ 99.752729][ T5171] x23: 0000000000000000 x22: ffffff802f558400 [ 99.753263][ T5171] x21: ffffff81d8d8ff00 x20: ffffff81020cdc00 [ 99.753795][ T5171] x19: ffffff802d110000 x18: ffffffc014fbd058 [ 99.754326][ T5171] x17: 0000000000000000 x16: 0000000000000000 [ 99.754861][ T5171] x15: 000000000000c276 x14: ffffffff9a976fda [ 99.755392][ T5171] x13: 0000000065689089 x12: 000000000000d72e [ 99.755923][ T5171] x11: ffffff802d110000 x10: 00000000000000e0 [ 99.756457][ T5171] x9 : 9c431600c8385d00 x8 : 0000000000000008 [ 99.756990][ T5171] x7 : 0000000000000000 x6 : 000000000000003f [ 99.757522][ T5171] x5 : 0000000000000040 x4 : ffffffc0175abb70 [ 99.758056][ T5171] x3 : 0000000000000001 x2 : 0000000000000001 [ 99.758588][ T5171] x1 : 0000000000000000 x0 : 0000000000000000 [ 99.759123][ T5171] Call trace: [ 99.759404][ T5171] snd_pcm_substream_proc_status_read+0x264/0x2bc [ 99.759958][ T5171] snd_info_seq_show+0x54/0xa4 [ 99.760370][ T5171] seq_read_iter+0x19c/0x7d4 [ 99.760770][ T5171] seq_read+0xf0/0x128 [ 99.761117][ T5171] proc_reg_read+0x100/0x1f8 [ 99.761515][ T5171] vfs_read+0xf4/0x354 [ 99.761869][ T5171] ksys_read+0x7c/0x148 [ 99.762226][ T5171] __arm64_sys_read+0x20/0x30 [ 99.762625][ T5171] el0_svc_common+0xd0/0x1e4 [ 99.763023][ T5171] el0_svc+0x28/0x98 [ 99.763358][ T5171] el0_sync_handler+0x8c/0xf0 [ 99.763759][ T5171] el0_sync+0x1b8/0x1c0 [ 99.764118][ T5171] Code: d65f03c0 b9406102 17ffffae 94191565 (d42aa240) [ 99.764715][ T5171] ---[ end trace 1eeffa3e17c58e10 ]--- [ 99.780720][ T5171] Kernel panic - not syncing: BRK handler: Fatal exception Signed-off-by: Jason Zhang Cc: Link: https://lore.kernel.org/r/20231206013139.20506-1-jason.zhang@rock-chips.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 20bb2d7c8d4b..6d0c9c37796c 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -253,6 +253,7 @@ static const char * const snd_pcm_state_names[] = { STATE(DRAINING), STATE(PAUSED), STATE(SUSPENDED), + STATE(DISCONNECTED), }; static const char * const snd_pcm_access_names[] = { From 6e25980d046f3c7e6134bb9aa69a7152fce176e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pascal=20No=C3=ABl?= Date: Fri, 1 Dec 2023 17:37:44 -0800 Subject: [PATCH 0915/1397] ALSA: hda/realtek: Apply quirk for ASUS UM3504DA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c5c325bb5849868d76969d3fe014515f5e99eabc upstream. The ASUS UM3504DA uses a Realtek HDA codec and two CS35L41 amplifiers via I2C. Apply existing quirk to model. Signed-off-by: Pascal Noël Cc: Link: https://lore.kernel.org/r/20231202013744.12369-1-pascal@pascalcompiles.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 758abe9dffd6..d5433ec37abd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9873,6 +9873,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x17f3, "ROG Ally RC71L_RC71L", ALC294_FIXUP_ASUS_ALLY), SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS), SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x18d3, "ASUS UM3504DA", ALC294_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x194e, "ASUS UX563FD", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1970, "ASUS UX550VE", ALC289_FIXUP_ASUS_GA401), From b3f1d9230457db71509d657ceb090fc9aa168b95 Mon Sep 17 00:00:00 2001 From: Aleksandrs Vinarskis Date: Mon, 4 Dec 2023 00:30:06 +0100 Subject: [PATCH 0916/1397] ALSA: hda/realtek: fix speakers on XPS 9530 (2023) commit cd14dedf15be432066e63783c63d650f2800cd48 upstream. XPS 9530 has 2 tweeters and 2 subwoofers powered by CS35L41 amplifier, SPI connected. For subwoofers to work, it requires both to enable amplifier support, and to enable output to subwoofers via 0x17 quirk (similalry to XPS 9510/9520). Signed-off-by: Aleksandrs Vinarskis Cc: Link: https://lore.kernel.org/r/20231203233006.100558-1-alex.vinarskis@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d5433ec37abd..85b3acde0da2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9615,6 +9615,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), + SND_PCI_QUIRK(0x1028, 0x0beb, "Dell XPS 15 9530 (2023)", ALC289_FIXUP_DELL_CS35L41_SPI_2), SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), From 65a7a5b2d53164183da9c7631729a376ebe7a56f Mon Sep 17 00:00:00 2001 From: Bin Li Date: Mon, 4 Dec 2023 18:04:50 +0800 Subject: [PATCH 0917/1397] ALSA: hda/realtek: Enable headset on Lenovo M90 Gen5 commit 6f7e4664e597440dfbdb8b2931c561b717030d07 upstream. Lenovo M90 Gen5 is equipped with ALC897, and it needs ALC897_FIXUP_HEADSET_MIC_PIN quirk to make its headset mic work. Signed-off-by: Bin Li Cc: Link: https://lore.kernel.org/r/20231204100450.642783-1-bin.li@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 85b3acde0da2..cb729c41c16e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -12107,6 +12107,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x32f7, "Lenovo ThinkCentre M90", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x3321, "Lenovo ThinkCentre M70 Gen4", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x331b, "Lenovo ThinkCentre M90 Gen4", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x3364, "Lenovo ThinkCentre M90 Gen5", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x3742, "Lenovo TianYi510Pro-14IOB", ALC897_FIXUP_HEADSET_MIC_PIN2), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD), From 70a6885542b12c1293974f6862b86dce21f6d242 Mon Sep 17 00:00:00 2001 From: Tim Bosse Date: Wed, 6 Dec 2023 09:26:29 -0500 Subject: [PATCH 0918/1397] ALSA: hda/realtek: add new Framework laptop to quirks commit 33038efb64f7576bac635164021f5c984d4c755f upstream. The Framework Laptop 13 (AMD Ryzen 7040Series) has an ALC295 with a disconnected or faulty headset mic presence detect similar to the previous models. It works with the same quirk chain as 309d7363ca3d9fcdb92ff2d958be14d7e8707f68. This model has a VID:PID of f111:0006. Signed-off-by: Tim Bosse Cc: Link: https://lore.kernel.org/r/20231206142629.388615-1-flinn@timbos.se Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cb729c41c16e..077504f05704 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10182,6 +10182,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 /* Below is a quirk table taken from the old code. From 5f1c1e8de568d0f4eb5ed3d3712c28476ca32788 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 6 Dec 2023 13:39:27 -0600 Subject: [PATCH 0919/1397] ALSA: hda/realtek: Add Framework laptop 16 to quirks commit 8804fa04a492f4176ea407390052292912227820 upstream. The Framework 16" laptop has the same controller as other Framework models. Apply the presence detection quirk. Signed-off-by: Mario Limonciello Cc: Link: https://lore.kernel.org/r/20231206193927.2996-1-mario.limonciello@amd.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 077504f05704..a4c55503a61e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10182,6 +10182,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x0005, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 From 73249ef76c2d759bdbb466dc6ed9822233ae8c9e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Dec 2023 19:20:35 +0100 Subject: [PATCH 0920/1397] ALSA: hda/realtek: Add quirk for Lenovo Yoga Pro 7 commit 634e5e1e06f5cdd614a1bc429ecb243a51cc009d upstream. Lenovo Yoga Pro 7 14APH8 (PCI SSID 17aa:3882) seems requiring the similar workaround like Yoga 9 model for the bass speaker. Cc: Link: https://lore.kernel.org/r/CAGGk=CRRQ1L9p771HsXTN_ebZP41Qj+3gw35Gezurn+nokRewg@mail.gmail.com Link: https://lore.kernel.org/r/20231207182035.30248-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a4c55503a61e..5c609c1150c4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10116,6 +10116,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3881, "YB9 dual power mode2 YC", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x17aa, 0x3882, "Lenovo Yoga Pro 7 14APH8", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3884, "Y780 YG DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C), From d251b9818302b01d2ee5201084d8bd59f090becb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 6 Dec 2023 10:00:50 -0500 Subject: [PATCH 0921/1397] ring-buffer: Test last update in 32bit version of __rb_time_read() commit f458a1453424e03462b5bb539673c9a3cddda480 upstream. Since 64 bit cmpxchg() is very expensive on 32bit architectures, the timestamp used by the ring buffer does some interesting tricks to be able to still have an atomic 64 bit number. It originally just used 60 bits and broke it up into two 32 bit words where the extra 2 bits were used for synchronization. But this was not enough for all use cases, and all 64 bits were required. The 32bit version of the ring buffer timestamp was then broken up into 3 32bit words using the same counter trick. But one update was not done. The check to see if the read operation was done without interruption only checked the first two words and not last one (like it had before this update). Fix it by making sure all three updates happen without interruption by comparing the initial counter with the last updated counter. Link: https://lore.kernel.org/linux-trace-kernel/20231206100050.3100b7bb@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: f03f2abce4f39 ("ring-buffer: Have 32 bit time stamps use all 64 bits") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 515cafdb18d9..0689e5d95a80 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -644,8 +644,8 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt) *cnt = rb_time_cnt(top); - /* If top and bottom counts don't match, this interrupted a write */ - if (*cnt != rb_time_cnt(bottom)) + /* If top and msb counts don't match, this interrupted a write */ + if (*cnt != rb_time_cnt(msb)) return false; /* The shift to msb will lose its cnt bits */ From 56a334310fa9dabf466ac74a59adc65ef018e10d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 6 Dec 2023 10:02:44 -0500 Subject: [PATCH 0922/1397] ring-buffer: Force absolute timestamp on discard of event commit b2dd797543cfa6580eac8408dd67fa02164d9e56 upstream. There's a race where if an event is discarded from the ring buffer and an interrupt were to happen at that time and insert an event, the time stamp is still used from the discarded event as an offset. This can screw up the timings. If the event is going to be discarded, set the "before_stamp" to zero. When a new event comes in, it compares the "before_stamp" with the "write_stamp" and if they are not equal, it will insert an absolute timestamp. This will prevent the timings from getting out of sync due to the discarded event. Link: https://lore.kernel.org/linux-trace-kernel/20231206100244.5130f9b3@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: 6f6be606e763f ("ring-buffer: Force before_stamp and write_stamp to be different on discard") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0689e5d95a80..f1ef4329343b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3030,22 +3030,19 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, local_read(&bpage->write) & ~RB_WRITE_MASK; unsigned long event_length = rb_event_length(event); + /* + * For the before_stamp to be different than the write_stamp + * to make sure that the next event adds an absolute + * value and does not rely on the saved write stamp, which + * is now going to be bogus. + */ + rb_time_set(&cpu_buffer->before_stamp, 0); + /* Something came in, can't discard */ if (!rb_time_cmpxchg(&cpu_buffer->write_stamp, write_stamp, write_stamp - delta)) return false; - /* - * It's possible that the event time delta is zero - * (has the same time stamp as the previous event) - * in which case write_stamp and before_stamp could - * be the same. In such a case, force before_stamp - * to be different than write_stamp. It doesn't - * matter what it is, as long as its different. - */ - if (!delta) - rb_time_set(&cpu_buffer->before_stamp, 0); - /* * If an event were to come in now, it would see that the * write_stamp and the before_stamp are different, and assume From 1cdc934c82bbd45cf81a9ce58704fa23156f8ff3 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 30 Nov 2023 11:40:18 +0800 Subject: [PATCH 0923/1397] highmem: fix a memory copy problem in memcpy_from_folio commit 73424d00dc63ba681856e06cfb0a5abbdb62e2b5 upstream. Clang static checker complains that value stored to 'from' is never read. And memcpy_from_folio() only copy the last chunk memory from folio to destination. Use 'to += chunk' to replace 'from += chunk' to fix this typo problem. Link: https://lkml.kernel.org/r/20231130034017.1210429-1-suhui@nfschina.com Fixes: b23d03ef7af5 ("highmem: add memcpy_to_folio() and memcpy_from_folio()") Signed-off-by: Su Hui Reviewed-by: Matthew Wilcox (Oracle) Cc: Ira Weiny Cc: Jiaqi Yan Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Collingbourne Cc: Tom Rix Cc: Tony Luck Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/highmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 99c474de800d..75607d4ba26c 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -454,7 +454,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, memcpy(to, from, chunk); kunmap_local(from); - from += chunk; + to += chunk; offset += chunk; len -= chunk; } while (len > 0); From ce58f14113a8d494fbac92a9faf544da215226cf Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 29 Nov 2023 23:15:47 +0900 Subject: [PATCH 0924/1397] nilfs2: fix missing error check for sb_set_blocksize call commit d61d0ab573649789bf9eb909c89a1a193b2e3d10 upstream. When mounting a filesystem image with a block size larger than the page size, nilfs2 repeatedly outputs long error messages with stack traces to the kernel log, such as the following: getblk(): invalid block size 8192 requested logical block size: 512 ... Call Trace: dump_stack_lvl+0x92/0xd4 dump_stack+0xd/0x10 bdev_getblk+0x33a/0x354 __breadahead+0x11/0x80 nilfs_search_super_root+0xe2/0x704 [nilfs2] load_nilfs+0x72/0x504 [nilfs2] nilfs_mount+0x30f/0x518 [nilfs2] legacy_get_tree+0x1b/0x40 vfs_get_tree+0x18/0xc4 path_mount+0x786/0xa88 __ia32_sys_mount+0x147/0x1a8 __do_fast_syscall_32+0x56/0xc8 do_fast_syscall_32+0x29/0x58 do_SYSENTER_32+0x15/0x18 entry_SYSENTER_32+0x98/0xf1 ... This overloads the system logger. And to make matters worse, it sometimes crashes the kernel with a memory access violation. This is because the return value of the sb_set_blocksize() call, which should be checked for errors, is not checked. The latter issue is due to out-of-buffer memory being accessed based on a large block size that caused sb_set_blocksize() to fail for buffers read with the initial minimum block size that remained unupdated in the super_block structure. Since nilfs2 mkfs tool does not accept block sizes larger than the system page size, this has been overlooked. However, it is possible to create this situation by intentionally modifying the tool or by passing a filesystem image created on a system with a large page size to a system with a smaller page size and mounting it. Fix this issue by inserting the expected error handling for the call to sb_set_blocksize(). Link: https://lkml.kernel.org/r/20231129141547.4726-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/the_nilfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 0f0667957c81..71400496ed36 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -716,7 +716,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) goto failed_sbh; } nilfs_release_super_block(nilfs); - sb_set_blocksize(sb, blocksize); + if (!sb_set_blocksize(sb, blocksize)) { + nilfs_err(sb, "bad blocksize %d", blocksize); + err = -EINVAL; + goto out; + } err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); if (err) From 0b14276bcb857d321c6069523fe4d3c05efa4886 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 5 Dec 2023 17:59:47 +0900 Subject: [PATCH 0925/1397] nilfs2: prevent WARNING in nilfs_sufile_set_segment_usage() commit 675abf8df1353e0e3bde314993e0796c524cfbf0 upstream. If nilfs2 reads a disk image with corrupted segment usage metadata, and its segment usage information is marked as an error for the segment at the write location, nilfs_sufile_set_segment_usage() can trigger WARN_ONs during log writing. Segments newly allocated for writing with nilfs_sufile_alloc() will not have this error flag set, but this unexpected situation will occur if the segment indexed by either nilfs->ns_segnum or nilfs->ns_nextnum (active segment) was marked in error. Fix this issue by inserting a sanity check to treat it as a file system corruption. Since error returns are not allowed during the execution phase where nilfs_sufile_set_segment_usage() is used, this inserts the sanity check into nilfs_sufile_mark_dirty() which pre-reads the buffer containing the segment usage record to be updated and sets it up in a dirty state for writing. In addition, nilfs_sufile_set_segment_usage() is also called when canceling log writing and undoing segment usage update, so in order to avoid issuing the same kernel warning in that case, in case of cancellation, avoid checking the error flag in nilfs_sufile_set_segment_usage(). Link: https://lkml.kernel.org/r/20231205085947.4431-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+14e9f834f6ddecece094@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=14e9f834f6ddecece094 Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/sufile.c | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 2c6078a6b8ec..58ca7c936393 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -501,15 +501,38 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); - if (!ret) { - mark_buffer_dirty(bh); - nilfs_mdt_mark_dirty(sufile); - kaddr = kmap_atomic(bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + if (ret) + goto out_sem; + + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + if (unlikely(nilfs_segment_usage_error(su))) { + struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; + + kunmap_atomic(kaddr); + brelse(bh); + if (nilfs_segment_is_active(nilfs, segnum)) { + nilfs_error(sufile->i_sb, + "active segment %llu is erroneous", + (unsigned long long)segnum); + } else { + /* + * Segments marked erroneous are never allocated by + * nilfs_sufile_alloc(); only active segments, ie, + * the segments indexed by ns_segnum or ns_nextnum, + * can be erroneous here. + */ + WARN_ON_ONCE(1); + } + ret = -EIO; + } else { nilfs_segment_usage_set_dirty(su); kunmap_atomic(kaddr); + mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(sufile); brelse(bh); } +out_sem: up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } @@ -536,9 +559,14 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, kaddr = kmap_atomic(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); - WARN_ON(nilfs_segment_usage_error(su)); - if (modtime) + if (modtime) { + /* + * Check segusage error and set su_lastmod only when updating + * this entry with a valid timestamp, not for cancellation. + */ + WARN_ON_ONCE(nilfs_segment_usage_error(su)); su->su_lastmod = cpu_to_le64(modtime); + } su->su_nblocks = cpu_to_le32(nblocks); kunmap_atomic(kaddr); From 39f603a262869fac18fc2445796dd7043ec77625 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 20 Nov 2023 16:35:59 +0800 Subject: [PATCH 0926/1397] lib/group_cpus.c: avoid acquiring cpu hotplug lock in group_cpus_evenly commit 0263f92fadbb9d294d5971ac57743f882c93b2b3 upstream. group_cpus_evenly() could be part of storage driver's error handler, such as nvme driver, when may happen during CPU hotplug, in which storage queue has to drain its pending IOs because all CPUs associated with the queue are offline and the queue is becoming inactive. And handling IO needs error handler to provide forward progress. Then deadlock is caused: 1) inside CPU hotplug handler, CPU hotplug lock is held, and blk-mq's handler is waiting for inflight IO 2) error handler is waiting for CPU hotplug lock 3) inflight IO can't be completed in blk-mq's CPU hotplug handler because error handling can't provide forward progress. Solve the deadlock by not holding CPU hotplug lock in group_cpus_evenly(), in which two stage spreads are taken: 1) the 1st stage is over all present CPUs; 2) the end stage is over all other CPUs. Turns out the two stage spread just needs consistent 'cpu_present_mask', and remove the CPU hotplug lock by storing it into one local cache. This way doesn't change correctness, because all CPUs are still covered. Link: https://lkml.kernel.org/r/20231120083559.285174-1-ming.lei@redhat.com Signed-off-by: Ming Lei Reported-by: Yi Zhang Reported-by: Guangwu Zhang Tested-by: Guangwu Zhang Reviewed-by: Chengming Zhou Reviewed-by: Jens Axboe Cc: Keith Busch Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- lib/group_cpus.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/group_cpus.c b/lib/group_cpus.c index aa3f6815bb12..ee272c4cefcc 100644 --- a/lib/group_cpus.c +++ b/lib/group_cpus.c @@ -366,13 +366,25 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) if (!masks) goto fail_node_to_cpumask; - /* Stabilize the cpumasks */ - cpus_read_lock(); build_node_to_cpumask(node_to_cpumask); + /* + * Make a local cache of 'cpu_present_mask', so the two stages + * spread can observe consistent 'cpu_present_mask' without holding + * cpu hotplug lock, then we can reduce deadlock risk with cpu + * hotplug code. + * + * Here CPU hotplug may happen when reading `cpu_present_mask`, and + * we can live with the case because it only affects that hotplug + * CPU is handled in the 1st or 2nd stage, and either way is correct + * from API user viewpoint since 2-stage spread is sort of + * optimization. + */ + cpumask_copy(npresmsk, data_race(cpu_present_mask)); + /* grouping present CPUs first */ ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, - cpu_present_mask, nmsk, masks); + npresmsk, nmsk, masks); if (ret < 0) goto fail_build_affinity; nr_present = ret; @@ -387,15 +399,13 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) curgrp = 0; else curgrp = nr_present; - cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); + cpumask_andnot(npresmsk, cpu_possible_mask, npresmsk); ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, npresmsk, nmsk, masks); if (ret >= 0) nr_others = ret; fail_build_affinity: - cpus_read_unlock(); - if (ret >= 0) WARN_ON(nr_present + nr_others < numgrps); From 9ec2d9267304af3057d1c9302ddfa7a8c7fb8799 Mon Sep 17 00:00:00 2001 From: Tim Van Patten Date: Wed, 15 Nov 2023 09:20:43 -0700 Subject: [PATCH 0927/1397] cgroup_freezer: cgroup_freezing: Check if not frozen commit cff5f49d433fcd0063c8be7dd08fa5bf190c6c37 upstream. __thaw_task() was recently updated to warn if the task being thawed was part of a freezer cgroup that is still currently freezing: void __thaw_task(struct task_struct *p) { ... if (WARN_ON_ONCE(freezing(p))) goto unlock; This has exposed a bug in cgroup1 freezing where when CGROUP_FROZEN is asserted, the CGROUP_FREEZING bits are not also cleared at the same time. Meaning, when a cgroup is marked FROZEN it continues to be marked FREEZING as well. This causes the WARNING to trigger, because cgroup_freezing() thinks the cgroup is still freezing. There are two ways to fix this: 1. Whenever FROZEN is set, clear FREEZING for the cgroup and all children cgroups. 2. Update cgroup_freezing() to also verify that FROZEN is not set. This patch implements option (2), since it's smaller and more straightforward. Signed-off-by: Tim Van Patten Tested-by: Mark Hasemeyer Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/legacy_freezer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c index 122dacb3a443..66d1708042a7 100644 --- a/kernel/cgroup/legacy_freezer.c +++ b/kernel/cgroup/legacy_freezer.c @@ -66,9 +66,15 @@ static struct freezer *parent_freezer(struct freezer *freezer) bool cgroup_freezing(struct task_struct *task) { bool ret; + unsigned int state; rcu_read_lock(); - ret = task_freezer(task)->state & CGROUP_FREEZING; + /* Check if the cgroup is still FREEZING, but not FROZEN. The extra + * !FROZEN check is required, because the FREEZING bit is not cleared + * when the state FROZEN is reached. + */ + state = task_freezer(task)->state; + ret = (state & CGROUP_FREEZING) && !(state & CGROUP_FROZEN); rcu_read_unlock(); return ret; From f8f32f912603f79b3784de07ab5438f31440068e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Nov 2023 19:37:17 +0100 Subject: [PATCH 0928/1397] checkstack: fix printed address commit ee34db3f271cea4d4252048617919c2caafe698b upstream. All addresses printed by checkstack have an extra incorrect 0 appended at the end. This was introduced with commit 677f1410e058 ("scripts/checkstack.pl: don't display $dre as different entity"): since then the address is taken from the line which contains the function name, instead of the line which contains stack consumption. E.g. on s390: 0000000000100a30 : ... 100a44: e3 f0 ff 70 ff 71 lay %r15,-144(%r15) So the used regex which matches spaces and hexadecimal numbers to extract an address now matches a different substring. Subsequently replacing spaces with 0 appends a zero at the and, instead of replacing leading spaces. Fix this by using the proper regex, and simplify the code a bit. Link: https://lkml.kernel.org/r/20231120183719.2188479-2-hca@linux.ibm.com Fixes: 677f1410e058 ("scripts/checkstack.pl: don't display $dre as different entity") Signed-off-by: Heiko Carstens Cc: Maninder Singh Cc: Masahiro Yamada Cc: Vaneet Narang Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- scripts/checkstack.pl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index 84f5fb7f1cec..ac74f8629cea 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -139,15 +139,11 @@ sub arm_push_handling { while (my $line = ) { if ($line =~ m/$funcre/) { $func = $1; - next if $line !~ m/^($xs*)/; + next if $line !~ m/^($x*)/; if ($total_size > $min_stack) { push @stack, "$intro$total_size\n"; } - - $addr = $1; - $addr =~ s/ /0/g; - $addr = "0x$addr"; - + $addr = "0x$1"; $intro = "$addr $func [$file]:"; my $padlen = 56 - length($intro); while ($padlen > 0) { From 1741e17c3939d19ecc568c05d13bcd60c283171d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Dec 2023 16:52:09 -0500 Subject: [PATCH 0929/1397] tracing: Always update snapshot buffer size commit 7be76461f302ec05cbd62b90b2a05c64299ca01f upstream. It use to be that only the top level instance had a snapshot buffer (for latency tracers like wakeup and irqsoff). The update of the ring buffer size would check if the instance was the top level and if so, it would also update the snapshot buffer as it needs to be the same as the main buffer. Now that lower level instances also has a snapshot buffer, they too need to update their snapshot buffer sizes when the main buffer is changed, otherwise the following can be triggered: # cd /sys/kernel/tracing # echo 1500 > buffer_size_kb # mkdir instances/foo # echo irqsoff > instances/foo/current_tracer # echo 1000 > instances/foo/buffer_size_kb Produces: WARNING: CPU: 2 PID: 856 at kernel/trace/trace.c:1938 update_max_tr_single.part.0+0x27d/0x320 Which is: ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); if (ret == -EBUSY) { [..] } WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); <== here That's because ring_buffer_swap_cpu() has: int ret = -EINVAL; [..] /* At least make sure the two buffers are somewhat the same */ if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages) goto out; [..] out: return ret; } Instead, update all instances' snapshot buffer sizes when their main buffer size is updated. Link: https://lkml.kernel.org/r/20231205220010.454662151@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 6d9b3fa5e7f6 ("tracing: Move tracing_max_latency into trace_array") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 689d063dd206..36caff9a1452 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6399,8 +6399,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, return ret; #ifdef CONFIG_TRACER_MAX_TRACE - if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) || - !tr->current_trace->use_max_tr) + if (!tr->current_trace->use_max_tr) goto out; ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); From 12c48e88e5c7dff3c26217e7527eaddcf4a5809c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Dec 2023 16:52:10 -0500 Subject: [PATCH 0930/1397] tracing: Stop current tracer when resizing buffer commit d78ab792705c7be1b91243b2544d1a79406a2ad7 upstream. When the ring buffer is being resized, it can cause side effects to the running tracer. For instance, there's a race with irqsoff tracer that swaps individual per cpu buffers between the main buffer and the snapshot buffer. The resize operation modifies the main buffer and then the snapshot buffer. If a swap happens in between those two operations it will break the tracer. Simply stop the running tracer before resizing the buffers and enable it again when finished. Link: https://lkml.kernel.org/r/20231205220010.748996423@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 3928a8a2d9808 ("ftrace: make work with new ring buffer") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 36caff9a1452..e08bb90733f5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6394,9 +6394,12 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, if (!tr->array_buffer.buffer) return 0; + /* Do not allow tracing while resizng ring buffer */ + tracing_stop_tr(tr); + ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); if (ret < 0) - return ret; + goto out_start; #ifdef CONFIG_TRACER_MAX_TRACE if (!tr->current_trace->use_max_tr) @@ -6424,7 +6427,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, WARN_ON(1); tracing_disabled = 1; } - return ret; + goto out_start; } update_buffer_entries(&tr->max_buffer, cpu); @@ -6433,7 +6436,8 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, #endif /* CONFIG_TRACER_MAX_TRACE */ update_buffer_entries(&tr->array_buffer, cpu); - + out_start: + tracing_start_tr(tr); return ret; } From 0486a1f9d9cc166599ba3273d99c324ba8beb346 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Dec 2023 16:52:11 -0500 Subject: [PATCH 0931/1397] tracing: Disable snapshot buffer when stopping instance tracers commit b538bf7d0ec11ca49f536dfda742a5f6db90a798 upstream. It use to be that only the top level instance had a snapshot buffer (for latency tracers like wakeup and irqsoff). When stopping a tracer in an instance would not disable the snapshot buffer. This could have some unintended consequences if the irqsoff tracer is enabled. Consolidate the tracing_start/stop() with tracing_start/stop_tr() so that all instances behave the same. The tracing_start/stop() functions will just call their respective tracing_start/stop_tr() with the global_array passed in. Link: https://lkml.kernel.org/r/20231205220011.041220035@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 6d9b3fa5e7f6 ("tracing: Move tracing_max_latency into trace_array") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 110 +++++++++++++------------------------------ 1 file changed, 34 insertions(+), 76 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e08bb90733f5..5b2ba69dbb7f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2359,13 +2359,7 @@ int is_tracing_stopped(void) return global_trace.stop_count; } -/** - * tracing_start - quick start of the tracer - * - * If tracing is enabled but was stopped by tracing_stop, - * this will start the tracer back up. - */ -void tracing_start(void) +static void tracing_start_tr(struct trace_array *tr) { struct trace_buffer *buffer; unsigned long flags; @@ -2373,119 +2367,83 @@ void tracing_start(void) if (tracing_disabled) return; - raw_spin_lock_irqsave(&global_trace.start_lock, flags); - if (--global_trace.stop_count) { - if (global_trace.stop_count < 0) { + raw_spin_lock_irqsave(&tr->start_lock, flags); + if (--tr->stop_count) { + if (WARN_ON_ONCE(tr->stop_count < 0)) { /* Someone screwed up their debugging */ - WARN_ON_ONCE(1); - global_trace.stop_count = 0; + tr->stop_count = 0; } goto out; } /* Prevent the buffers from switching */ - arch_spin_lock(&global_trace.max_lock); + arch_spin_lock(&tr->max_lock); - buffer = global_trace.array_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); #ifdef CONFIG_TRACER_MAX_TRACE - buffer = global_trace.max_buffer.buffer; + buffer = tr->max_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); #endif - arch_spin_unlock(&global_trace.max_lock); - - out: - raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); -} - -static void tracing_start_tr(struct trace_array *tr) -{ - struct trace_buffer *buffer; - unsigned long flags; - - if (tracing_disabled) - return; - - /* If global, we need to also start the max tracer */ - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) - return tracing_start(); - - raw_spin_lock_irqsave(&tr->start_lock, flags); - - if (--tr->stop_count) { - if (tr->stop_count < 0) { - /* Someone screwed up their debugging */ - WARN_ON_ONCE(1); - tr->stop_count = 0; - } - goto out; - } - - buffer = tr->array_buffer.buffer; - if (buffer) - ring_buffer_record_enable(buffer); + arch_spin_unlock(&tr->max_lock); out: raw_spin_unlock_irqrestore(&tr->start_lock, flags); } /** - * tracing_stop - quick stop of the tracer + * tracing_start - quick start of the tracer * - * Light weight way to stop tracing. Use in conjunction with - * tracing_start. + * If tracing is enabled but was stopped by tracing_stop, + * this will start the tracer back up. */ -void tracing_stop(void) +void tracing_start(void) + +{ + return tracing_start_tr(&global_trace); +} + +static void tracing_stop_tr(struct trace_array *tr) { struct trace_buffer *buffer; unsigned long flags; - raw_spin_lock_irqsave(&global_trace.start_lock, flags); - if (global_trace.stop_count++) + raw_spin_lock_irqsave(&tr->start_lock, flags); + if (tr->stop_count++) goto out; /* Prevent the buffers from switching */ - arch_spin_lock(&global_trace.max_lock); + arch_spin_lock(&tr->max_lock); - buffer = global_trace.array_buffer.buffer; + buffer = tr->array_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); #ifdef CONFIG_TRACER_MAX_TRACE - buffer = global_trace.max_buffer.buffer; + buffer = tr->max_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); #endif - arch_spin_unlock(&global_trace.max_lock); + arch_spin_unlock(&tr->max_lock); out: - raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); + raw_spin_unlock_irqrestore(&tr->start_lock, flags); } -static void tracing_stop_tr(struct trace_array *tr) +/** + * tracing_stop - quick stop of the tracer + * + * Light weight way to stop tracing. Use in conjunction with + * tracing_start. + */ +void tracing_stop(void) { - struct trace_buffer *buffer; - unsigned long flags; - - /* If global, we need to also stop the max tracer */ - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) - return tracing_stop(); - - raw_spin_lock_irqsave(&tr->start_lock, flags); - if (tr->stop_count++) - goto out; - - buffer = tr->array_buffer.buffer; - if (buffer) - ring_buffer_record_disable(buffer); - - out: - raw_spin_unlock_irqrestore(&tr->start_lock, flags); + return tracing_stop_tr(&global_trace); } static int trace_save_cmdline(struct task_struct *tsk) From fc9fa702dbaad3f178172c80ef2526ead980124f Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 5 Dec 2023 17:17:34 +0100 Subject: [PATCH 0932/1397] tracing: Fix incomplete locking when disabling buffered events commit 7fed14f7ac9cf5e38c693836fe4a874720141845 upstream. The following warning appears when using buffered events: [ 203.556451] WARNING: CPU: 53 PID: 10220 at kernel/trace/ring_buffer.c:3912 ring_buffer_discard_commit+0x2eb/0x420 [...] [ 203.670690] CPU: 53 PID: 10220 Comm: stress-ng-sysin Tainted: G E 6.7.0-rc2-default #4 56e6d0fcf5581e6e51eaaecbdaec2a2338c80f3a [ 203.670704] Hardware name: Intel Corp. GROVEPORT/GROVEPORT, BIOS GVPRCRB1.86B.0016.D04.1705030402 05/03/2017 [ 203.670709] RIP: 0010:ring_buffer_discard_commit+0x2eb/0x420 [ 203.735721] Code: 4c 8b 4a 50 48 8b 42 48 49 39 c1 0f 84 b3 00 00 00 49 83 e8 01 75 b1 48 8b 42 10 f0 ff 40 08 0f 0b e9 fc fe ff ff f0 ff 47 08 <0f> 0b e9 77 fd ff ff 48 8b 42 10 f0 ff 40 08 0f 0b e9 f5 fe ff ff [ 203.735734] RSP: 0018:ffffb4ae4f7b7d80 EFLAGS: 00010202 [ 203.735745] RAX: 0000000000000000 RBX: ffffb4ae4f7b7de0 RCX: ffff8ac10662c000 [ 203.735754] RDX: ffff8ac0c750be00 RSI: ffff8ac10662c000 RDI: ffff8ac0c004d400 [ 203.781832] RBP: ffff8ac0c039cea0 R08: 0000000000000000 R09: 0000000000000000 [ 203.781839] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 203.781842] R13: ffff8ac10662c000 R14: ffff8ac0c004d400 R15: ffff8ac10662c008 [ 203.781846] FS: 00007f4cd8a67740(0000) GS:ffff8ad798880000(0000) knlGS:0000000000000000 [ 203.781851] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 203.781855] CR2: 0000559766a74028 CR3: 00000001804c4000 CR4: 00000000001506f0 [ 203.781862] Call Trace: [ 203.781870] [ 203.851949] trace_event_buffer_commit+0x1ea/0x250 [ 203.851967] trace_event_raw_event_sys_enter+0x83/0xe0 [ 203.851983] syscall_trace_enter.isra.0+0x182/0x1a0 [ 203.851990] do_syscall_64+0x3a/0xe0 [ 203.852075] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 203.852090] RIP: 0033:0x7f4cd870fa77 [ 203.982920] Code: 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 b8 89 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e9 43 0e 00 f7 d8 64 89 01 48 [ 203.982932] RSP: 002b:00007fff99717dd8 EFLAGS: 00000246 ORIG_RAX: 0000000000000089 [ 203.982942] RAX: ffffffffffffffda RBX: 0000558ea1d7b6f0 RCX: 00007f4cd870fa77 [ 203.982948] RDX: 0000000000000000 RSI: 00007fff99717de0 RDI: 0000558ea1d7b6f0 [ 203.982957] RBP: 00007fff99717de0 R08: 00007fff997180e0 R09: 00007fff997180e0 [ 203.982962] R10: 00007fff997180e0 R11: 0000000000000246 R12: 00007fff99717f40 [ 204.049239] R13: 00007fff99718590 R14: 0000558e9f2127a8 R15: 00007fff997180b0 [ 204.049256] For instance, it can be triggered by running these two commands in parallel: $ while true; do echo hist:key=id.syscall:val=hitcount > \ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger; done $ stress-ng --sysinfo $(nproc) The warning indicates that the current ring_buffer_per_cpu is not in the committing state. It happens because the active ring_buffer_event doesn't actually come from the ring_buffer_per_cpu but is allocated from trace_buffered_event. The bug is in function trace_buffered_event_disable() where the following normally happens: * The code invokes disable_trace_buffered_event() via smp_call_function_many() and follows it by synchronize_rcu(). This increments the per-CPU variable trace_buffered_event_cnt on each target CPU and grants trace_buffered_event_disable() the exclusive access to the per-CPU variable trace_buffered_event. * Maintenance is performed on trace_buffered_event, all per-CPU event buffers get freed. * The code invokes enable_trace_buffered_event() via smp_call_function_many(). This decrements trace_buffered_event_cnt and releases the access to trace_buffered_event. A problem is that smp_call_function_many() runs a given function on all target CPUs except on the current one. The following can then occur: * Task X executing trace_buffered_event_disable() runs on CPU 0. * The control reaches synchronize_rcu() and the task gets rescheduled on another CPU 1. * The RCU synchronization finishes. At this point, trace_buffered_event_disable() has the exclusive access to all trace_buffered_event variables except trace_buffered_event[CPU0] because trace_buffered_event_cnt[CPU0] is never incremented and if the buffer is currently unused, remains set to 0. * A different task Y is scheduled on CPU 0 and hits a trace event. The code in trace_event_buffer_lock_reserve() sees that trace_buffered_event_cnt[CPU0] is set to 0 and decides the use the buffer provided by trace_buffered_event[CPU0]. * Task X continues its execution in trace_buffered_event_disable(). The code incorrectly frees the event buffer pointed by trace_buffered_event[CPU0] and resets the variable to NULL. * Task Y writes event data to the now freed buffer and later detects the created inconsistency. The issue is observable since commit dea499781a11 ("tracing: Fix warning in trace_buffered_event_disable()") which moved the call of trace_buffered_event_disable() in __ftrace_event_enable_disable() earlier, prior to invoking call->class->reg(.. TRACE_REG_UNREGISTER ..). The underlying problem in trace_buffered_event_disable() is however present since the original implementation in commit 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events"). Fix the problem by replacing the two smp_call_function_many() calls with on_each_cpu_mask() which invokes a given callback on all CPUs. Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/ Link: https://lkml.kernel.org/r/20231205161736.19663-2-petr.pavlu@suse.com Cc: stable@vger.kernel.org Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") Fixes: dea499781a11 ("tracing: Fix warning in trace_buffered_event_disable()") Signed-off-by: Petr Pavlu Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5b2ba69dbb7f..f6c3a682a11b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2779,11 +2779,9 @@ void trace_buffered_event_disable(void) if (--trace_buffered_event_ref) return; - preempt_disable(); /* For each CPU, set the buffer as used. */ - smp_call_function_many(tracing_buffer_mask, - disable_trace_buffered_event, NULL, 1); - preempt_enable(); + on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, + NULL, true); /* Wait for all current users to finish */ synchronize_rcu(); @@ -2798,11 +2796,9 @@ void trace_buffered_event_disable(void) */ smp_wmb(); - preempt_disable(); /* Do the work on each cpu */ - smp_call_function_many(tracing_buffer_mask, - enable_trace_buffered_event, NULL, 1); - preempt_enable(); + on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, + true); } static struct trace_buffer *temp_buffer; From 7d97646474b9fdd735756025a8174df3192ac0d4 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 5 Dec 2023 17:17:36 +0100 Subject: [PATCH 0933/1397] tracing: Fix a possible race when disabling buffered events commit c0591b1cccf708a47bc465c62436d669a4213323 upstream. Function trace_buffered_event_disable() is responsible for freeing pages backing buffered events and this process can run concurrently with trace_event_buffer_lock_reserve(). The following race is currently possible: * Function trace_buffered_event_disable() is called on CPU 0. It increments trace_buffered_event_cnt on each CPU and waits via synchronize_rcu() for each user of trace_buffered_event to complete. * After synchronize_rcu() is finished, function trace_buffered_event_disable() has the exclusive access to trace_buffered_event. All counters trace_buffered_event_cnt are at 1 and all pointers trace_buffered_event are still valid. * At this point, on a different CPU 1, the execution reaches trace_event_buffer_lock_reserve(). The function calls preempt_disable_notrace() and only now enters an RCU read-side critical section. The function proceeds and reads a still valid pointer from trace_buffered_event[CPU1] into the local variable "entry". However, it doesn't yet read trace_buffered_event_cnt[CPU1] which happens later. * Function trace_buffered_event_disable() continues. It frees trace_buffered_event[CPU1] and decrements trace_buffered_event_cnt[CPU1] back to 0. * Function trace_event_buffer_lock_reserve() continues. It reads and increments trace_buffered_event_cnt[CPU1] from 0 to 1. This makes it believe that it can use the "entry" that it already obtained but the pointer is now invalid and any access results in a use-after-free. Fix the problem by making a second synchronize_rcu() call after all trace_buffered_event values are set to NULL. This waits on all potential users in trace_event_buffer_lock_reserve() that still read a previous pointer from trace_buffered_event. Link: https://lore.kernel.org/all/20231127151248.7232-2-petr.pavlu@suse.com/ Link: https://lkml.kernel.org/r/20231205161736.19663-4-petr.pavlu@suse.com Cc: stable@vger.kernel.org Fixes: 0fc1b09ff1ff ("tracing: Use temp buffer when filtering events") Signed-off-by: Petr Pavlu Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f6c3a682a11b..b656cab67f67 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2790,13 +2790,17 @@ void trace_buffered_event_disable(void) free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); per_cpu(trace_buffered_event, cpu) = NULL; } + /* - * Make sure trace_buffered_event is NULL before clearing - * trace_buffered_event_cnt. + * Wait for all CPUs that potentially started checking if they can use + * their event buffer only after the previous synchronize_rcu() call and + * they still read a valid pointer from trace_buffered_event. It must be + * ensured they don't see cleared trace_buffered_event_cnt else they + * could wrongly decide to use the pointed-to buffer which is now freed. */ - smp_wmb(); + synchronize_rcu(); - /* Do the work on each cpu */ + /* For each CPU, relinquish the buffer */ on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, true); } From 3c0adff939a6e1798be92098941c632b85820313 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 1 Dec 2023 11:23:22 +0100 Subject: [PATCH 0934/1397] leds: trigger: netdev: fix RTNL handling to prevent potential deadlock commit fe2b1226656afae56702d1d84c6900f6b67df297 upstream. When working on LED support for r8169 I got the following lockdep warning. Easiest way to prevent this scenario seems to be to take the RTNL lock before the trigger_data lock in set_device_name(). ====================================================== WARNING: possible circular locking dependency detected 6.7.0-rc2-next-20231124+ #2 Not tainted ------------------------------------------------------ bash/383 is trying to acquire lock: ffff888103aa1c68 (&trigger_data->lock){+.+.}-{3:3}, at: netdev_trig_notify+0xec/0x190 [ledtrig_netdev] but task is already holding lock: ffffffff8cddf808 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x12/0x20 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (rtnl_mutex){+.+.}-{3:3}: __mutex_lock+0x9b/0xb50 mutex_lock_nested+0x16/0x20 rtnl_lock+0x12/0x20 set_device_name+0xa9/0x120 [ledtrig_netdev] netdev_trig_activate+0x1a1/0x230 [ledtrig_netdev] led_trigger_set+0x172/0x2c0 led_trigger_write+0xf1/0x140 sysfs_kf_bin_write+0x5d/0x80 kernfs_fop_write_iter+0x15d/0x210 vfs_write+0x1f0/0x510 ksys_write+0x6c/0xf0 __x64_sys_write+0x14/0x20 do_syscall_64+0x3f/0xf0 entry_SYSCALL_64_after_hwframe+0x6c/0x74 -> #0 (&trigger_data->lock){+.+.}-{3:3}: __lock_acquire+0x1459/0x25a0 lock_acquire+0xc8/0x2d0 __mutex_lock+0x9b/0xb50 mutex_lock_nested+0x16/0x20 netdev_trig_notify+0xec/0x190 [ledtrig_netdev] call_netdevice_register_net_notifiers+0x5a/0x100 register_netdevice_notifier+0x85/0x120 netdev_trig_activate+0x1d4/0x230 [ledtrig_netdev] led_trigger_set+0x172/0x2c0 led_trigger_write+0xf1/0x140 sysfs_kf_bin_write+0x5d/0x80 kernfs_fop_write_iter+0x15d/0x210 vfs_write+0x1f0/0x510 ksys_write+0x6c/0xf0 __x64_sys_write+0x14/0x20 do_syscall_64+0x3f/0xf0 entry_SYSCALL_64_after_hwframe+0x6c/0x74 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock(&trigger_data->lock); lock(rtnl_mutex); lock(&trigger_data->lock); *** DEADLOCK *** 8 locks held by bash/383: #0: ffff888103ff33f0 (sb_writers#3){.+.+}-{0:0}, at: ksys_write+0x6c/0xf0 #1: ffff888103aa1e88 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x114/0x210 #2: ffff8881036f1890 (kn->active#82){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x11d/0x210 #3: ffff888108e2c358 (&led_cdev->led_access){+.+.}-{3:3}, at: led_trigger_write+0x30/0x140 #4: ffffffff8cdd9e10 (triggers_list_lock){++++}-{3:3}, at: led_trigger_write+0x75/0x140 #5: ffff888108e2c270 (&led_cdev->trigger_lock){++++}-{3:3}, at: led_trigger_write+0xe3/0x140 #6: ffffffff8cdde3d0 (pernet_ops_rwsem){++++}-{3:3}, at: register_netdevice_notifier+0x1c/0x120 #7: ffffffff8cddf808 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x12/0x20 stack backtrace: CPU: 0 PID: 383 Comm: bash Not tainted 6.7.0-rc2-next-20231124+ #2 Hardware name: Default string Default string/Default string, BIOS ADLN.M6.SODIMM.ZB.CY.015 08/08/2023 Call Trace: dump_stack_lvl+0x5c/0xd0 dump_stack+0x10/0x20 print_circular_bug+0x2dd/0x410 check_noncircular+0x131/0x150 __lock_acquire+0x1459/0x25a0 lock_acquire+0xc8/0x2d0 ? netdev_trig_notify+0xec/0x190 [ledtrig_netdev] __mutex_lock+0x9b/0xb50 ? netdev_trig_notify+0xec/0x190 [ledtrig_netdev] ? __this_cpu_preempt_check+0x13/0x20 ? netdev_trig_notify+0xec/0x190 [ledtrig_netdev] ? __cancel_work_timer+0x11c/0x1b0 ? __mutex_lock+0x123/0xb50 mutex_lock_nested+0x16/0x20 ? mutex_lock_nested+0x16/0x20 netdev_trig_notify+0xec/0x190 [ledtrig_netdev] call_netdevice_register_net_notifiers+0x5a/0x100 register_netdevice_notifier+0x85/0x120 netdev_trig_activate+0x1d4/0x230 [ledtrig_netdev] led_trigger_set+0x172/0x2c0 ? preempt_count_add+0x49/0xc0 led_trigger_write+0xf1/0x140 sysfs_kf_bin_write+0x5d/0x80 kernfs_fop_write_iter+0x15d/0x210 vfs_write+0x1f0/0x510 ksys_write+0x6c/0xf0 __x64_sys_write+0x14/0x20 do_syscall_64+0x3f/0xf0 entry_SYSCALL_64_after_hwframe+0x6c/0x74 RIP: 0033:0x7f269055d034 Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d 35 c3 0d 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 48 89 54 24 18 48 RSP: 002b:00007ffddb7ef748 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000007 RCX: 00007f269055d034 RDX: 0000000000000007 RSI: 000055bf5f4af3c0 RDI: 0000000000000001 RBP: 000055bf5f4af3c0 R08: 0000000000000073 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000007 R13: 00007f26906325c0 R14: 00007f269062ff20 R15: 0000000000000000 Fixes: d5e01266e7f5 ("leds: trigger: netdev: add additional specific link speed mode") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Acked-by: Lee Jones Link: https://lore.kernel.org/r/fb5c8294-2a10-4bf5-8f10-3d2b77d2757e@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/leds/trigger/ledtrig-netdev.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c index e358e77e4b38..d76214fa9ad8 100644 --- a/drivers/leds/trigger/ledtrig-netdev.c +++ b/drivers/leds/trigger/ledtrig-netdev.c @@ -226,6 +226,11 @@ static int set_device_name(struct led_netdev_data *trigger_data, cancel_delayed_work_sync(&trigger_data->work); + /* + * Take RTNL lock before trigger_data lock to prevent potential + * deadlock with netdev notifier registration. + */ + rtnl_lock(); mutex_lock(&trigger_data->lock); if (trigger_data->net_dev) { @@ -245,16 +250,14 @@ static int set_device_name(struct led_netdev_data *trigger_data, trigger_data->carrier_link_up = false; trigger_data->link_speed = SPEED_UNKNOWN; trigger_data->duplex = DUPLEX_UNKNOWN; - if (trigger_data->net_dev != NULL) { - rtnl_lock(); + if (trigger_data->net_dev) get_device_state(trigger_data); - rtnl_unlock(); - } trigger_data->last_activity = 0; set_baseline_state(trigger_data); mutex_unlock(&trigger_data->lock); + rtnl_unlock(); return 0; } From 9a89aad0865f0a1a3d20716d0365085065eba713 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Tue, 5 Dec 2023 11:26:25 +0200 Subject: [PATCH 0935/1397] nfp: flower: fix for take a mutex lock in soft irq context and rcu lock commit 0ad722bd9ee3a9bdfca9613148645e4c9b7f26cf upstream. The neighbour event callback call the function nfp_tun_write_neigh, this function will take a mutex lock and it is in soft irq context, change the work queue to process the neighbour event. Move the nfp_tun_write_neigh function out of range rcu_read_lock/unlock() in function nfp_tunnel_request_route_v4 and nfp_tunnel_request_route_v6. Fixes: abc210952af7 ("nfp: flower: tunnel neigh support bond offload") CC: stable@vger.kernel.org # 6.2+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../netronome/nfp/flower/tunnel_conf.c | 127 +++++++++++++----- 1 file changed, 95 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 060a77f2265d..e522845c7c21 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -160,6 +160,18 @@ struct nfp_tun_mac_addr_offload { u8 addr[ETH_ALEN]; }; +/** + * struct nfp_neigh_update_work - update neighbour information to nfp + * @work: Work queue for writing neigh to the nfp + * @n: neighbour entry + * @app: Back pointer to app + */ +struct nfp_neigh_update_work { + struct work_struct work; + struct neighbour *n; + struct nfp_app *app; +}; + enum nfp_flower_mac_offload_cmd { NFP_TUNNEL_MAC_OFFLOAD_ADD = 0, NFP_TUNNEL_MAC_OFFLOAD_DEL = 1, @@ -607,38 +619,30 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, nfp_flower_cmsg_warn(app, "Neighbour configuration failed.\n"); } -static int -nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, - void *ptr) +static void +nfp_tun_release_neigh_update_work(struct nfp_neigh_update_work *update_work) { - struct nfp_flower_priv *app_priv; - struct netevent_redirect *redir; - struct neighbour *n; + neigh_release(update_work->n); + kfree(update_work); +} + +static void nfp_tun_neigh_update(struct work_struct *work) +{ + struct nfp_neigh_update_work *update_work; struct nfp_app *app; + struct neighbour *n; bool neigh_invalid; int err; - switch (event) { - case NETEVENT_REDIRECT: - redir = (struct netevent_redirect *)ptr; - n = redir->neigh; - break; - case NETEVENT_NEIGH_UPDATE: - n = (struct neighbour *)ptr; - break; - default: - return NOTIFY_DONE; - } - - neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead; - - app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); - app = app_priv->app; + update_work = container_of(work, struct nfp_neigh_update_work, work); + app = update_work->app; + n = update_work->n; if (!nfp_flower_get_port_id_from_netdev(app, n->dev)) - return NOTIFY_DONE; + goto out; #if IS_ENABLED(CONFIG_INET) + neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead; if (n->tbl->family == AF_INET6) { #if IS_ENABLED(CONFIG_IPV6) struct flowi6 flow6 = {}; @@ -655,13 +659,11 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, dst = ip6_dst_lookup_flow(dev_net(n->dev), NULL, &flow6, NULL); if (IS_ERR(dst)) - return NOTIFY_DONE; + goto out; dst_release(dst); } nfp_tun_write_neigh(n->dev, app, &flow6, n, true, false); -#else - return NOTIFY_DONE; #endif /* CONFIG_IPV6 */ } else { struct flowi4 flow4 = {}; @@ -678,17 +680,71 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, rt = ip_route_output_key(dev_net(n->dev), &flow4); err = PTR_ERR_OR_ZERO(rt); if (err) - return NOTIFY_DONE; + goto out; ip_rt_put(rt); } nfp_tun_write_neigh(n->dev, app, &flow4, n, false, false); } -#else - return NOTIFY_DONE; #endif /* CONFIG_INET */ +out: + nfp_tun_release_neigh_update_work(update_work); +} - return NOTIFY_OK; +static struct nfp_neigh_update_work * +nfp_tun_alloc_neigh_update_work(struct nfp_app *app, struct neighbour *n) +{ + struct nfp_neigh_update_work *update_work; + + update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC); + if (!update_work) + return NULL; + + INIT_WORK(&update_work->work, nfp_tun_neigh_update); + neigh_hold(n); + update_work->n = n; + update_work->app = app; + + return update_work; +} + +static int +nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nfp_neigh_update_work *update_work; + struct nfp_flower_priv *app_priv; + struct netevent_redirect *redir; + struct neighbour *n; + struct nfp_app *app; + + switch (event) { + case NETEVENT_REDIRECT: + redir = (struct netevent_redirect *)ptr; + n = redir->neigh; + break; + case NETEVENT_NEIGH_UPDATE: + n = (struct neighbour *)ptr; + break; + default: + return NOTIFY_DONE; + } +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); + app = app_priv->app; + update_work = nfp_tun_alloc_neigh_update_work(app, n); + if (!update_work) + return NOTIFY_DONE; + + queue_work(system_highpri_wq, &update_work->work); + + return NOTIFY_DONE; } void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) @@ -706,6 +762,7 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); if (!netdev) goto fail_rcu_unlock; + dev_hold(netdev); flow.daddr = payload->ipv4_addr; flow.flowi4_proto = IPPROTO_UDP; @@ -725,13 +782,16 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto fail_rcu_unlock; + rcu_read_unlock(); + nfp_tun_write_neigh(n->dev, app, &flow, n, false, true); neigh_release(n); - rcu_read_unlock(); + dev_put(netdev); return; fail_rcu_unlock: rcu_read_unlock(); + dev_put(netdev); nfp_flower_cmsg_warn(app, "Requested route not found.\n"); } @@ -749,6 +809,7 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); if (!netdev) goto fail_rcu_unlock; + dev_hold(netdev); flow.daddr = payload->ipv6_addr; flow.flowi6_proto = IPPROTO_UDP; @@ -766,14 +827,16 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) dst_release(dst); if (!n) goto fail_rcu_unlock; + rcu_read_unlock(); nfp_tun_write_neigh(n->dev, app, &flow, n, true, true); neigh_release(n); - rcu_read_unlock(); + dev_put(netdev); return; fail_rcu_unlock: rcu_read_unlock(); + dev_put(netdev); nfp_flower_cmsg_warn(app, "Requested IPv6 route not found.\n"); } From 865b71579d010c03b5a95ed919b042b433e1582b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 1 Dec 2023 14:10:21 +0100 Subject: [PATCH 0936/1397] packet: Move reference count in packet_sock to atomic_long_t commit db3fadacaf0c817b222090290d06ca2a338422d0 upstream. In some potential instances the reference count on struct packet_sock could be saturated and cause overflows which gets the kernel a bit confused. To prevent this, move to a 64-bit atomic reference count on 64-bit architectures to prevent the possibility of this type to overflow. Because we can not handle saturation, using refcount_t is not possible in this place. Maybe someday in the future if it changes it could be used. Also, instead of using plain atomic64_t, use atomic_long_t instead. 32-bit machines tend to be memory-limited (i.e. anything that increases a reference uses so much memory that you can't actually get to 2**32 references). 32-bit architectures also tend to have serious problems with 64-bit atomics. Hence, atomic_long_t is the more natural solution. Reported-by: "The UK's National Cyber Security Centre (NCSC)" Co-developed-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Daniel Borkmann Cc: Linus Torvalds Cc: stable@kernel.org Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231201131021.19999-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 16 ++++++++-------- net/packet/internal.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a84e00b5904b..7adf48549a3b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4300,7 +4300,7 @@ static void packet_mm_open(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_inc(&pkt_sk(sk)->mapped); + atomic_long_inc(&pkt_sk(sk)->mapped); } static void packet_mm_close(struct vm_area_struct *vma) @@ -4310,7 +4310,7 @@ static void packet_mm_close(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_dec(&pkt_sk(sk)->mapped); + atomic_long_dec(&pkt_sk(sk)->mapped); } static const struct vm_operations_struct packet_mmap_ops = { @@ -4405,7 +4405,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; if (!closing) { - if (atomic_read(&po->mapped)) + if (atomic_long_read(&po->mapped)) goto out; if (packet_read_pending(rb)) goto out; @@ -4508,7 +4508,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; mutex_lock(&po->pg_vec_lock); - if (closing || atomic_read(&po->mapped) == 0) { + if (closing || atomic_long_read(&po->mapped) == 0) { err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); @@ -4526,9 +4526,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, po->prot_hook.func = (po->rx_ring.pg_vec) ? tpacket_rcv : packet_rcv; skb_queue_purge(rb_queue); - if (atomic_read(&po->mapped)) - pr_err("packet_mmap: vma is busy: %d\n", - atomic_read(&po->mapped)); + if (atomic_long_read(&po->mapped)) + pr_err("packet_mmap: vma is busy: %ld\n", + atomic_long_read(&po->mapped)); } mutex_unlock(&po->pg_vec_lock); @@ -4606,7 +4606,7 @@ static int packet_mmap(struct file *file, struct socket *sock, } } - atomic_inc(&po->mapped); + atomic_long_inc(&po->mapped); vma->vm_ops = &packet_mmap_ops; err = 0; diff --git a/net/packet/internal.h b/net/packet/internal.h index 63f4865202c1..11ba8a78676a 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -122,7 +122,7 @@ struct packet_sock { __be16 num; struct packet_rollover *rollover; struct packet_mclist *mclist; - atomic_t mapped; + atomic_long_t mapped; enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; From 2e04cfdd3edb9b4b2322304b158d27b82f6b4a58 Mon Sep 17 00:00:00 2001 From: ChunHao Lin Date: Wed, 29 Nov 2023 23:53:50 +0800 Subject: [PATCH 0937/1397] r8169: fix rtl8125b PAUSE frames blasting when suspended commit 4b0768b6556af56ee9b7cf4e68452a2b6289ae45 upstream. When FIFO reaches near full state, device will issue pause frame. If pause slot is enabled(set to 1), in this time, device will issue pause frame only once. But if pause slot is disabled(set to 0), device will keep sending pause frames until FIFO reaches near empty state. When pause slot is disabled, if there is no one to handle receive packets, device FIFO will reach near full state and keep sending pause frames. That will impact entire local area network. This issue can be reproduced in Chromebox (not Chromebook) in developer mode running a test image (and v5.10 kernel): 1) ping -f $CHROMEBOX (from workstation on same local network) 2) run "powerd_dbus_suspend" from command line on the $CHROMEBOX 3) ping $ROUTER (wait until ping fails from workstation) Takes about ~20-30 seconds after step 2 for the local network to stop working. Fix this issue by enabling pause slot to only send pause frame once when FIFO reaches near full state. Fixes: f1bce4ad2f1c ("r8169: add support for RTL8125") Reported-by: Grant Grundler Tested-by: Grant Grundler Cc: stable@vger.kernel.org Signed-off-by: ChunHao Lin Reviewed-by: Jacob Keller Reviewed-by: Heiner Kallweit Link: https://lore.kernel.org/r/20231129155350.5843-1-hau@realtek.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 62cabeeb842a..bb787a52bc75 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -196,6 +196,7 @@ enum rtl_registers { /* No threshold before first PCI xfer */ #define RX_FIFO_THRESH (7 << RXCFG_FIFO_SHIFT) #define RX_EARLY_OFF (1 << 11) +#define RX_PAUSE_SLOT_ON (1 << 11) /* 8125b and later */ #define RXCFG_DMA_SHIFT 8 /* Unlimited maximum PCI burst. */ #define RX_DMA_BURST (7 << RXCFG_DMA_SHIFT) @@ -2306,9 +2307,13 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF); break; - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63: + case RTL_GIGA_MAC_VER_61: RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST); break; + case RTL_GIGA_MAC_VER_63: + RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST | + RX_PAUSE_SLOT_ON); + break; default: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST); break; From 78c8fc333253077c5276bc3b4207818d45410acb Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Sun, 3 Dec 2023 23:22:16 +0100 Subject: [PATCH 0938/1397] regmap: fix bogus error on regcache_sync success commit fea88064445a59584460f7f67d102b6e5fc1ca1d upstream. Since commit 0ec7731655de ("regmap: Ensure range selector registers are updated after cache sync") opening pcm512x based soundcards fail with EINVAL and dmesg shows sync cache and pm_runtime_get errors: [ 228.794676] pcm512x 1-004c: Failed to sync cache: -22 [ 228.794740] pcm512x 1-004c: ASoC: error at snd_soc_pcm_component_pm_runtime_get on pcm512x.1-004c: -22 This is caused by the cache check result leaking out into the regcache_sync return value. Fix this by making the check local-only, as the comment above the regcache_read call states a non-zero return value means there's nothing to do so the return value should not be altered. Fixes: 0ec7731655de ("regmap: Ensure range selector registers are updated after cache sync") Cc: stable@vger.kernel.org Signed-off-by: Matthias Reichl Link: https://lore.kernel.org/r/20231203222216.96547-1-hias@horus.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regcache.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index 92592f944a3d..ac63a73ccdaa 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -410,8 +410,7 @@ int regcache_sync(struct regmap *map) rb_entry(node, struct regmap_range_node, node); /* If there's nothing in the cache there's nothing to sync */ - ret = regcache_read(map, this->selector_reg, &i); - if (ret != 0) + if (regcache_read(map, this->selector_reg, &i) != 0) continue; ret = _regmap_write(map, this->selector_reg, i); From 7409c28cab78e843074b2a5071fee97f7f504fca Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Tue, 28 Nov 2023 20:49:35 +0100 Subject: [PATCH 0939/1397] platform/surface: aggregator: fix recv_buf() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c8820c92caf0770bec976b01fa9e82bb993c5865 upstream. Serdev recv_buf() callback is supposed to return the amount of bytes consumed, therefore an int in between 0 and count. Do not return negative number in case of issue, when ssam_controller_receive_buf() returns ESHUTDOWN just returns 0, e.g. no bytes consumed, this keep the exact same behavior as it was before. This fixes a potential WARN in serdev-ttyport.c:ttyport_receive_buf(). Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Cc: stable@vger.kernel.org Signed-off-by: Francesco Dolcini Reviewed-by: Maximilian Luz Link: https://lore.kernel.org/r/20231128194935.11350-1-francesco@dolcini.it Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/platform/surface/aggregator/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c index 1a6373dea109..6152be38398c 100644 --- a/drivers/platform/surface/aggregator/core.c +++ b/drivers/platform/surface/aggregator/core.c @@ -231,9 +231,12 @@ static int ssam_receive_buf(struct serdev_device *dev, const unsigned char *buf, size_t n) { struct ssam_controller *ctrl; + int ret; ctrl = serdev_device_get_drvdata(dev); - return ssam_controller_receive_buf(ctrl, buf, n); + ret = ssam_controller_receive_buf(ctrl, buf, n); + + return ret < 0 ? 0 : ret; } static void ssam_write_wakeup(struct serdev_device *dev) From b2c562a7a88b402bfd747b4ddb5ec21b54147de1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Nov 2023 11:39:36 -1000 Subject: [PATCH 0940/1397] workqueue: Make sure that wq_unbound_cpumask is never empty commit 4a6c5607d4502ccd1b15b57d57f17d12b6f257a7 upstream. During boot, depending on how the housekeeping and workqueue.unbound_cpus masks are set, wq_unbound_cpumask can end up empty. Since 8639ecebc9b1 ("workqueue: Implement non-strict affinity scope for unbound workqueues"), this may end up feeding -1 as a CPU number into scheduler leading to oopses. BUG: unable to handle page fault for address: ffffffff8305e9c0 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page ... Call Trace: select_idle_sibling+0x79/0xaf0 select_task_rq_fair+0x1cb/0x7b0 try_to_wake_up+0x29c/0x5c0 wake_up_process+0x19/0x20 kick_pool+0x5e/0xb0 __queue_work+0x119/0x430 queue_work_on+0x29/0x30 ... An empty wq_unbound_cpumask is a clear misconfiguration and already disallowed once system is booted up. Let's warn on and ignore unbound_cpumask restrictions which lead to no unbound cpus. While at it, also remove now unncessary empty check on wq_unbound_cpumask in wq_select_unbound_cpu(). Signed-off-by: Tejun Heo Reported-and-Tested-by: Yong He Link: http://lkml.kernel.org/r/20231120121623.119780-1-alexyonghe@tencent.com Fixes: 8639ecebc9b1 ("workqueue: Implement non-strict affinity scope for unbound workqueues") Cc: stable@vger.kernel.org # v6.6+ Reviewed-by: Waiman Long Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0f682da96e1c..e6a95bb74e22 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1684,9 +1684,6 @@ static int wq_select_unbound_cpu(int cpu) pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n"); } - if (cpumask_empty(wq_unbound_cpumask)) - return cpu; - new_cpu = __this_cpu_read(wq_rr_cpu_last); new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask); if (unlikely(new_cpu >= nr_cpu_ids)) { @@ -6515,6 +6512,17 @@ static inline void wq_watchdog_init(void) { } #endif /* CONFIG_WQ_WATCHDOG */ +static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask) +{ + if (!cpumask_intersects(wq_unbound_cpumask, mask)) { + pr_warn("workqueue: Restricting unbound_cpumask (%*pb) with %s (%*pb) leaves no CPU, ignoring\n", + cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask)); + return; + } + + cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, mask); +} + /** * workqueue_init_early - early init for workqueue subsystem * @@ -6534,11 +6542,11 @@ void __init workqueue_init_early(void) BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); - cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ)); - cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN)); - + cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); + restrict_unbound_cpumask("HK_TYPE_WQ", housekeeping_cpumask(HK_TYPE_WQ)); + restrict_unbound_cpumask("HK_TYPE_DOMAIN", housekeeping_cpumask(HK_TYPE_DOMAIN)); if (!cpumask_empty(&wq_cmdline_cpumask)) - cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, &wq_cmdline_cpumask); + restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask); pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); From 512b420aaf7801b5351733f5178eec5af070aa94 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Mon, 13 Nov 2023 17:20:33 -0800 Subject: [PATCH 0941/1397] hugetlb: fix null-ptr-deref in hugetlb_vma_lock_write commit 187da0f8250aa94bd96266096aef6f694e0b4cd2 upstream. The routine __vma_private_lock tests for the existence of a reserve map associated with a private hugetlb mapping. A pointer to the reserve map is in vma->vm_private_data. __vma_private_lock was checking the pointer for NULL. However, it is possible that the low bits of the pointer could be used as flags. In such instances, vm_private_data is not NULL and not a valid pointer. This results in the null-ptr-deref reported by syzbot: general protection fault, probably for non-canonical address 0xdffffc000000001d: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x00000000000000e8-0x00000000000000ef] CPU: 0 PID: 5048 Comm: syz-executor139 Not tainted 6.6.0-rc7-syzkaller-00142-g88 8cf78c29e2 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 1 0/09/2023 RIP: 0010:__lock_acquire+0x109/0x5de0 kernel/locking/lockdep.c:5004 ... Call Trace: lock_acquire kernel/locking/lockdep.c:5753 [inline] lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5718 down_write+0x93/0x200 kernel/locking/rwsem.c:1573 hugetlb_vma_lock_write mm/hugetlb.c:300 [inline] hugetlb_vma_lock_write+0xae/0x100 mm/hugetlb.c:291 __hugetlb_zap_begin+0x1e9/0x2b0 mm/hugetlb.c:5447 hugetlb_zap_begin include/linux/hugetlb.h:258 [inline] unmap_vmas+0x2f4/0x470 mm/memory.c:1733 exit_mmap+0x1ad/0xa60 mm/mmap.c:3230 __mmput+0x12a/0x4d0 kernel/fork.c:1349 mmput+0x62/0x70 kernel/fork.c:1371 exit_mm kernel/exit.c:567 [inline] do_exit+0x9ad/0x2a20 kernel/exit.c:861 __do_sys_exit kernel/exit.c:991 [inline] __se_sys_exit kernel/exit.c:989 [inline] __x64_sys_exit+0x42/0x50 kernel/exit.c:989 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Mask off low bit flags before checking for NULL pointer. In addition, the reserve map only 'belongs' to the OWNER (parent in parent/child relationships) so also check for the OWNER flag. Link: https://lkml.kernel.org/r/20231114012033.259600-1-mike.kravetz@oracle.com Reported-by: syzbot+6ada951e7c0f7bc8a71e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-mm/00000000000078d1e00608d7878b@google.com/ Fixes: bf4916922c60 ("hugetlbfs: extend hugetlb_vma_lock to private VMAs") Signed-off-by: Mike Kravetz Reviewed-by: Rik van Riel Cc: Edward Adam Davis Cc: Muchun Song Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Tom Rix Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/hugetlb.h | 5 +---- mm/hugetlb.c | 7 +++++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 47d25a5e1933..31b2927ada73 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1265,10 +1265,7 @@ static inline bool __vma_shareable_lock(struct vm_area_struct *vma) return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; } -static inline bool __vma_private_lock(struct vm_area_struct *vma) -{ - return (!(vma->vm_flags & VM_MAYSHARE)) && vma->vm_private_data; -} +bool __vma_private_lock(struct vm_area_struct *vma); /* * Safe version of huge_pte_offset() to check the locks. See comments diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5f0adffeceb1..5e6c4d367d33 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1189,6 +1189,13 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag) return (get_vma_private_data(vma) & flag) != 0; } +bool __vma_private_lock(struct vm_area_struct *vma) +{ + return !(vma->vm_flags & VM_MAYSHARE) && + get_vma_private_data(vma) & ~HPAGE_RESV_MASK && + is_vma_resv_set(vma, HPAGE_RESV_OWNER); +} + void hugetlb_dup_vma_private(struct vm_area_struct *vma) { VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma); From 83dd18e0b76fd346fd4753016ec00377bb636837 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 28 Nov 2023 13:52:48 +0800 Subject: [PATCH 0942/1397] drivers/base/cpu: crash data showing should depends on KEXEC_CORE commit 4e9e2e4c65136dfd32dd0afe555961433d1cf906 upstream. After commit 88a6f8994421 ("crash: memory and CPU hotplug sysfs attributes"), on x86_64, if only below kernel configs related to kdump are set, compiling error are triggered. ---- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y CONFIG_CRASH_HOTPLUG=y ------ ------------------------------------------------------ drivers/base/cpu.c: In function `crash_hotplug_show': drivers/base/cpu.c:309:40: error: implicit declaration of function `crash_hotplug_cpu_support'; did you mean `crash_hotplug_show'? [-Werror=implicit-function-declaration] 309 | return sysfs_emit(buf, "%d\n", crash_hotplug_cpu_support()); | ^~~~~~~~~~~~~~~~~~~~~~~~~ | crash_hotplug_show cc1: some warnings being treated as errors ------------------------------------------------------ CONFIG_KEXEC is used to enable kexec_load interface, the crash_notes/crash_notes_size/crash_hotplug showing depends on CONFIG_KEXEC is incorrect. It should depend on KEXEC_CORE instead. Fix it now. Link: https://lkml.kernel.org/r/20231128055248.659808-1-bhe@redhat.com Fixes: 88a6f8994421 ("crash: memory and CPU hotplug sysfs attributes") Signed-off-by: Baoquan He Tested-by: Ignat Korchagin [compile-time only] Tested-by: Alexander Gordeev Reviewed-by: Eric DeVolder Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/base/cpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 9ea22e165acd..548491de818e 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -144,7 +144,7 @@ static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store); #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ #endif /* CONFIG_HOTPLUG_CPU */ -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #include static ssize_t crash_notes_show(struct device *dev, @@ -189,14 +189,14 @@ static const struct attribute_group crash_note_cpu_attr_group = { #endif static const struct attribute_group *common_cpu_attr_groups[] = { -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE &crash_note_cpu_attr_group, #endif NULL }; static const struct attribute_group *hotplugable_cpu_attr_groups[] = { -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE &crash_note_cpu_attr_group, #endif NULL From e0270ffad4266736ac2d2caa8c9a9ab091292c52 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Mon, 20 Nov 2023 15:53:52 +0100 Subject: [PATCH 0943/1397] mm/memory_hotplug: add missing mem_hotplug_lock commit 001002e73712cdf6b8d9a103648cda3040ad7647 upstream. From Documentation/core-api/memory-hotplug.rst: When adding/removing/onlining/offlining memory or adding/removing heterogeneous/device memory, we should always hold the mem_hotplug_lock in write mode to serialise memory hotplug (e.g. access to global/zone variables). mhp_(de)init_memmap_on_memory() functions can change zone stats and struct page content, but they are currently called w/o the mem_hotplug_lock. When memory block is being offlined and when kmemleak goes through each populated zone, the following theoretical race conditions could occur: CPU 0: | CPU 1: memory_offline() | -> offline_pages() | -> mem_hotplug_begin() | ... | -> mem_hotplug_done() | | kmemleak_scan() | -> get_online_mems() | ... -> mhp_deinit_memmap_on_memory() | [not protected by mem_hotplug_begin/done()]| Marks memory section as offline, | Retrieves zone_start_pfn poisons vmemmap struct pages and updates | and struct page members. the zone related data | | ... | -> put_online_mems() Fix this by ensuring mem_hotplug_lock is taken before performing mhp_init_memmap_on_memory(). Also ensure that mhp_deinit_memmap_on_memory() holds the lock. online/offline_pages() are currently only called from memory_block_online/offline(), so it is safe to move the locking there. Link: https://lkml.kernel.org/r/20231120145354.308999-2-sumanthk@linux.ibm.com Fixes: a08a2ae34613 ("mm,memory_hotplug: allocate memmap from the added memory range") Signed-off-by: Sumanth Korikkar Reviewed-by: Gerald Schaefer Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Heiko Carstens Cc: Michal Hocko Cc: Oscar Salvador Cc: Vasily Gorbik Cc: kernel test robot Cc: [5.15+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/base/memory.c | 18 +++++++++++++++--- mm/memory_hotplug.c | 13 ++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index f3b9a4d0fa3b..8a13babd826c 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -180,6 +180,9 @@ static inline unsigned long memblk_nr_poison(struct memory_block *mem) } #endif +/* + * Must acquire mem_hotplug_lock in write mode. + */ static int memory_block_online(struct memory_block *mem) { unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); @@ -204,10 +207,11 @@ static int memory_block_online(struct memory_block *mem) if (mem->altmap) nr_vmemmap_pages = mem->altmap->free; + mem_hotplug_begin(); if (nr_vmemmap_pages) { ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone); if (ret) - return ret; + goto out; } ret = online_pages(start_pfn + nr_vmemmap_pages, @@ -215,7 +219,7 @@ static int memory_block_online(struct memory_block *mem) if (ret) { if (nr_vmemmap_pages) mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages); - return ret; + goto out; } /* @@ -227,9 +231,14 @@ static int memory_block_online(struct memory_block *mem) nr_vmemmap_pages); mem->zone = zone; +out: + mem_hotplug_done(); return ret; } +/* + * Must acquire mem_hotplug_lock in write mode. + */ static int memory_block_offline(struct memory_block *mem) { unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); @@ -247,6 +256,7 @@ static int memory_block_offline(struct memory_block *mem) if (mem->altmap) nr_vmemmap_pages = mem->altmap->free; + mem_hotplug_begin(); if (nr_vmemmap_pages) adjust_present_page_count(pfn_to_page(start_pfn), mem->group, -nr_vmemmap_pages); @@ -258,13 +268,15 @@ static int memory_block_offline(struct memory_block *mem) if (nr_vmemmap_pages) adjust_present_page_count(pfn_to_page(start_pfn), mem->group, nr_vmemmap_pages); - return ret; + goto out; } if (nr_vmemmap_pages) mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages); mem->zone = NULL; +out: + mem_hotplug_done(); return ret; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3b301c4023ff..49653e1366e8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1129,6 +1129,9 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages) kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages)); } +/* + * Must be called with mem_hotplug_lock in write mode. + */ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group) { @@ -1149,7 +1152,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, !IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION))) return -EINVAL; - mem_hotplug_begin(); /* associate pfn range with the zone */ move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE); @@ -1208,7 +1210,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, writeback_set_ratelimit(); memory_notify(MEM_ONLINE, &arg); - mem_hotplug_done(); return 0; failed_addition: @@ -1217,7 +1218,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1); memory_notify(MEM_CANCEL_ONLINE, &arg); remove_pfn_range_from_zone(zone, pfn, nr_pages); - mem_hotplug_done(); return ret; } @@ -1863,6 +1863,9 @@ static int count_system_ram_pages_cb(unsigned long start_pfn, return 0; } +/* + * Must be called with mem_hotplug_lock in write mode. + */ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group) { @@ -1885,8 +1888,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, !IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION))) return -EINVAL; - mem_hotplug_begin(); - /* * Don't allow to offline memory blocks that contain holes. * Consequently, memory blocks with holes can never get onlined @@ -2027,7 +2028,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, memory_notify(MEM_OFFLINE, &arg); remove_pfn_range_from_zone(zone, start_pfn, nr_pages); - mem_hotplug_done(); return 0; failed_removal_isolated: @@ -2042,7 +2042,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, (unsigned long long) start_pfn << PAGE_SHIFT, ((unsigned long long) end_pfn << PAGE_SHIFT) - 1, reason); - mem_hotplug_done(); return ret; } From 799f90c385cd7cb434e8971012231b7dc4c5324c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 17 Nov 2023 00:49:18 -0800 Subject: [PATCH 0944/1397] mm: fix oops when filemap_map_pmd() without prealloc_pte MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9aa1345d66b8132745ffb99b348b1492088da9e2 upstream. syzbot reports oops in lockdep's __lock_acquire(), called from __pte_offset_map_lock() called from filemap_map_pages(); or when I run the repro, the oops comes in pmd_install(), called from filemap_map_pmd() called from filemap_map_pages(), just before the __pte_offset_map_lock(). The problem is that filemap_map_pmd() has been assuming that when it finds pmd_none(), a page table has already been prepared in prealloc_pte; and indeed do_fault_around() has been careful to preallocate one there, when it finds pmd_none(): but what if *pmd became none in between? My 6.6 mods in mm/khugepaged.c, avoiding mmap_lock for write, have made it easy for *pmd to be cleared while servicing a page fault; but even before those, a huge *pmd might be zapped while a fault is serviced. The difference in symptomatic stack traces comes from the "memory model" in use: pmd_install() uses pmd_populate() uses page_to_pfn(): in some models that is strict, and will oops on the NULL prealloc_pte; in other models, it will construct a bogus value to be populated into *pmd, then __pte_offset_map_lock() oops when trying to access split ptlock pointer (or some other symptom in normal case of ptlock embedded not pointer). Link: https://lore.kernel.org/linux-mm/20231115065506.19780-1-jose.pekkarinen@foxhound.fi/ Link: https://lkml.kernel.org/r/6ed0c50c-78ef-0719-b3c5-60c0c010431c@google.com Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") Signed-off-by: Hugh Dickins Reported-and-tested-by: syzbot+89edd67979b52675ddec@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-mm/0000000000005e44550608a0806c@google.com/ Reviewed-by: David Hildenbrand Cc: Jann Horn , Cc: José Pekkarinen Cc: Kirill A. Shutemov Cc: Matthew Wilcox (Oracle) Cc: [5.12+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index f0a15ce1bd1b..d40a20c9d59f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3422,7 +3422,7 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct folio *folio, } } - if (pmd_none(*vmf->pmd)) + if (pmd_none(*vmf->pmd) && vmf->prealloc_pte) pmd_install(mm, vmf->pmd, &vmf->prealloc_pte); return false; From 9e5d3096742daef9b33338ab564114c7d15e5c9e Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Mon, 20 Nov 2023 15:53:53 +0100 Subject: [PATCH 0945/1397] mm/memory_hotplug: fix error handling in add_memory_resource() commit f42ce5f087eb69e47294ababd2e7e6f88a82d308 upstream. In add_memory_resource(), creation of memory block devices occurs after successful call to arch_add_memory(). However, creation of memory block devices could fail. In that case, arch_remove_memory() is called to perform necessary cleanup. Currently with or without altmap support, arch_remove_memory() is always passed with altmap set to NULL during error handling. This leads to freeing of struct pages using free_pages(), eventhough the allocation might have been performed with altmap support via altmap_alloc_block_buf(). Fix the error handling by passing altmap in arch_remove_memory(). This ensures the following: * When altmap is disabled, deallocation of the struct pages array occurs via free_pages(). * When altmap is enabled, deallocation occurs via vmem_altmap_free(). Link: https://lkml.kernel.org/r/20231120145354.308999-3-sumanthk@linux.ibm.com Fixes: a08a2ae34613 ("mm,memory_hotplug: allocate memmap from the added memory range") Signed-off-by: Sumanth Korikkar Reviewed-by: Gerald Schaefer Acked-by: David Hildenbrand Cc: Alexander Gordeev Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Heiko Carstens Cc: kernel test robot Cc: Michal Hocko Cc: Oscar Salvador Cc: Vasily Gorbik Cc: [5.15+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memory_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 49653e1366e8..144758820e3a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1458,7 +1458,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) /* create memory block devices after memory was added */ ret = create_memory_block_devices(start, size, params.altmap, group); if (ret) { - arch_remove_memory(start, size, NULL); + arch_remove_memory(start, size, params.altmap); goto error_free; } From aa581b37dae993214bd9a06ae64be46cce6b51c2 Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Fri, 1 Dec 2023 12:32:05 +0000 Subject: [PATCH 0946/1397] powercap: DTPM: Fix missing cpufreq_cpu_put() calls commit bdefd9913bdd453991ef756b6f7176e8ad80d786 upstream. The policy returned by cpufreq_cpu_get() has to be released with the help of cpufreq_cpu_put() to balance its kobject reference counter properly. Add the missing calls to cpufreq_cpu_put() in the code. Fixes: 0aea2e4ec2a2 ("powercap/dtpm_cpu: Reset per_cpu variable in the release function") Fixes: 0e8f68d7f048 ("powercap/drivers/dtpm: Add CPU energy model based support") Cc: v5.16+ # v5.16+ Signed-off-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/powercap/dtpm_cpu.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 8a2f18fa3faf..9193c3b8edeb 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -140,6 +140,8 @@ static void pd_release(struct dtpm *dtpm) if (policy) { for_each_cpu(dtpm_cpu->cpu, policy->related_cpus) per_cpu(dtpm_per_cpu, dtpm_cpu->cpu) = NULL; + + cpufreq_cpu_put(policy); } kfree(dtpm_cpu); @@ -191,12 +193,16 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) return 0; pd = em_cpu_get(cpu); - if (!pd || em_is_artificial(pd)) - return -EINVAL; + if (!pd || em_is_artificial(pd)) { + ret = -EINVAL; + goto release_policy; + } dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL); - if (!dtpm_cpu) - return -ENOMEM; + if (!dtpm_cpu) { + ret = -ENOMEM; + goto release_policy; + } dtpm_init(&dtpm_cpu->dtpm, &dtpm_ops); dtpm_cpu->cpu = cpu; @@ -216,6 +222,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) if (ret) goto out_dtpm_unregister; + cpufreq_cpu_put(policy); return 0; out_dtpm_unregister: @@ -227,6 +234,8 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) per_cpu(dtpm_per_cpu, cpu) = NULL; kfree(dtpm_cpu); +release_policy: + cpufreq_cpu_put(policy); return ret; } From 4ce431c297558e30baa9226243a15d818320742b Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Tue, 28 Nov 2023 13:11:39 -0500 Subject: [PATCH 0947/1397] md/raid6: use valid sector values to determine if an I/O should wait on the reshape commit c467e97f079f0019870c314996fae952cc768e82 upstream. During a reshape or a RAID6 array such as expanding by adding an additional disk, I/Os to the region of the array which have not yet been reshaped can stall indefinitely. This is from errors in the stripe_ahead_of_reshape function causing md to think the I/O is to a region in the actively undergoing the reshape. stripe_ahead_of_reshape fails to account for the q disk having a sector value of 0. By not excluding the q disk from the for loop, raid6 will always generate a min_sector value of 0, causing a return value which stalls. The function's max_sector calculation also uses min() when it should use max(), causing the max_sector value to always be 0. During a backwards rebuild this can cause the opposite problem where it allows I/O to advance when it should wait. Fixing these errors will allow safe I/O to advance in a timely manner and delay only I/O which is unsafe due to stripes in the middle of undergoing the reshape. Fixes: 486f60558607 ("md/raid5: Check all disks in a stripe_head for reshape progress") Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: David Jeffery Tested-by: Laurence Oberman Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231128181233.6187-1-djeffery@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 284cd71bcc68..68d86dbecb4a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5892,11 +5892,11 @@ static bool stripe_ahead_of_reshape(struct mddev *mddev, struct r5conf *conf, int dd_idx; for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) { - if (dd_idx == sh->pd_idx) + if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx) continue; min_sector = min(min_sector, sh->dev[dd_idx].sector); - max_sector = min(max_sector, sh->dev[dd_idx].sector); + max_sector = max(max_sector, sh->dev[dd_idx].sector); } spin_lock_irq(&conf->device_lock); From 02650b3b98cff56d26ec3687b76da4a81c20d187 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 4 Dec 2023 09:32:33 +0100 Subject: [PATCH 0948/1397] drm/atomic-helpers: Invoke end_fb_access while owning plane state commit e0f04e41e8eedd4e5a1275f2318df7e1841855f2 upstream. Invoke drm_plane_helper_funcs.end_fb_access before drm_atomic_helper_commit_hw_done(). The latter function hands over ownership of the plane state to the following commit, which might free it. Releasing resources in end_fb_access then operates on undefined state. This bug has been observed with non-blocking commits when they are being queued up quickly. Here is an example stack trace from the bug report. The plane state has been free'd already, so the pages for drm_gem_fb_vunmap() are gone. Unable to handle kernel paging request at virtual address 0000000100000049 [...] drm_gem_fb_vunmap+0x18/0x74 drm_gem_end_shadow_fb_access+0x1c/0x2c drm_atomic_helper_cleanup_planes+0x58/0xd8 drm_atomic_helper_commit_tail+0x90/0xa0 commit_tail+0x15c/0x188 commit_work+0x14/0x20 Fix this by running end_fb_access immediately after updating all planes in drm_atomic_helper_commit_planes(). The existing clean-up helper drm_atomic_helper_cleanup_planes() now only handles cleanup_fb. For aborted commits, roll back from drm_atomic_helper_prepare_planes() in the new helper drm_atomic_helper_unprepare_planes(). This case is different from regular cleanup, as we have to release the new state; regular cleanup releases the old state. The new helper also invokes cleanup_fb for all planes. The changes mostly involve DRM's atomic helpers. Only two drivers, i915 and nouveau, implement their own commit function. Update them to invoke drm_atomic_helper_unprepare_planes(). Drivers with custom commit_tail function do not require changes. v4: * fix documentation (kernel test robot) v3: * add drm_atomic_helper_unprepare_planes() for rolling back * use correct state for end_fb_access v2: * fix test in drm_atomic_helper_cleanup_planes() Reported-by: Alyssa Ross Closes: https://lore.kernel.org/dri-devel/87leazm0ya.fsf@alyssa.is/ Suggested-by: Daniel Vetter Fixes: 94d879eaf7fb ("drm/atomic-helper: Add {begin,end}_fb_access to plane helpers") Tested-by: Alyssa Ross Reviewed-by: Alyssa Ross Signed-off-by: Thomas Zimmermann Cc: # v6.2+ Link: https://patchwork.freedesktop.org/patch/msgid/20231204083247.22006-1-tzimmermann@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic_helper.c | 78 +++++++++++++------- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +- include/drm/drm_atomic_helper.h | 2 + 4 files changed, 56 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 60794fcde1d5..554d4468aa7c 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -2012,7 +2012,7 @@ int drm_atomic_helper_commit(struct drm_device *dev, return ret; drm_atomic_helper_async_commit(dev, state); - drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_unprepare_planes(dev, state); return 0; } @@ -2072,7 +2072,7 @@ int drm_atomic_helper_commit(struct drm_device *dev, return 0; err: - drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_unprepare_planes(dev, state); return ret; } EXPORT_SYMBOL(drm_atomic_helper_commit); @@ -2650,6 +2650,39 @@ int drm_atomic_helper_prepare_planes(struct drm_device *dev, } EXPORT_SYMBOL(drm_atomic_helper_prepare_planes); +/** + * drm_atomic_helper_unprepare_planes - release plane resources on aborts + * @dev: DRM device + * @state: atomic state object with old state structures + * + * This function cleans up plane state, specifically framebuffers, from the + * atomic state. It undoes the effects of drm_atomic_helper_prepare_planes() + * when aborting an atomic commit. For cleaning up after a successful commit + * use drm_atomic_helper_cleanup_planes(). + */ +void drm_atomic_helper_unprepare_planes(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_plane *plane; + struct drm_plane_state *new_plane_state; + int i; + + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + const struct drm_plane_helper_funcs *funcs = plane->helper_private; + + if (funcs->end_fb_access) + funcs->end_fb_access(plane, new_plane_state); + } + + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + const struct drm_plane_helper_funcs *funcs = plane->helper_private; + + if (funcs->cleanup_fb) + funcs->cleanup_fb(plane, new_plane_state); + } +} +EXPORT_SYMBOL(drm_atomic_helper_unprepare_planes); + static bool plane_crtc_active(const struct drm_plane_state *state) { return state->crtc && state->crtc->state->active; @@ -2784,6 +2817,17 @@ void drm_atomic_helper_commit_planes(struct drm_device *dev, funcs->atomic_flush(crtc, old_state); } + + /* + * Signal end of framebuffer access here before hw_done. After hw_done, + * a later commit might have already released the plane state. + */ + for_each_old_plane_in_state(old_state, plane, old_plane_state, i) { + const struct drm_plane_helper_funcs *funcs = plane->helper_private; + + if (funcs->end_fb_access) + funcs->end_fb_access(plane, old_plane_state); + } } EXPORT_SYMBOL(drm_atomic_helper_commit_planes); @@ -2911,40 +2955,22 @@ EXPORT_SYMBOL(drm_atomic_helper_disable_planes_on_crtc); * configuration. Hence the old configuration must be perserved in @old_state to * be able to call this function. * - * This function must also be called on the new state when the atomic update - * fails at any point after calling drm_atomic_helper_prepare_planes(). + * This function may not be called on the new state when the atomic update + * fails at any point after calling drm_atomic_helper_prepare_planes(). Use + * drm_atomic_helper_unprepare_planes() in this case. */ void drm_atomic_helper_cleanup_planes(struct drm_device *dev, struct drm_atomic_state *old_state) { struct drm_plane *plane; - struct drm_plane_state *old_plane_state, *new_plane_state; + struct drm_plane_state *old_plane_state; int i; - for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) { + for_each_old_plane_in_state(old_state, plane, old_plane_state, i) { const struct drm_plane_helper_funcs *funcs = plane->helper_private; - if (funcs->end_fb_access) - funcs->end_fb_access(plane, new_plane_state); - } - - for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) { - const struct drm_plane_helper_funcs *funcs; - struct drm_plane_state *plane_state; - - /* - * This might be called before swapping when commit is aborted, - * in which case we have to cleanup the new state. - */ - if (old_plane_state == plane->state) - plane_state = new_plane_state; - else - plane_state = old_plane_state; - - funcs = plane->helper_private; - if (funcs->cleanup_fb) - funcs->cleanup_fb(plane, plane_state); + funcs->cleanup_fb(plane, old_plane_state); } } EXPORT_SYMBOL(drm_atomic_helper_cleanup_planes); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 763ab569d8f3..f00b5d5290e6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7279,7 +7279,7 @@ int intel_atomic_commit(struct drm_device *dev, struct drm_atomic_state *_state, for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) intel_color_cleanup_commit(new_crtc_state); - drm_atomic_helper_cleanup_planes(dev, &state->base); + drm_atomic_helper_unprepare_planes(dev, &state->base); intel_runtime_pm_put(&dev_priv->runtime_pm, state->wakeref); return ret; } diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 4e7c9c353c51..617162aac060 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2310,7 +2310,7 @@ nv50_disp_atomic_commit(struct drm_device *dev, err_cleanup: if (ret) - drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_unprepare_planes(dev, state); done: pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h index 536a0b0091c3..006b5c977ad7 100644 --- a/include/drm/drm_atomic_helper.h +++ b/include/drm/drm_atomic_helper.h @@ -97,6 +97,8 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, int drm_atomic_helper_prepare_planes(struct drm_device *dev, struct drm_atomic_state *state); +void drm_atomic_helper_unprepare_planes(struct drm_device *dev, + struct drm_atomic_state *state); #define DRM_PLANE_COMMIT_ACTIVE_ONLY BIT(0) #define DRM_PLANE_COMMIT_NO_DISABLE_AFTER_MODESET BIT(1) From 654748c6fc6a403f141d85d71d3d24dd5a2af6fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Nov 2023 16:50:26 +0200 Subject: [PATCH 0949/1397] drm/i915/mst: Fix .mode_valid_ctx() return values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7cf82b25dd91d7f330d9df2de868caca14289ba1 upstream. .mode_valid_ctx() returns an errno, not the mode status. Fix the code to do the right thing. Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit c1799032d2ef6616113b733428dfaa2199a5604b) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index e3f176a093d2..ccea5ea23fc8 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -988,11 +988,15 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, * Big joiner configuration needs DSC for TGL which is not true for * XE_LPD where uncompressed joiner is supported. */ - if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc) - return MODE_CLOCK_HIGH; + if (DISPLAY_VER(dev_priv) < 13 && bigjoiner && !dsc) { + *status = MODE_CLOCK_HIGH; + return 0; + } - if (mode_rate > max_rate && !dsc) - return MODE_CLOCK_HIGH; + if (mode_rate > max_rate && !dsc) { + *status = MODE_CLOCK_HIGH; + return 0; + } *status = intel_mode_valid_max_plane_size(dev_priv, mode, false); return 0; From a0396af35ca2010a129dda039fdf71358d5035be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Nov 2023 16:50:27 +0200 Subject: [PATCH 0950/1397] drm/i915/mst: Reject modes that require the bigjoiner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dd7eb65c493615fda7d459501c3d4a46e00ea5ba upstream. We have no bigjoiner support in the MST code, so .mode_valid() pretending otherwise is just going to result black screens for users. Reject any mode that needs the joiner. Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 9c058492b16f90bb772cb0dad567e8acc68e155d) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index ccea5ea23fc8..7a3d024e3437 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -955,6 +955,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, if (intel_dp_need_bigjoiner(intel_dp, mode->hdisplay, target_clock)) { bigjoiner = true; max_dotclk *= 2; + + /* TODO: add support for bigjoiner */ + *status = MODE_CLOCK_HIGH; + return 0; } if (DISPLAY_VER(dev_priv) >= 10 && From 8e1e489cdb5bf3ba602f7bb68bbee61265feebad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Nov 2023 16:50:25 +0200 Subject: [PATCH 0951/1397] drm/i915: Skip some timing checks on BXT/GLK DSI transcoders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 20c2dbff342aec13bf93c2f6c951da198916a455 upstream. Apparently some BXT/GLK systems have DSI panels whose timings don't agree with the normal cpu transcoder hblank>=32 limitation. This is perhaps fine as there are no specific hblank/etc. limits listed for the BXT/GLK DSI transcoders. Move those checks out from the global intel_mode_valid() into into connector specific .mode_valid() hooks, skipping BXT/GLK DSI connectors. We'll leave the basic [hv]display/[hv]total checks in intel_mode_valid() as those seem like sensible upper limits regardless of the transcoder used. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9720 Fixes: 8f4b1068e7fc ("drm/i915: Check some transcoder timing minimum limits") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231127145028.4899-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit e0ef2daa8ca8ce4dbc2fd0959e383b753a87fd7d) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/icl_dsi.c | 7 +++++++ drivers/gpu/drm/i915/display/intel_crt.c | 5 +++++ drivers/gpu/drm/i915/display/intel_display.c | 10 ++++++++++ drivers/gpu/drm/i915/display/intel_display.h | 3 +++ drivers/gpu/drm/i915/display/intel_dp.c | 4 ++++ drivers/gpu/drm/i915/display/intel_dp_mst.c | 4 ++++ drivers/gpu/drm/i915/display/intel_dvo.c | 6 ++++++ drivers/gpu/drm/i915/display/intel_hdmi.c | 4 ++++ drivers/gpu/drm/i915/display/intel_lvds.c | 5 +++++ drivers/gpu/drm/i915/display/intel_sdvo.c | 8 +++++++- drivers/gpu/drm/i915/display/intel_tv.c | 8 +++++++- drivers/gpu/drm/i915/display/vlv_dsi.c | 18 +++++++++++++++++- 12 files changed, 79 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index ad6488e9c2b2..5b8efe8e735a 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1440,6 +1440,13 @@ static void gen11_dsi_post_disable(struct intel_atomic_state *state, static enum drm_mode_status gen11_dsi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + /* FIXME: DSC? */ return intel_dsi_mode_valid(connector, mode); } diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 809074758687..d23020eb87f4 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -348,8 +348,13 @@ intel_crt_mode_valid(struct drm_connector *connector, struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); int max_dotclk = dev_priv->max_dotclk_freq; + enum drm_mode_status status; int max_clock; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index f00b5d5290e6..1e2b09ae09b9 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -7660,6 +7660,16 @@ enum drm_mode_status intel_mode_valid(struct drm_device *dev, mode->vtotal > vtotal_max) return MODE_V_ILLEGAL; + return MODE_OK; +} + +enum drm_mode_status intel_cpu_transcoder_mode_valid(struct drm_i915_private *dev_priv, + const struct drm_display_mode *mode) +{ + /* + * Additional transcoder timing limits, + * excluding BXT/GLK DSI transcoders. + */ if (DISPLAY_VER(dev_priv) >= 5) { if (mode->hdisplay < 64 || mode->htotal - mode->hdisplay < 32) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 49ac8473b988..13b0904d42e3 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -405,6 +405,9 @@ enum drm_mode_status intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv, const struct drm_display_mode *mode, bool bigjoiner); +enum drm_mode_status +intel_cpu_transcoder_mode_valid(struct drm_i915_private *i915, + const struct drm_display_mode *mode); enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port); bool is_trans_port_sync_mode(const struct intel_crtc_state *state); bool is_trans_port_sync_master(const struct intel_crtc_state *state); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 119a4de7fe6f..66e35f8443e1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1127,6 +1127,10 @@ intel_dp_mode_valid(struct drm_connector *_connector, enum drm_mode_status status; bool dsc = false, bigjoiner = false; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLCLK) return MODE_H_ILLEGAL; diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 7a3d024e3437..77bd1313c808 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -921,6 +921,10 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector, return 0; } + *status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (*status != MODE_OK) + return 0; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) { *status = MODE_NO_DBLESCAN; return 0; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index b386894c3a6d..d1cfa966d48d 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -217,11 +217,17 @@ intel_dvo_mode_valid(struct drm_connector *_connector, struct drm_display_mode *mode) { struct intel_connector *connector = to_intel_connector(_connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_dvo *intel_dvo = intel_attached_dvo(connector); const struct drm_display_mode *fixed_mode = intel_panel_fixed_mode(connector, mode); int max_dotclk = to_i915(connector->base.dev)->max_dotclk_freq; int target_clock = mode->clock; + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 94a7e1537f42..bc975918e0eb 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -1986,6 +1986,10 @@ intel_hdmi_mode_valid(struct drm_connector *connector, bool ycbcr_420_only; enum intel_output_format sink_format; + status = intel_cpu_transcoder_mode_valid(dev_priv, mode); + if (status != MODE_OK) + return status; + if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING) clock *= 2; diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 3ace56979b70..dcb07d9a739d 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -389,11 +389,16 @@ intel_lvds_mode_valid(struct drm_connector *_connector, struct drm_display_mode *mode) { struct intel_connector *connector = to_intel_connector(_connector); + struct drm_i915_private *i915 = to_i915(connector->base.dev); const struct drm_display_mode *fixed_mode = intel_panel_fixed_mode(connector, mode); int max_pixclk = to_i915(connector->base.dev)->max_dotclk_freq; enum drm_mode_status status; + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 7d25a64698e2..0ce935efe5df 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1906,13 +1906,19 @@ static enum drm_mode_status intel_sdvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_sdvo *intel_sdvo = intel_attached_sdvo(to_intel_connector(connector)); struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector); - int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; bool has_hdmi_sink = intel_has_hdmi_sink(intel_sdvo_connector, connector->state); + int max_dotclk = i915->max_dotclk_freq; + enum drm_mode_status status; int clock = mode->clock; + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 36b479b46b60..d84a79491da2 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -958,8 +958,14 @@ static enum drm_mode_status intel_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *i915 = to_i915(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); - int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; + int max_dotclk = i915->max_dotclk_freq; + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) return MODE_NO_DBLESCAN; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index a96e7d028c5c..d778b88413b7 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1540,9 +1540,25 @@ static const struct drm_encoder_funcs intel_dsi_funcs = { .destroy = intel_dsi_encoder_destroy, }; +static enum drm_mode_status vlv_dsi_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + struct drm_i915_private *i915 = to_i915(connector->dev); + + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + enum drm_mode_status status; + + status = intel_cpu_transcoder_mode_valid(i915, mode); + if (status != MODE_OK) + return status; + } + + return intel_dsi_mode_valid(connector, mode); +} + static const struct drm_connector_helper_funcs intel_dsi_connector_helper_funcs = { .get_modes = intel_dsi_get_modes, - .mode_valid = intel_dsi_mode_valid, + .mode_valid = vlv_dsi_mode_valid, .atomic_check = intel_digital_connector_atomic_check, }; From 5012eb028032c62f1c68bc0765b46f830a194443 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Wed, 25 Oct 2023 19:08:30 +0200 Subject: [PATCH 0952/1397] arm64: dts: mt7986: change cooling trips commit 1fcda8ceb014aafd56f10b33e0077c93b5dd45d1 upstream. Add Critical and hot trips for emergency system shutdown and limiting system load. Change passive trip to active to make sure fan is activated on the lowest trip. Cc: stable@vger.kernel.org Fixes: 1f5be05132f3 ("arm64: dts: mt7986: add thermal-zones") Fixes: c26f779a2295 ("arm64: dts: mt7986: add pwm-fan and cooling-maps to BPI-R3 dts") Suggested-by: Daniel Golle Signed-off-by: Frank Wunderlich Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231025170832.78727-4-linux@fw-web.de Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- .../dts/mediatek/mt7986a-bananapi-bpi-r3.dts | 10 +++++----- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 20 +++++++++++++++---- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts index af4a4309bda4..fb25ffb1be08 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts @@ -150,16 +150,16 @@ trip = <&cpu_trip_active_high>; }; - cpu-active-low { + cpu-active-med { /* active: set fan to cooling level 1 */ cooling-device = <&fan 1 1>; - trip = <&cpu_trip_active_low>; + trip = <&cpu_trip_active_med>; }; - cpu-passive { - /* passive: set fan to cooling level 0 */ + cpu-active-low { + /* active: set fan to cooling level 0 */ cooling-device = <&fan 0 0>; - trip = <&cpu_trip_passive>; + trip = <&cpu_trip_active_low>; }; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 24eda00e320d..ade5e62a6b2b 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -610,22 +610,34 @@ thermal-sensors = <&thermal 0>; trips { + cpu_trip_crit: crit { + temperature = <125000>; + hysteresis = <2000>; + type = "critical"; + }; + + cpu_trip_hot: hot { + temperature = <120000>; + hysteresis = <2000>; + type = "hot"; + }; + cpu_trip_active_high: active-high { temperature = <115000>; hysteresis = <2000>; type = "active"; }; - cpu_trip_active_low: active-low { + cpu_trip_active_med: active-med { temperature = <85000>; hysteresis = <2000>; type = "active"; }; - cpu_trip_passive: passive { - temperature = <40000>; + cpu_trip_active_low: active-low { + temperature = <60000>; hysteresis = <2000>; - type = "passive"; + type = "active"; }; }; }; From 287b1c41decbd903e4adb70c9aa6989551bf6ffb Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Wed, 25 Oct 2023 19:08:29 +0200 Subject: [PATCH 0953/1397] arm64: dts: mt7986: define 3W max power to both SFP on BPI-R3 commit 6413cbc17f89b3a160f3a6f3fad1232b1678fe40 upstream. All SFP power supplies are connected to the system VDD33 which is 3v3/8A. Set 3A per SFP slot to allow SFPs work which need more power than the default 1W. Cc: stable@vger.kernel.org Fixes: 8e01fb15b815 ("arm64: dts: mt7986: add Bananapi R3") Signed-off-by: Frank Wunderlich Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231025170832.78727-3-linux@fw-web.de Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts index fb25ffb1be08..b876e501216b 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts @@ -126,6 +126,7 @@ compatible = "sff,sfp"; i2c-bus = <&i2c_sfp1>; los-gpios = <&pio 46 GPIO_ACTIVE_HIGH>; + maximum-power-milliwatt = <3000>; mod-def0-gpios = <&pio 49 GPIO_ACTIVE_LOW>; tx-disable-gpios = <&pio 20 GPIO_ACTIVE_HIGH>; tx-fault-gpios = <&pio 7 GPIO_ACTIVE_HIGH>; @@ -137,6 +138,7 @@ i2c-bus = <&i2c_sfp2>; los-gpios = <&pio 31 GPIO_ACTIVE_HIGH>; mod-def0-gpios = <&pio 47 GPIO_ACTIVE_LOW>; + maximum-power-milliwatt = <3000>; tx-disable-gpios = <&pio 15 GPIO_ACTIVE_HIGH>; tx-fault-gpios = <&pio 48 GPIO_ACTIVE_HIGH>; }; From 90dc20c8c51d13d410776af8d81689afc16183ea Mon Sep 17 00:00:00 2001 From: Eric Woudstra Date: Wed, 25 Oct 2023 19:08:28 +0200 Subject: [PATCH 0954/1397] arm64: dts: mt7986: fix emmc hs400 mode without uboot initialization commit 8dfe51c3f6ef31502fca3e2da8cd250ebbe4b8f2 upstream. Eric reports errors on emmc with hs400 mode when booting linux on bpi-r3 without uboot [1]. Booting with uboot does not show this because clocks seem to be initialized by uboot. Fix this by adding assigned-clocks and assigned-clock-parents like it's done in uboot [2]. [1] https://forum.banana-pi.org/t/bpi-r3-kernel-fails-setting-emmc-clock-to-416m-depends-on-u-boot/15170 [2] https://github.com/u-boot/u-boot/blob/master/arch/arm/dts/mt7986.dtsi#L287 Cc: stable@vger.kernel.org Fixes: 513b49d19b34 ("arm64: dts: mt7986: add mmc related device nodes") Signed-off-by: Eric Woudstra Signed-off-by: Frank Wunderlich Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231025170832.78727-2-linux@fw-web.de Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index ade5e62a6b2b..fc751e049953 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -374,6 +374,10 @@ reg = <0 0x11230000 0 0x1000>, <0 0x11c20000 0 0x1000>; interrupts = ; + assigned-clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>, + <&topckgen CLK_TOP_EMMC_250M_SEL>; + assigned-clock-parents = <&apmixedsys CLK_APMIXED_MPLL>, + <&topckgen CLK_TOP_NET1PLL_D5_D2>; clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>, <&infracfg CLK_INFRA_MSDC_HCK_CK>, <&infracfg CLK_INFRA_MSDC_CK>, From 6a6df679ac229bfb46eaea93e610c70b5ca45d32 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 14 Aug 2023 09:50:42 +0300 Subject: [PATCH 0955/1397] arm64: dts: mediatek: mt7622: fix memory node warning check commit 8e6ecbfd44b5542a7598c1c5fc9c6dcb5d367f2a upstream. dtbs_check throws a warning at the memory node: Warning (unit_address_vs_reg): /memory: node has a reg or ranges property, but no unit name fix by adding the address into the node name. Cc: stable@vger.kernel.org Fixes: 0b6286dd96c0 ("arm64: dts: mt7622: add bananapi BPI-R64 board") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230814065042.4973-1-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts | 2 +- arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 86cedb0bf1a9..94e1cc9fbea3 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -73,7 +73,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x40000000>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts index dad8e683aac5..c435984ca767 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts @@ -55,7 +55,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x20000000>; }; From d7646d79ea19d0bce4b921e5f544bfdd6eefc57e Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 14 Aug 2023 10:10:53 +0300 Subject: [PATCH 0956/1397] arm64: dts: mediatek: mt8183-kukui-jacuzzi: fix dsi unnecessary cells properties commit 74543b303a9abfe4fa253d1fa215281baa05ff3a upstream. dtbs_check throws a warning at the dsi node: Warning (avoid_unnecessary_addr_size): /soc/dsi@14014000: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property Other DTS have a panel child node with a reg, so the parent dtsi must have the address-cells and size-cells, however this specific DT has the panel removed, but not the cells, hence the warning above. If panel is deleted then the cells must also be deleted since they are tied together, as the child node in this DT does not have a reg. Cc: stable@vger.kernel.org Fixes: cabc71b08eb5 ("arm64: dts: mt8183: Add kukui-jacuzzi-damu board") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230814071053.5459-1-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index bf97b60ae4d1..649477af2f41 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -91,6 +91,8 @@ &dsi0 { status = "okay"; + /delete-property/#size-cells; + /delete-property/#address-cells; /delete-node/panel@0; ports { port { From ac9a2f55bfcc4792c7f2da2d1409e733e49e0e7b Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 27 Nov 2023 14:20:26 +0100 Subject: [PATCH 0957/1397] arm64: dts: mediatek: cherry: Fix interrupt cells for MT6360 on I2C7 commit 5943b8f7449df9881b273db07bdde1e7120dccf0 upstream. Change interrupt cells to 2 to suppress interrupts_property warning. Cc: stable@vger.kernel.org Fixes: 0de0fe950f1b ("arm64: dts: mediatek: cherry: Enable MT6360 sub-pmic on I2C7") Link: https://lore.kernel.org/r/20231127132026.165027-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index 37a3e9de90ff..3f508e5c1843 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -362,7 +362,7 @@ pinctrl-0 = <&i2c7_pins>; pmic@34 { - #interrupt-cells = <1>; + #interrupt-cells = <2>; compatible = "mediatek,mt6360"; reg = <0x34>; interrupt-controller; From 12530266947ac3a6168624b0b5a9fac949d73806 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:15 +0200 Subject: [PATCH 0958/1397] arm64: dts: mediatek: mt8173-evb: Fix regulator-fixed node names commit 24165c5dad7ba7c7624d05575a5e0cc851396c71 upstream. Fix a unit_address_vs_reg warning for the USB VBUS fixed regulators by renaming the regulator nodes from regulator@{0,1} to regulator-usb-p0 and regulator-usb-p1. Cc: stable@vger.kernel.org Fixes: c0891284a74a ("arm64: dts: mediatek: add USB3 DRD driver") Link: https://lore.kernel.org/r/20231025093816.44327-8-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8173-evb.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts index 5122963d8743..d258c80213b2 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts @@ -44,7 +44,7 @@ id-gpio = <&pio 16 GPIO_ACTIVE_HIGH>; }; - usb_p1_vbus: regulator@0 { + usb_p1_vbus: regulator-usb-p1 { compatible = "regulator-fixed"; regulator-name = "usb_vbus"; regulator-min-microvolt = <5000000>; @@ -53,7 +53,7 @@ enable-active-high; }; - usb_p0_vbus: regulator@1 { + usb_p0_vbus: regulator-usb-p0 { compatible = "regulator-fixed"; regulator-name = "vbus"; regulator-min-microvolt = <5000000>; From 9c4ae4801f8131ddeed6021907e9294da31e0c38 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 6 Jul 2023 11:58:41 +0200 Subject: [PATCH 0959/1397] arm64: dts: mediatek: mt8195: Fix PM suspend/resume with venc clocks commit 61b94d54421a1f3670ddd5396ec70afe833e9405 upstream. Before suspending the LARBs we're making sure that any operation is done: this never happens because we are unexpectedly unclocking the LARB20 before executing the suspend handler for the MediaTek Smart Multimedia Interface (SMI) and the cause of this is incorrect clocks on this LARB. Fix this issue by changing the Local Arbiter 20 (used by the video encoder secondary core) apb clock to CLK_VENC_CORE1_VENC; furthermore, in order to make sure that both the PM resume and video encoder operation is stable, add the CLK_VENC(_CORE1)_LARB clock to the VENC (main core) and VENC_CORE1 power domains, as this IP cannot communicate with the rest of the system (the AP) without local arbiter clocks being operational. Cc: stable@vger.kernel.org Fixes: 3b5838d1d82e ("arm64: dts: mt8195: Add iommu and smi nodes") Fixes: 2b515194bf0c ("arm64: dts: mt8195: Add power domains controller") Reviewed-by: Alexandre Mergnat Link: https://lore.kernel.org/r/20230706095841.109315-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 54c674c45b49..e0ac2e9f5b72 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -627,6 +627,8 @@ power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 { reg = ; + clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>; + clock-names = "venc1-larb"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -689,6 +691,8 @@ power-domain@MT8195_POWER_DOMAIN_VENC { reg = ; + clocks = <&vencsys CLK_VENC_LARB>; + clock-names = "venc0-larb"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -2665,7 +2669,7 @@ reg = <0 0x1b010000 0 0x1000>; mediatek,larb-id = <20>; mediatek,smi = <&smi_common_vpp>; - clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>, + clocks = <&vencsys_core1 CLK_VENC_CORE1_VENC>, <&vencsys_core1 CLK_VENC_CORE1_GALS>, <&vppsys0 CLK_VPP0_GALS_VDO0_VDO1_VENCSYS_CORE1>; clock-names = "apb", "smi", "gals"; From d97373c3b159f31631eb04fea12be7b4fe7efb87 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:13 +0200 Subject: [PATCH 0960/1397] arm64: dts: mediatek: mt8183: Fix unit address for scp reserved memory commit 19cba9a6c071db57888dc6b2ec1d9bf8996ea681 upstream. The reserved memory for scp had node name "scp_mem_region" and also without unit-address: change the name to "memory@(address)". This fixes a unit_address_vs_reg warning. Cc: stable@vger.kernel.org Fixes: 1652dbf7363a ("arm64: dts: mt8183: add scp node") Link: https://lore.kernel.org/r/20231025093816.44327-6-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8183-evb.dts | 2 +- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts index d8bd51807683..a038e28102fc 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts @@ -31,7 +31,7 @@ #address-cells = <2>; #size-cells = <2>; ranges; - scp_mem_reserved: scp_mem_region { + scp_mem_reserved: memory@50000000 { compatible = "shared-dma-pool"; reg = <0 0x50000000 0 0x2900000>; no-map; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 6ce16a265e05..a3add2160233 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -108,7 +108,7 @@ #size-cells = <2>; ranges; - scp_mem_reserved: scp_mem_region { + scp_mem_reserved: memory@50000000 { compatible = "shared-dma-pool"; reg = <0 0x50000000 0 0x2900000>; no-map; From bfff27fb5d2d7f40b5f989cd446a1cdea03b514c Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:16 +0200 Subject: [PATCH 0961/1397] arm64: dts: mediatek: mt8183: Move thermal-zones to the root node commit 5a60d63439694590cd5ab1f998fc917ff7ba1c1d upstream. The thermal zones are not a soc bus device: move it to the root node to solve simple_bus_reg warnings. Cc: stable@vger.kernel.org Fixes: b325ce39785b ("arm64: dts: mt8183: add thermal zone node") Link: https://lore.kernel.org/r/20231025093816.44327-9-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 242 +++++++++++------------ 1 file changed, 121 insertions(+), 121 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 5169779d01df..976dc968b3ca 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1210,127 +1210,6 @@ nvmem-cell-names = "calibration-data"; }; - thermal_zones: thermal-zones { - cpu_thermal: cpu-thermal { - polling-delay-passive = <100>; - polling-delay = <500>; - thermal-sensors = <&thermal 0>; - sustainable-power = <5000>; - - trips { - threshold: trip-point0 { - temperature = <68000>; - hysteresis = <2000>; - type = "passive"; - }; - - target: trip-point1 { - temperature = <80000>; - hysteresis = <2000>; - type = "passive"; - }; - - cpu_crit: cpu-crit { - temperature = <115000>; - hysteresis = <2000>; - type = "critical"; - }; - }; - - cooling-maps { - map0 { - trip = <&target>; - cooling-device = <&cpu0 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu1 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu2 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu3 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - contribution = <3072>; - }; - map1 { - trip = <&target>; - cooling-device = <&cpu4 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu5 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu6 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu7 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - contribution = <1024>; - }; - }; - }; - - /* The tzts1 ~ tzts6 don't need to polling */ - /* The tzts1 ~ tzts6 don't need to thermal throttle */ - - tzts1: tzts1 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 1>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts2: tzts2 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 2>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts3: tzts3 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 3>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts4: tzts4 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 4>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts5: tzts5 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 5>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tztsABB: tztsABB { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 6>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - }; - pwm0: pwm@1100e000 { compatible = "mediatek,mt8183-disp-pwm"; reg = <0 0x1100e000 0 0x1000>; @@ -2105,4 +1984,125 @@ power-domains = <&spm MT8183_POWER_DOMAIN_CAM>; }; }; + + thermal_zones: thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <100>; + polling-delay = <500>; + thermal-sensors = <&thermal 0>; + sustainable-power = <5000>; + + trips { + threshold: trip-point0 { + temperature = <68000>; + hysteresis = <2000>; + type = "passive"; + }; + + target: trip-point1 { + temperature = <80000>; + hysteresis = <2000>; + type = "passive"; + }; + + cpu_crit: cpu-crit { + temperature = <115000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&target>; + cooling-device = <&cpu0 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu1 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu2 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu3 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + contribution = <3072>; + }; + map1 { + trip = <&target>; + cooling-device = <&cpu4 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu5 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu6 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu7 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + contribution = <1024>; + }; + }; + }; + + /* The tzts1 ~ tzts6 don't need to polling */ + /* The tzts1 ~ tzts6 don't need to thermal throttle */ + + tzts1: tzts1 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 1>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts2: tzts2 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 2>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts3: tzts3 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 3>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts4: tzts4 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 4>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts5: tzts5 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 5>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tztsABB: tztsABB { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 6>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + }; }; From 2e465268df4cbf81450eda3d913213553619d8c7 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:14 +0200 Subject: [PATCH 0962/1397] arm64: dts: mediatek: mt8183-evb: Fix unit_address_vs_reg warning on ntc commit 9dea1c724fc36643e83216c1f5a26613412150db upstream. The NTC is defined as ntc@0 but it doesn't need any address at all. Fix the unit_address_vs_reg warning by dropping the unit address: since the node name has to be generic also fully rename it from ntc@0 to thermal-sensor. Cc: stable@vger.kernel.org Fixes: ff9ea5c62279 ("arm64: dts: mediatek: mt8183-evb: Add node for thermistor") Link: https://lore.kernel.org/r/20231025093816.44327-7-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8183-evb.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts index a038e28102fc..a27d906db7ea 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts @@ -38,7 +38,7 @@ }; }; - ntc@0 { + thermal-sensor { compatible = "murata,ncp03wf104"; pullup-uv = <1800000>; pullup-ohm = <390000>; From b6eccbcb1bcc8694c5998f37c72dce340a1d2d7c Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 5 Oct 2023 13:30:41 +0300 Subject: [PATCH 0963/1397] arm64: dts: mediatek: mt8186: fix clock names for power domains commit 9adf7580f6d498a5839e02fa1d1535e934364602 upstream. Clocks for each power domain are split into big categories: pd clocks and subsys clocks. According to the binding, all clocks which have a dash '-' in their name are treated as subsys clocks, and must be placed at the end of the list. The other clocks which are pd clocks must come first. Fixed the naming and the placing of all clocks in the power domains. For the avoidance of doubt, prefixed all subsys clocks with the 'subsys' prefix. The binding does not enforce strict clock names, the driver uses them in bulk, only making a difference for pd clocks vs subsys clocks. The above problem appears to be trivial, however, it leads to incorrect power up and power down sequence of the power domains, because some clocks will be mistakenly taken for subsys clocks and viceversa. One consequence is the fact that if the DIS power domain goes power down and power back up during the boot process, when it comes back up, there are still transactions left on the bus which makes the display inoperable. Some of the clocks for the DIS power domain were wrongly using '_' instead of '-', which again made these clocks being treated as pd clocks instead of subsys clocks. Cc: stable@vger.kernel.org Fixes: d9e43c1e7a38 ("arm64: dts: mt8186: Add power domains controller") Signed-off-by: Eugen Hristev Tested-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Tested-by: AngeloGioacchino Del Regno Reviewed-by: Alexandre Mergnat Link: https://lore.kernel.org/r/20231005103041.352478-1-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8186.dtsi | 42 +++++++++++++++--------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi index f04ae70c470a..8598aa153b56 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi @@ -924,7 +924,8 @@ reg = ; clocks = <&topckgen CLK_TOP_SENINF>, <&topckgen CLK_TOP_SENINF1>; - clock-names = "csirx_top0", "csirx_top1"; + clock-names = "subsys-csirx-top0", + "subsys-csirx-top1"; #power-domain-cells = <0>; }; @@ -942,7 +943,8 @@ reg = ; clocks = <&topckgen CLK_TOP_AUDIODSP>, <&topckgen CLK_TOP_ADSP_BUS>; - clock-names = "audioadsp", "adsp_bus"; + clock-names = "audioadsp", + "subsys-adsp-bus"; #address-cells = <1>; #size-cells = <0>; #power-domain-cells = <1>; @@ -975,8 +977,11 @@ <&mmsys CLK_MM_SMI_COMMON>, <&mmsys CLK_MM_SMI_GALS>, <&mmsys CLK_MM_SMI_IOMMU>; - clock-names = "disp", "mdp", "smi_infra", "smi_common", - "smi_gals", "smi_iommu"; + clock-names = "disp", "mdp", + "subsys-smi-infra", + "subsys-smi-common", + "subsys-smi-gals", + "subsys-smi-iommu"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -993,15 +998,17 @@ power-domain@MT8186_POWER_DOMAIN_CAM { reg = ; - clocks = <&topckgen CLK_TOP_CAM>, - <&topckgen CLK_TOP_SENINF>, + clocks = <&topckgen CLK_TOP_SENINF>, <&topckgen CLK_TOP_SENINF1>, <&topckgen CLK_TOP_SENINF2>, <&topckgen CLK_TOP_SENINF3>, + <&camsys CLK_CAM2MM_GALS>, <&topckgen CLK_TOP_CAMTM>, - <&camsys CLK_CAM2MM_GALS>; - clock-names = "cam-top", "cam0", "cam1", "cam2", - "cam3", "cam-tm", "gals"; + <&topckgen CLK_TOP_CAM>; + clock-names = "cam0", "cam1", "cam2", + "cam3", "gals", + "subsys-cam-tm", + "subsys-cam-top"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -1020,9 +1027,9 @@ power-domain@MT8186_POWER_DOMAIN_IMG { reg = ; - clocks = <&topckgen CLK_TOP_IMG1>, - <&imgsys1 CLK_IMG1_GALS_IMG1>; - clock-names = "img-top", "gals"; + clocks = <&imgsys1 CLK_IMG1_GALS_IMG1>, + <&topckgen CLK_TOP_IMG1>; + clock-names = "gals", "subsys-img-top"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -1041,8 +1048,11 @@ <&ipesys CLK_IPE_LARB20>, <&ipesys CLK_IPE_SMI_SUBCOM>, <&ipesys CLK_IPE_GALS_IPE>; - clock-names = "ipe-top", "ipe-larb0", "ipe-larb1", - "ipe-smi", "ipe-gals"; + clock-names = "subsys-ipe-top", + "subsys-ipe-larb0", + "subsys-ipe-larb1", + "subsys-ipe-smi", + "subsys-ipe-gals"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -1061,7 +1071,9 @@ clocks = <&topckgen CLK_TOP_WPE>, <&wpesys CLK_WPE_SMI_LARB8_CK_EN>, <&wpesys CLK_WPE_SMI_LARB8_PCLK_EN>; - clock-names = "wpe0", "larb-ck", "larb-pclk"; + clock-names = "wpe0", + "subsys-larb-ck", + "subsys-larb-pclk"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; From b9cc170842d8f0bc2464048375e20dc2fea793c5 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 5 Oct 2023 17:11:50 +0200 Subject: [PATCH 0964/1397] arm64: dts: mediatek: mt8186: Change gpu speedbin nvmem cell name commit 59fa1e51ba54e1f513985a8177969b62973f7fd5 upstream. MT8186's GPU speedbin value must be interpreted, or the value will not be meaningful. Use the correct "gpu-speedbin" nvmem cell name for the GPU speedbin to allow triggering the cell info fixup handler, hence feeding the right speedbin number to the users. Cc: stable@vger.kernel.org Fixes: 263d2fd02afc ("arm64: dts: mediatek: mt8186: Add GPU speed bin NVMEM cells") Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20231005151150.355536-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/mediatek/mt8186.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi index 8598aa153b56..df0c04f2ba1d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi @@ -1668,7 +1668,7 @@ #address-cells = <1>; #size-cells = <1>; - gpu_speedbin: gpu-speed-bin@59c { + gpu_speedbin: gpu-speedbin@59c { reg = <0x59c 0x4>; bits = <0 3>; }; From 1b5d156c24282ca91b5b7c8988f5006f6dc7e113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 29 Sep 2023 10:16:37 +0200 Subject: [PATCH 0965/1397] coresight: etm4x: Remove bogous __exit annotation for some functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 348ddab81f7b0983d9fb158df910254f08d3f887 ] etm4_platform_driver (which lives in ".data" contains a reference to etm4_remove_platform_dev(). So the latter must not be marked with __exit which results in the function being discarded for a build with CONFIG_CORESIGHT_SOURCE_ETM4X=y which in turn makes the remove pointer contain invalid data. etm4x_amba_driver referencing etm4_remove_amba() has the same issue. Drop the __exit annotations for the two affected functions and a third one that is called by the other two. For reasons I don't understand this isn't catched by building with CONFIG_DEBUG_SECTION_MISMATCH=y. Fixes: c23bc382ef0e ("coresight: etm4x: Refactor probing routine") Fixes: 5214b563588e ("coresight: etm4x: Add support for sysreg only devices") Signed-off-by: Uwe Kleine-König Reviewed-by: James Clark Link: https://lore.kernel.org/all/20230929081540.yija47lsj35xtj4v@pengutronix.de/ Link: https://lore.kernel.org/r/20230929081637.2377335-1-u.kleine-koenig@pengutronix.de Signed-off-by: Suzuki K Poulose Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 77b0271ce6eb..34aee59dd147 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -2224,7 +2224,7 @@ static void clear_etmdrvdata(void *info) per_cpu(delayed_probe, cpu) = NULL; } -static void __exit etm4_remove_dev(struct etmv4_drvdata *drvdata) +static void etm4_remove_dev(struct etmv4_drvdata *drvdata) { bool had_delayed_probe; /* @@ -2253,7 +2253,7 @@ static void __exit etm4_remove_dev(struct etmv4_drvdata *drvdata) } } -static void __exit etm4_remove_amba(struct amba_device *adev) +static void etm4_remove_amba(struct amba_device *adev) { struct etmv4_drvdata *drvdata = dev_get_drvdata(&adev->dev); @@ -2261,7 +2261,7 @@ static void __exit etm4_remove_amba(struct amba_device *adev) etm4_remove_dev(drvdata); } -static int __exit etm4_remove_platform_dev(struct platform_device *pdev) +static int etm4_remove_platform_dev(struct platform_device *pdev) { struct etmv4_drvdata *drvdata = dev_get_drvdata(&pdev->dev); From 2f6b1527db82d959639be1f55db4069a869b71db Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 6 Oct 2023 14:14:52 +0100 Subject: [PATCH 0966/1397] coresight: Fix crash when Perf and sysfs modes are used concurrently [ Upstream commit 287e82cf69aa264a52bc37591bd0eb407e20f85c ] Partially revert the change in commit 6148652807ba ("coresight: Enable and disable helper devices adjacent to the path") which changed the bare call from source_ops(csdev)->enable() to coresight_enable_source() for Perf sessions. It was missed that coresight_enable_source() is specifically for the sysfs interface, rather than being a generic call. This interferes with the sysfs reference counting to cause the following crash: $ perf record -e cs_etm/@tmc_etr0/ -C 0 & $ echo 1 > /sys/bus/coresight/devices/tmc_etr0/enable_sink $ echo 1 > /sys/bus/coresight/devices/etm0/enable_source $ echo 0 > /sys/bus/coresight/devices/etm0/enable_source Unable to handle kernel NULL pointer dereference at virtual address 00000000000001d0 Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP ... Call trace: etm4_disable+0x54/0x150 [coresight_etm4x] coresight_disable_source+0x6c/0x98 [coresight] coresight_disable+0x74/0x1c0 [coresight] enable_source_store+0x88/0xa0 [coresight] dev_attr_store+0x20/0x40 sysfs_kf_write+0x4c/0x68 kernfs_fop_write_iter+0x120/0x1b8 vfs_write+0x2dc/0x3b0 ksys_write+0x70/0x108 __arm64_sys_write+0x24/0x38 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x104/0x130 do_el0_svc+0x40/0xb8 el0_svc+0x2c/0xb8 el0t_64_sync_handler+0xc0/0xc8 el0t_64_sync+0x1a4/0x1a8 Code: d53cd042 91002000 b9402a81 b8626800 (f940ead5) ---[ end trace 0000000000000000 ]--- This commit linked below also fixes the issue, but has unlocked updates to the mode which could potentially race. So until we come up with a more complete solution that takes all locking and interaction between both modes into account, just revert back to the old behavior for Perf. Reported-by: Junhao He Closes: https://lore.kernel.org/linux-arm-kernel/20230921132904.60996-1-hejunhao3@huawei.com/ Fixes: 6148652807ba ("coresight: Enable and disable helper devices adjacent to the path") Tested-by: Junhao He Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231006131452.646721-1-james.clark@arm.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-etm-perf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 5ca6278baff4..89e8ed214ea4 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -493,7 +493,7 @@ static void etm_event_start(struct perf_event *event, int flags) goto fail_end_stop; /* Finally enable the tracer */ - if (coresight_enable_source(csdev, CS_MODE_PERF, event)) + if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF)) goto fail_disable_path; /* @@ -587,7 +587,7 @@ static void etm_event_stop(struct perf_event *event, int mode) return; /* stop tracer */ - coresight_disable_source(csdev, event); + source_ops(csdev)->disable(csdev, event); /* tell the core */ event->hw.state = PERF_HES_STOPPED; From 359d3fbcbc099430ca3f35a696e298e9c2d9e9ba Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 10 Oct 2023 16:47:31 +0800 Subject: [PATCH 0967/1397] hwtracing: hisi_ptt: Add dummy callback pmu::read() [ Upstream commit 55e0a2fb0cb5ab7c9c99c1ad4d3e6954de8b73a0 ] When start trace with perf option "-C $cpu" and immediately stop it with SIGTERM or others, the perf core will invoke pmu::read() while the driver doesn't implement it. Add a dummy pmu::read() to avoid any issues. Fixes: ff0de066b463 ("hwtracing: hisi_ptt: Add trace function support for HiSilicon PCIe Tune and Trace device") Signed-off-by: Junhao He Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231010084731.30450-6-yangyicong@huawei.com Signed-off-by: Sasha Levin --- drivers/hwtracing/ptt/hisi_ptt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/ptt/hisi_ptt.c b/drivers/hwtracing/ptt/hisi_ptt.c index 49ea1b0f7489..3045d1894b81 100644 --- a/drivers/hwtracing/ptt/hisi_ptt.c +++ b/drivers/hwtracing/ptt/hisi_ptt.c @@ -1178,6 +1178,10 @@ static void hisi_ptt_pmu_del(struct perf_event *event, int flags) hisi_ptt_pmu_stop(event, PERF_EF_UPDATE); } +static void hisi_ptt_pmu_read(struct perf_event *event) +{ +} + static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node) { cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node); @@ -1221,6 +1225,7 @@ static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt) .stop = hisi_ptt_pmu_stop, .add = hisi_ptt_pmu_add, .del = hisi_ptt_pmu_del, + .read = hisi_ptt_pmu_read, }; reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION); From ace850bd8600328f8f56e41db88d7b232ab939d4 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 14 Nov 2023 21:33:43 +0800 Subject: [PATCH 0968/1397] coresight: ultrasoc-smb: Fix sleep while close preempt in enable_smb [ Upstream commit b8411287aef4a994eff0c68f5597910c4194dfe3 ] When we to enable the SMB by perf, the perf sched will call perf_ctx_lock() to close system preempt in event_function_call(). But SMB::enable_smb() use mutex to lock the critical section, which may sleep. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 153023, name: perf preempt_count: 2, expected: 0 RCU nest depth: 0, expected: 0 INFO: lockdep is turned off. irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0xae8/0x2b48 softirqs last enabled at (0): [] copy_process+0xae8/0x2b48 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 2 PID: 153023 Comm: perf Kdump: loaded Tainted: G W O 6.5.0-rc4+ #1 Call trace: ... __mutex_lock+0xbc/0xa70 mutex_lock_nested+0x34/0x48 smb_update_buffer+0x58/0x360 [ultrasoc_smb] etm_event_stop+0x204/0x2d8 [coresight] etm_event_del+0x1c/0x30 [coresight] event_sched_out+0x17c/0x3b8 group_sched_out.part.0+0x5c/0x208 __perf_event_disable+0x15c/0x210 event_function+0xe0/0x230 remote_function+0xb4/0xe8 generic_exec_single+0x160/0x268 smp_call_function_single+0x20c/0x2a0 event_function_call+0x20c/0x220 _perf_event_disable+0x5c/0x90 perf_event_for_each_child+0x58/0xc0 _perf_ioctl+0x34c/0x1250 perf_ioctl+0x64/0x98 ... Use spinlock to replace mutex to control driver data access to one at a time. The function copy_to_user() may sleep, it cannot be in a spinlock context, so we can't simply replace it in smb_read(). But we can ensure that only one user gets the SMB device fd by smb_open(), so remove the locks from smb_read() and buffer synchronization is guaranteed by the user. Fixes: 06f5c2926aaa ("drivers/coresight: Add UltraSoc System Memory Buffer driver") Signed-off-by: Junhao He Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231114133346.30489-2-hejunhao3@huawei.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/ultrasoc-smb.c | 35 +++++++++------------- drivers/hwtracing/coresight/ultrasoc-smb.h | 6 ++-- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c index e9a32a97fbee..0a0fe9fcc57f 100644 --- a/drivers/hwtracing/coresight/ultrasoc-smb.c +++ b/drivers/hwtracing/coresight/ultrasoc-smb.c @@ -99,7 +99,7 @@ static int smb_open(struct inode *inode, struct file *file) struct smb_drv_data, miscdev); int ret = 0; - mutex_lock(&drvdata->mutex); + spin_lock(&drvdata->spinlock); if (drvdata->reading) { ret = -EBUSY; @@ -115,7 +115,7 @@ static int smb_open(struct inode *inode, struct file *file) drvdata->reading = true; out: - mutex_unlock(&drvdata->mutex); + spin_unlock(&drvdata->spinlock); return ret; } @@ -132,10 +132,8 @@ static ssize_t smb_read(struct file *file, char __user *data, size_t len, if (!len) return 0; - mutex_lock(&drvdata->mutex); - if (!sdb->data_size) - goto out; + return 0; to_copy = min(sdb->data_size, len); @@ -145,20 +143,15 @@ static ssize_t smb_read(struct file *file, char __user *data, size_t len, if (copy_to_user(data, sdb->buf_base + sdb->buf_rdptr, to_copy)) { dev_dbg(dev, "Failed to copy data to user\n"); - to_copy = -EFAULT; - goto out; + return -EFAULT; } *ppos += to_copy; - smb_update_read_ptr(drvdata, to_copy); - - dev_dbg(dev, "%zu bytes copied\n", to_copy); -out: if (!sdb->data_size) smb_reset_buffer(drvdata); - mutex_unlock(&drvdata->mutex); + dev_dbg(dev, "%zu bytes copied\n", to_copy); return to_copy; } @@ -167,9 +160,9 @@ static int smb_release(struct inode *inode, struct file *file) struct smb_drv_data *drvdata = container_of(file->private_data, struct smb_drv_data, miscdev); - mutex_lock(&drvdata->mutex); + spin_lock(&drvdata->spinlock); drvdata->reading = false; - mutex_unlock(&drvdata->mutex); + spin_unlock(&drvdata->spinlock); return 0; } @@ -262,7 +255,7 @@ static int smb_enable(struct coresight_device *csdev, enum cs_mode mode, struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent); int ret = 0; - mutex_lock(&drvdata->mutex); + spin_lock(&drvdata->spinlock); /* Do nothing, the trace data is reading by other interface now */ if (drvdata->reading) { @@ -294,7 +287,7 @@ static int smb_enable(struct coresight_device *csdev, enum cs_mode mode, dev_dbg(&csdev->dev, "Ultrasoc SMB enabled\n"); out: - mutex_unlock(&drvdata->mutex); + spin_unlock(&drvdata->spinlock); return ret; } @@ -304,7 +297,7 @@ static int smb_disable(struct coresight_device *csdev) struct smb_drv_data *drvdata = dev_get_drvdata(csdev->dev.parent); int ret = 0; - mutex_lock(&drvdata->mutex); + spin_lock(&drvdata->spinlock); if (drvdata->reading) { ret = -EBUSY; @@ -327,7 +320,7 @@ static int smb_disable(struct coresight_device *csdev) dev_dbg(&csdev->dev, "Ultrasoc SMB disabled\n"); out: - mutex_unlock(&drvdata->mutex); + spin_unlock(&drvdata->spinlock); return ret; } @@ -408,7 +401,7 @@ static unsigned long smb_update_buffer(struct coresight_device *csdev, if (!buf) return 0; - mutex_lock(&drvdata->mutex); + spin_lock(&drvdata->spinlock); /* Don't do anything if another tracer is using this sink. */ if (atomic_read(&csdev->refcnt) != 1) @@ -432,7 +425,7 @@ static unsigned long smb_update_buffer(struct coresight_device *csdev, if (!buf->snapshot && lost) perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); out: - mutex_unlock(&drvdata->mutex); + spin_unlock(&drvdata->spinlock); return data_size; } @@ -590,7 +583,7 @@ static int smb_probe(struct platform_device *pdev) return ret; } - mutex_init(&drvdata->mutex); + spin_lock_init(&drvdata->spinlock); drvdata->pid = -1; ret = smb_register_sink(pdev, drvdata); diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.h b/drivers/hwtracing/coresight/ultrasoc-smb.h index d2e14e8d2c8a..82a44c14a882 100644 --- a/drivers/hwtracing/coresight/ultrasoc-smb.h +++ b/drivers/hwtracing/coresight/ultrasoc-smb.h @@ -8,7 +8,7 @@ #define _ULTRASOC_SMB_H #include -#include +#include /* Offset of SMB global registers */ #define SMB_GLB_CFG_REG 0x00 @@ -105,7 +105,7 @@ struct smb_data_buffer { * @csdev: Component vitals needed by the framework. * @sdb: Data buffer for SMB. * @miscdev: Specifics to handle "/dev/xyz.smb" entry. - * @mutex: Control data access to one at a time. + * @spinlock: Control data access to one at a time. * @reading: Synchronise user space access to SMB buffer. * @pid: Process ID of the process being monitored by the * session that is using this component. @@ -116,7 +116,7 @@ struct smb_drv_data { struct coresight_device *csdev; struct smb_data_buffer sdb; struct miscdevice miscdev; - struct mutex mutex; + spinlock_t spinlock; bool reading; pid_t pid; enum cs_mode mode; From ab5091e1ccb05c855c78f67f93538bb703f3f64f Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 14 Nov 2023 21:33:44 +0800 Subject: [PATCH 0969/1397] coresight: ultrasoc-smb: Config SMB buffer before register sink [ Upstream commit 830a7f54db102c889a3fe1c0a225f369ac05f07f ] The SMB dirver register the enable/disable sysfs interface in function smb_register_sink(), however the buffer depends on the following configuration to work well. So it'll be possible for user to access an unreset one. Move the config buffer operation to before register_sink(). Ignore the return value, if smb_config_inport() fails. That will cause the hardwares disable trace path to fail, should not affect SMB driver remove. So we make smb_remove() return success, Fixes: 06f5c2926aaa ("drivers/coresight: Add UltraSoc System Memory Buffer driver") Signed-off-by: Junhao He Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231114133346.30489-3-hejunhao3@huawei.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/ultrasoc-smb.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c index 0a0fe9fcc57f..2f2aba90a514 100644 --- a/drivers/hwtracing/coresight/ultrasoc-smb.c +++ b/drivers/hwtracing/coresight/ultrasoc-smb.c @@ -583,37 +583,32 @@ static int smb_probe(struct platform_device *pdev) return ret; } + ret = smb_config_inport(dev, true); + if (ret) + return ret; + + platform_set_drvdata(pdev, drvdata); spin_lock_init(&drvdata->spinlock); drvdata->pid = -1; ret = smb_register_sink(pdev, drvdata); if (ret) { + smb_config_inport(&pdev->dev, false); dev_err(dev, "Failed to register SMB sink\n"); return ret; } - ret = smb_config_inport(dev, true); - if (ret) { - smb_unregister_sink(drvdata); - return ret; - } - - platform_set_drvdata(pdev, drvdata); - return 0; } static int smb_remove(struct platform_device *pdev) { struct smb_drv_data *drvdata = platform_get_drvdata(pdev); - int ret; - - ret = smb_config_inport(&pdev->dev, false); - if (ret) - return ret; smb_unregister_sink(drvdata); + smb_config_inport(&pdev->dev, false); + return 0; } From c541d0edd82764017d69b78e479bf0db8acf8202 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 14 Nov 2023 21:33:45 +0800 Subject: [PATCH 0970/1397] coresight: ultrasoc-smb: Fix uninitialized before use buf_hw_base [ Upstream commit 862c135bde8bc185e8aae2110374175e6a1b6ed5 ] In smb_reset_buffer, the sdb->buf_hw_base variable is uninitialized before use, which initializes it in smb_init_data_buffer. And the SMB regiester are set in smb_config_inport. So move the call after smb_config_inport. Fixes: 06f5c2926aaa ("drivers/coresight: Add UltraSoc System Memory Buffer driver") Signed-off-by: Junhao He Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20231114133346.30489-4-hejunhao3@huawei.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/ultrasoc-smb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/ultrasoc-smb.c b/drivers/hwtracing/coresight/ultrasoc-smb.c index 2f2aba90a514..6e32d31a95fe 100644 --- a/drivers/hwtracing/coresight/ultrasoc-smb.c +++ b/drivers/hwtracing/coresight/ultrasoc-smb.c @@ -477,7 +477,6 @@ static int smb_init_data_buffer(struct platform_device *pdev, static void smb_init_hw(struct smb_drv_data *drvdata) { smb_disable_hw(drvdata); - smb_reset_buffer(drvdata); writel(SMB_LB_CFG_LO_DEFAULT, drvdata->base + SMB_LB_CFG_LO_REG); writel(SMB_LB_CFG_HI_DEFAULT, drvdata->base + SMB_LB_CFG_HI_REG); @@ -587,6 +586,7 @@ static int smb_probe(struct platform_device *pdev) if (ret) return ret; + smb_reset_buffer(drvdata); platform_set_drvdata(pdev, drvdata); spin_lock_init(&drvdata->spinlock); drvdata->pid = -1; From e2365ead012a811755c4e3c02a95c70b89b38e56 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Mon, 20 Nov 2023 17:55:23 +0800 Subject: [PATCH 0971/1397] misc: mei: client.c: return negative error code in mei_cl_write [ Upstream commit 8f06aee8089cf42fd99a20184501bd1347ce61b9 ] mei_msg_hdr_init() return negative error code, rets should be 'PTR_ERR(mei_hdr)' rather than '-PTR_ERR(mei_hdr)'. Fixes: 0cd7c01a60f8 ("mei: add support for mei extended header.") Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20231120095523.178385-1-suhui@nfschina.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/mei/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 5c19097266fe..75bc2a32ec0d 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -2011,7 +2011,7 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long time mei_hdr = mei_msg_hdr_init(cb); if (IS_ERR(mei_hdr)) { - rets = -PTR_ERR(mei_hdr); + rets = PTR_ERR(mei_hdr); mei_hdr = NULL; goto err; } From f78fff4648bb0a797359dac69792463efd22bb00 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Mon, 20 Nov 2023 17:55:26 +0800 Subject: [PATCH 0972/1397] misc: mei: client.c: fix problem of return '-EOVERFLOW' in mei_cl_write [ Upstream commit ee6236027218f8531916f1c5caa5dc330379f287 ] Clang static analyzer complains that value stored to 'rets' is never read.Let 'buf_len = -EOVERFLOW' to make sure we can return '-EOVERFLOW'. Fixes: 8c8d964ce90f ("mei: move hbuf_depth from the mei device to the hw modules") Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20231120095523.178385-2-suhui@nfschina.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/mei/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 75bc2a32ec0d..32f228782318 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -2032,7 +2032,7 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long time hbuf_slots = mei_hbuf_empty_slots(dev); if (hbuf_slots < 0) { - rets = -EOVERFLOW; + buf_len = -EOVERFLOW; goto out; } From 0fdd1b8848217a6c2ae79ded51880713de2a32ea Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 30 Nov 2023 18:15:48 -0800 Subject: [PATCH 0973/1397] perf vendor events arm64: AmpereOne: Add missing DefaultMetricgroupName fields [ Upstream commit 90fe70d4e23cb57253d2668a171d5695c332deb7 ] AmpereOne metrics were missing DefaultMetricgroupName from metrics with "Default" in group name resulting perf to segfault. Add the missing field to address the issue. Fixes: 59faeaf80d02 ("perf vendor events arm64: Fix for AmpereOne metrics") Signed-off-by: Ilkka Koskinen Reviewed-by: Ian Rogers Cc: James Clark Cc: Will Deacon Cc: Leo Yan Cc: Mike Leach Cc: John Garry Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231201021550.1109196-2-ilkka@os.amperecomputing.com Signed-off-by: Namhyung Kim Signed-off-by: Sasha Levin --- tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json index e2848a9d4848..afcdad58ef89 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json @@ -231,6 +231,7 @@ "MetricName": "slots_lost_misspeculation_fraction", "MetricExpr": "100 * ((OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots))", "BriefDescription": "Fraction of slots lost due to misspeculation", + "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", "ScaleUnit": "1percent of slots" }, @@ -238,6 +239,7 @@ "MetricName": "retired_fraction", "MetricExpr": "100 * (OP_RETIRED / (CPU_CYCLES * #slots))", "BriefDescription": "Fraction of slots retiring, useful work", + "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", "ScaleUnit": "1percent of slots" }, From 3275410b13b6275746abadcd76d9deaf7c85a9aa Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sat, 9 Dec 2023 15:49:16 +0800 Subject: [PATCH 0974/1397] LoongArch: BPF: Don't sign extend memory load operand [ Upstream commit fe5757553bf9ebe45ae8ecab5922f6937c8d8dfc ] The `cgrp_local_storage` test triggers a kernel panic like: # ./test_progs -t cgrp_local_storage Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. [ 550.930632] CPU 1 Unable to handle kernel paging request at virtual address 0000000000000080, era == ffff80000200be34, ra == ffff80000200be00 [ 550.931781] Oops[#1]: [ 550.931966] CPU: 1 PID: 1303 Comm: test_progs Not tainted 6.7.0-rc2-loong-devel-g2f56bb0d2327 #35 a896aca3f4164f09cc346f89f2e09832e07be5f6 [ 550.932215] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 [ 550.932403] pc ffff80000200be34 ra ffff80000200be00 tp 9000000108350000 sp 9000000108353dc0 [ 550.932545] a0 0000000000000000 a1 0000000000000517 a2 0000000000000118 a3 00007ffffbb15558 [ 550.932682] a4 00007ffffbb15620 a5 90000001004e7700 a6 0000000000000021 a7 0000000000000118 [ 550.932824] t0 ffff80000200bdc0 t1 0000000000000517 t2 0000000000000517 t3 00007ffff1c06ee0 [ 550.932961] t4 0000555578ae04d0 t5 fffffffffffffff8 t6 0000000000000004 t7 0000000000000020 [ 550.933097] t8 0000000000000040 u0 00000000000007b8 s9 9000000108353e00 s0 90000001004e7700 [ 550.933241] s1 9000000004005000 s2 0000000000000001 s3 0000000000000000 s4 0000555555eb2ec8 [ 550.933379] s5 00007ffffbb15bb8 s6 00007ffff1dafd60 s7 000055555663f610 s8 00007ffff1db0050 [ 550.933520] ra: ffff80000200be00 bpf_prog_98f1b9e767be2a84_on_enter+0x40/0x200 [ 550.933911] ERA: ffff80000200be34 bpf_prog_98f1b9e767be2a84_on_enter+0x74/0x200 [ 550.934105] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) [ 550.934596] PRMD: 00000004 (PPLV0 +PIE -PWE) [ 550.934712] EUEN: 00000003 (+FPE +SXE -ASXE -BTE) [ 550.934836] ECFG: 00071c1c (LIE=2-4,10-12 VS=7) [ 550.934976] ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0) [ 550.935097] BADV: 0000000000000080 [ 550.935181] PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) [ 550.935291] Modules linked in: [ 550.935391] Process test_progs (pid: 1303, threadinfo=000000006c3b1c41, task=0000000061f84a55) [ 550.935643] Stack : 00007ffffbb15bb8 0000555555eb2ec8 0000000000000000 0000000000000001 [ 550.935844] 9000000004005000 ffff80001b864000 00007ffffbb15450 90000000029aa034 [ 550.935990] 0000000000000000 9000000108353ec0 0000000000000118 d07d9dfb09721a09 [ 550.936175] 0000000000000001 0000000000000000 9000000108353ec0 0000000000000118 [ 550.936314] 9000000101d46ad0 900000000290abf0 000055555663f610 0000000000000000 [ 550.936479] 0000000000000003 9000000108353ec0 00007ffffbb15450 90000000029d7288 [ 550.936635] 00007ffff1dafd60 000055555663f610 0000000000000000 0000000000000003 [ 550.936779] 9000000108353ec0 90000000035dd1f0 00007ffff1dafd58 9000000002841c5c [ 550.936939] 0000000000000119 0000555555eea5a8 00007ffff1d78780 00007ffffbb153e0 [ 550.937083] ffffffffffffffda 00007ffffbb15518 0000000000000040 00007ffffbb15558 [ 550.937224] ... [ 550.937299] Call Trace: [ 550.937521] [] bpf_prog_98f1b9e767be2a84_on_enter+0x74/0x200 [ 550.937910] [<90000000029aa034>] bpf_trace_run2+0x90/0x154 [ 550.938105] [<900000000290abf0>] syscall_trace_enter.isra.0+0x1cc/0x200 [ 550.938224] [<90000000035dd1f0>] do_syscall+0x48/0x94 [ 550.938319] [<9000000002841c5c>] handle_syscall+0xbc/0x158 [ 550.938477] [ 550.938607] Code: 580009ae 50016000 262402e4 <28c20085> 14092084 03a00084 16000024 03240084 00150006 [ 550.938851] [ 550.939021] ---[ end trace 0000000000000000 ]--- Further investigation shows that this panic is triggered by memory load operations: ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); The expression `task->cgroups->dfl_cgrp` involves two memory load. Since the field offset fits in imm12 or imm14, we use ldd or ldptrd instructions. But both instructions have the side effect that it will signed-extended the imm operand. Finally, we got the wrong addresses and panics is inevitable. Use a generic ldxd instruction to avoid this kind of issues. With this change, we have: # ./test_progs -t cgrp_local_storage Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. test_cgrp_local_storage:PASS:join_cgroup /cgrp_local_storage 0 nsec #48/1 cgrp_local_storage/tp_btf:OK test_attach_cgroup:PASS:skel_open 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec libbpf: prog 'update_cookie_tracing': failed to attach: ERROR: strerror_r(-524)=22 test_attach_cgroup:FAIL:prog_attach unexpected error: -524 #48/2 cgrp_local_storage/attach_cgroup:FAIL test_recursion:PASS:skel_open_and_load 0 nsec libbpf: prog 'on_lookup': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'on_lookup': failed to auto-attach: -524 test_recursion:FAIL:skel_attach unexpected error: -524 (errno 524) #48/3 cgrp_local_storage/recursion:FAIL #48/4 cgrp_local_storage/negative:OK #48/5 cgrp_local_storage/cgroup_iter_sleepable:OK test_yes_rcu_lock:PASS:skel_open 0 nsec test_yes_rcu_lock:PASS:skel_load 0 nsec libbpf: prog 'yes_rcu_lock': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'yes_rcu_lock': failed to auto-attach: -524 test_yes_rcu_lock:FAIL:skel_attach unexpected error: -524 (errno 524) #48/6 cgrp_local_storage/yes_rcu_lock:FAIL #48/7 cgrp_local_storage/no_rcu_lock:OK #48 cgrp_local_storage:FAIL All error logs: test_cgrp_local_storage:PASS:join_cgroup /cgrp_local_storage 0 nsec test_attach_cgroup:PASS:skel_open 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec libbpf: prog 'update_cookie_tracing': failed to attach: ERROR: strerror_r(-524)=22 test_attach_cgroup:FAIL:prog_attach unexpected error: -524 #48/2 cgrp_local_storage/attach_cgroup:FAIL test_recursion:PASS:skel_open_and_load 0 nsec libbpf: prog 'on_lookup': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'on_lookup': failed to auto-attach: -524 test_recursion:FAIL:skel_attach unexpected error: -524 (errno 524) #48/3 cgrp_local_storage/recursion:FAIL test_yes_rcu_lock:PASS:skel_open 0 nsec test_yes_rcu_lock:PASS:skel_load 0 nsec libbpf: prog 'yes_rcu_lock': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'yes_rcu_lock': failed to auto-attach: -524 test_yes_rcu_lock:FAIL:skel_attach unexpected error: -524 (errno 524) #48/6 cgrp_local_storage/yes_rcu_lock:FAIL #48 cgrp_local_storage:FAIL Summary: 0/4 PASSED, 0 SKIPPED, 1 FAILED No panics any more (The test still failed because lack of BPF trampoline which I am actively working on). Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support") Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/net/bpf_jit.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index db9342b2d0e6..5d02bc6acec2 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -907,14 +907,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext } break; case BPF_DW: - if (is_signed_imm12(off)) { - emit_insn(ctx, ldd, dst, src, off); - } else if (is_signed_imm14(off)) { - emit_insn(ctx, ldptrd, dst, src, off); - } else { - move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxd, dst, src, t1); - } + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxd, dst, src, t1); break; } From 4b117370d1d12cc00bb14ee2f400b1a91e32bb69 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sat, 9 Dec 2023 15:49:16 +0800 Subject: [PATCH 0975/1397] LoongArch: BPF: Don't sign extend function return value [ Upstream commit 5d47ec2e6f4c64e30e392cfe9532df98c9beb106 ] The `cls_redirect` test triggers a kernel panic like: # ./test_progs -t cls_redirect Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. [ 30.938489] CPU 3 Unable to handle kernel paging request at virtual address fffffffffd814de0, era == ffff800002009fb8, ra == ffff800002009f9c [ 30.939331] Oops[#1]: [ 30.939513] CPU: 3 PID: 1260 Comm: test_progs Not tainted 6.7.0-rc2-loong-devel-g2f56bb0d2327 #35 a896aca3f4164f09cc346f89f2e09832e07be5f6 [ 30.939732] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 [ 30.939901] pc ffff800002009fb8 ra ffff800002009f9c tp 9000000104da4000 sp 9000000104da7ab0 [ 30.940038] a0 fffffffffd814de0 a1 9000000104da7a68 a2 0000000000000000 a3 9000000104da7c10 [ 30.940183] a4 9000000104da7c14 a5 0000000000000002 a6 0000000000000021 a7 00005555904d7f90 [ 30.940321] t0 0000000000000110 t1 0000000000000000 t2 fffffffffd814de0 t3 0004c4b400000000 [ 30.940456] t4 ffffffffffffffff t5 00000000c3f63600 t6 0000000000000000 t7 0000000000000000 [ 30.940590] t8 000000000006d803 u0 0000000000000020 s9 9000000104da7b10 s0 900000010504c200 [ 30.940727] s1 fffffffffd814de0 s2 900000010504c200 s3 9000000104da7c10 s4 9000000104da7ad0 [ 30.940866] s5 0000000000000000 s6 90000000030e65bc s7 9000000104da7b44 s8 90000000044f6fc0 [ 30.941015] ra: ffff800002009f9c bpf_prog_846803e5ae81417f_cls_redirect+0xa0/0x590 [ 30.941535] ERA: ffff800002009fb8 bpf_prog_846803e5ae81417f_cls_redirect+0xbc/0x590 [ 30.941696] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) [ 30.942224] PRMD: 00000004 (PPLV0 +PIE -PWE) [ 30.942330] EUEN: 00000003 (+FPE +SXE -ASXE -BTE) [ 30.942453] ECFG: 00071c1c (LIE=2-4,10-12 VS=7) [ 30.942612] ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0) [ 30.942764] BADV: fffffffffd814de0 [ 30.942854] PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) [ 30.942974] Modules linked in: [ 30.943078] Process test_progs (pid: 1260, threadinfo=00000000ce303226, task=000000007d10bb76) [ 30.943306] Stack : 900000010a064000 90000000044f6fc0 9000000104da7b48 0000000000000000 [ 30.943495] 0000000000000000 9000000104da7c14 9000000104da7c10 900000010504c200 [ 30.943626] 0000000000000001 ffff80001b88c000 9000000104da7b70 90000000030e6668 [ 30.943785] 0000000000000000 9000000104da7b58 ffff80001b88c048 9000000003d05000 [ 30.943936] 900000000303ac88 0000000000000000 0000000000000000 9000000104da7b70 [ 30.944091] 0000000000000000 0000000000000001 0000000731eeab00 0000000000000000 [ 30.944245] ffff80001b88c000 0000000000000000 0000000000000000 54b99959429f83b8 [ 30.944402] ffff80001b88c000 90000000044f6fc0 9000000101d70000 ffff80001b88c000 [ 30.944538] 000000000000005a 900000010504c200 900000010a064000 900000010a067000 [ 30.944697] 9000000104da7d88 0000000000000000 9000000003d05000 90000000030e794c [ 30.944852] ... [ 30.944924] Call Trace: [ 30.945120] [] bpf_prog_846803e5ae81417f_cls_redirect+0xbc/0x590 [ 30.945650] [<90000000030e6668>] bpf_test_run+0x1ec/0x2f8 [ 30.945958] [<90000000030e794c>] bpf_prog_test_run_skb+0x31c/0x684 [ 30.946065] [<90000000026d4f68>] __sys_bpf+0x678/0x2724 [ 30.946159] [<90000000026d7288>] sys_bpf+0x20/0x2c [ 30.946253] [<90000000032dd224>] do_syscall+0x7c/0x94 [ 30.946343] [<9000000002541c5c>] handle_syscall+0xbc/0x158 [ 30.946492] [ 30.946549] Code: 0015030e 5c0009c0 5001d000 <28c00304> 02c00484 29c00304 00150009 2a42d2e4 0280200d [ 30.946793] [ 30.946971] ---[ end trace 0000000000000000 ]--- [ 32.093225] Kernel panic - not syncing: Fatal exception in interrupt [ 32.093526] Kernel relocated by 0x2320000 [ 32.093630] .text @ 0x9000000002520000 [ 32.093725] .data @ 0x9000000003400000 [ 32.093792] .bss @ 0x9000000004413200 [ 34.971998] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- This is because we signed-extend function return values. When subprog mode is enabled, we have: cls_redirect() -> get_global_metrics() returns pcpu ptr 0xfffffefffc00b480 The pointer returned is later signed-extended to 0xfffffffffc00b480 at `BPF_JMP | BPF_EXIT`. During BPF prog run, this triggers unhandled page fault and a kernel panic. Drop the unnecessary signed-extension on return values like other architectures do. With this change, we have: # ./test_progs -t cls_redirect Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. #51/1 cls_redirect/cls_redirect_inlined:OK #51/2 cls_redirect/IPv4 TCP accept unknown (no hops, flags: SYN):OK #51/3 cls_redirect/IPv6 TCP accept unknown (no hops, flags: SYN):OK #51/4 cls_redirect/IPv4 TCP accept unknown (no hops, flags: ACK):OK #51/5 cls_redirect/IPv6 TCP accept unknown (no hops, flags: ACK):OK #51/6 cls_redirect/IPv4 TCP forward unknown (one hop, flags: ACK):OK #51/7 cls_redirect/IPv6 TCP forward unknown (one hop, flags: ACK):OK #51/8 cls_redirect/IPv4 TCP accept known (one hop, flags: ACK):OK #51/9 cls_redirect/IPv6 TCP accept known (one hop, flags: ACK):OK #51/10 cls_redirect/IPv4 UDP accept unknown (no hops, flags: none):OK #51/11 cls_redirect/IPv6 UDP accept unknown (no hops, flags: none):OK #51/12 cls_redirect/IPv4 UDP forward unknown (one hop, flags: none):OK #51/13 cls_redirect/IPv6 UDP forward unknown (one hop, flags: none):OK #51/14 cls_redirect/IPv4 UDP accept known (one hop, flags: none):OK #51/15 cls_redirect/IPv6 UDP accept known (one hop, flags: none):OK #51/16 cls_redirect/cls_redirect_subprogs:OK #51/17 cls_redirect/IPv4 TCP accept unknown (no hops, flags: SYN):OK #51/18 cls_redirect/IPv6 TCP accept unknown (no hops, flags: SYN):OK #51/19 cls_redirect/IPv4 TCP accept unknown (no hops, flags: ACK):OK #51/20 cls_redirect/IPv6 TCP accept unknown (no hops, flags: ACK):OK #51/21 cls_redirect/IPv4 TCP forward unknown (one hop, flags: ACK):OK #51/22 cls_redirect/IPv6 TCP forward unknown (one hop, flags: ACK):OK #51/23 cls_redirect/IPv4 TCP accept known (one hop, flags: ACK):OK #51/24 cls_redirect/IPv6 TCP accept known (one hop, flags: ACK):OK #51/25 cls_redirect/IPv4 UDP accept unknown (no hops, flags: none):OK #51/26 cls_redirect/IPv6 UDP accept unknown (no hops, flags: none):OK #51/27 cls_redirect/IPv4 UDP forward unknown (one hop, flags: none):OK #51/28 cls_redirect/IPv6 UDP forward unknown (one hop, flags: none):OK #51/29 cls_redirect/IPv4 UDP accept known (one hop, flags: none):OK #51/30 cls_redirect/IPv6 UDP accept known (one hop, flags: none):OK #51/31 cls_redirect/cls_redirect_dynptr:OK #51/32 cls_redirect/IPv4 TCP accept unknown (no hops, flags: SYN):OK #51/33 cls_redirect/IPv6 TCP accept unknown (no hops, flags: SYN):OK #51/34 cls_redirect/IPv4 TCP accept unknown (no hops, flags: ACK):OK #51/35 cls_redirect/IPv6 TCP accept unknown (no hops, flags: ACK):OK #51/36 cls_redirect/IPv4 TCP forward unknown (one hop, flags: ACK):OK #51/37 cls_redirect/IPv6 TCP forward unknown (one hop, flags: ACK):OK #51/38 cls_redirect/IPv4 TCP accept known (one hop, flags: ACK):OK #51/39 cls_redirect/IPv6 TCP accept known (one hop, flags: ACK):OK #51/40 cls_redirect/IPv4 UDP accept unknown (no hops, flags: none):OK #51/41 cls_redirect/IPv6 UDP accept unknown (no hops, flags: none):OK #51/42 cls_redirect/IPv4 UDP forward unknown (one hop, flags: none):OK #51/43 cls_redirect/IPv6 UDP forward unknown (one hop, flags: none):OK #51/44 cls_redirect/IPv4 UDP accept known (one hop, flags: none):OK #51/45 cls_redirect/IPv6 UDP accept known (one hop, flags: none):OK #51 cls_redirect:OK Summary: 1/45 PASSED, 0 SKIPPED, 0 FAILED Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support") Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/net/bpf_jit.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 5d02bc6acec2..00915fb3cb82 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -855,8 +855,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* function return */ case BPF_JMP | BPF_EXIT: - emit_sext_32(ctx, regmap[BPF_REG_0], true); - if (i == ctx->prog->len - 1) break; From 164960641e6c040e5593a962f5609255f008a8fe Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Nov 2023 21:57:19 +0100 Subject: [PATCH 0976/1397] parisc: Reduce size of the bug_table on 64-bit kernel by half [ Upstream commit 43266838515d30dc0c45d5c7e6e7edacee6cce92 ] Enable GENERIC_BUG_RELATIVE_POINTERS which will store 32-bit relative offsets to the bug address and the source file name instead of 64-bit absolute addresses. This effectively reduces the size of the bug_table[] array by half on 64-bit kernels. Signed-off-by: Helge Deller Stable-dep-of: 487635756198 ("parisc: Fix asm operand number out of range build error in bug table") Signed-off-by: Sasha Levin --- arch/parisc/Kconfig | 7 +++++-- arch/parisc/include/asm/bug.h | 34 +++++++++++++++++----------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 68cbe666510a..8c45b98dfe0e 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -113,9 +113,12 @@ config ARCH_HAS_ILOG2_U64 default n config GENERIC_BUG - bool - default y + def_bool y depends on BUG + select GENERIC_BUG_RELATIVE_POINTERS if 64BIT + +config GENERIC_BUG_RELATIVE_POINTERS + bool config GENERIC_HWEIGHT bool diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index b9cad0bb4461..1641ff9a8b83 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -17,26 +17,27 @@ #define PARISC_BUG_BREAK_ASM "break 0x1f, 0x1fff" #define PARISC_BUG_BREAK_INSN 0x03ffe01f /* PARISC_BUG_BREAK_ASM */ -#if defined(CONFIG_64BIT) -#define ASM_WORD_INSN ".dword\t" +#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS +# define __BUG_REL(val) ".word " __stringify(val) " - ." #else -#define ASM_WORD_INSN ".word\t" +# define __BUG_REL(val) ".word " __stringify(val) #endif + #ifdef CONFIG_DEBUG_BUGVERBOSE #define BUG() \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %4\n" \ - "2:\t" ASM_WORD_INSN "1b, %c0\n" \ + "\t.align 4\n" \ + "2:\t" __BUG_REL(1b) "\n" \ + "\t" __BUG_REL(%c0) "\n" \ "\t.short %1, %2\n" \ - "\t.blockz %3-2*%4-2*2\n" \ + "\t.blockz %3-2*4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (0), "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (0), "i" (sizeof(struct bug_entry)) ); \ unreachable(); \ } while(0) @@ -54,15 +55,15 @@ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %4\n" \ - "2:\t" ASM_WORD_INSN "1b, %c0\n" \ + "\t.align 4\n" \ + "2:\t" __BUG_REL(1b) "\n" \ + "\t" __BUG_REL(%c0) "\n" \ "\t.short %1, %2\n" \ - "\t.blockz %3-2*%4-2*2\n" \ + "\t.blockz %3-2*4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (sizeof(struct bug_entry)) ); \ } while(0) #else #define __WARN_FLAGS(flags) \ @@ -71,13 +72,12 @@ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ "\t.align %2\n" \ - "2:\t" ASM_WORD_INSN "1b\n" \ + "2:\t" __BUG_REL(1b) "\n" \ "\t.short %0\n" \ - "\t.blockz %1-%2-2\n" \ + "\t.blockz %1-4-2\n" \ "\t.popsection" \ : : "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (sizeof(struct bug_entry)) ); \ } while(0) #endif From 5bdd0ced1f9c215500b2e9dd77a6c18d9fdf2f01 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 27 Nov 2023 10:39:26 +0100 Subject: [PATCH 0977/1397] parisc: Fix asm operand number out of range build error in bug table [ Upstream commit 487635756198cad563feb47539c6a37ea57f1dae ] Build is broken if CONFIG_DEBUG_BUGVERBOSE=n. Fix it be using the correct asm operand number. Signed-off-by: Helge Deller Reported-by: Linux Kernel Functional Testing Fixes: fe76a1349f23 ("parisc: Use natural CPU alignment for bug_table") Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Sasha Levin --- arch/parisc/include/asm/bug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 1641ff9a8b83..833555f74ffa 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -71,7 +71,7 @@ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %2\n" \ + "\t.align 4\n" \ "2:\t" __BUG_REL(1b) "\n" \ "\t.short %0\n" \ "\t.blockz %1-4-2\n" \ From b6c47d915578160135fbe054b35d069188f60220 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 5 Jul 2023 17:00:06 +0200 Subject: [PATCH 0978/1397] arm64: dts: mediatek: add missing space before { [ Upstream commit a9c740c57f977deb41bc53c02d0dae3d0e2f191a ] Add missing whitespace between node name/label and opening {. Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230705150006.293690-2-krzysztof.kozlowski@linaro.org Signed-off-by: Krzysztof Kozlowski Stable-dep-of: 8980c30141d3 ("arm64: dts: mt8183: kukui: Fix underscores in node names") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8183-evb.dts | 48 +++++++++---------- .../arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 12 ++--- .../boot/dts/mediatek/mt8183-pumpkin.dts | 12 ++--- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts index a27d906db7ea..77f9ab94c00b 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts @@ -155,8 +155,8 @@ }; &pio { - i2c_pins_0: i2c0{ - pins_i2c{ + i2c_pins_0: i2c0 { + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -164,8 +164,8 @@ }; }; - i2c_pins_1: i2c1{ - pins_i2c{ + i2c_pins_1: i2c1 { + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -173,8 +173,8 @@ }; }; - i2c_pins_2: i2c2{ - pins_i2c{ + i2c_pins_2: i2c2 { + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -182,8 +182,8 @@ }; }; - i2c_pins_3: i2c3{ - pins_i2c{ + i2c_pins_3: i2c3 { + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -191,8 +191,8 @@ }; }; - i2c_pins_4: i2c4{ - pins_i2c{ + i2c_pins_4: i2c4 { + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -200,8 +200,8 @@ }; }; - i2c_pins_5: i2c5{ - pins_i2c{ + i2c_pins_5: i2c5 { + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -209,8 +209,8 @@ }; }; - spi_pins_0: spi0{ - pins_spi{ + spi_pins_0: spi0 { + pins_spi { pinmux = , , , @@ -324,8 +324,8 @@ }; }; - spi_pins_1: spi1{ - pins_spi{ + spi_pins_1: spi1 { + pins_spi { pinmux = , , , @@ -334,8 +334,8 @@ }; }; - spi_pins_2: spi2{ - pins_spi{ + spi_pins_2: spi2 { + pins_spi { pinmux = , , , @@ -344,8 +344,8 @@ }; }; - spi_pins_3: spi3{ - pins_spi{ + spi_pins_3: spi3 { + pins_spi { pinmux = , , , @@ -354,8 +354,8 @@ }; }; - spi_pins_4: spi4{ - pins_spi{ + spi_pins_4: spi4 { + pins_spi { pinmux = , , , @@ -364,8 +364,8 @@ }; }; - spi_pins_5: spi5{ - pins_spi{ + spi_pins_5: spi5 { + pins_spi { pinmux = , , , diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index a3add2160233..9a6bfa5882e3 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -692,7 +692,7 @@ }; spi0_pins: spi0 { - pins_spi{ + pins_spi { pinmux = , , , @@ -702,7 +702,7 @@ }; spi1_pins: spi1 { - pins_spi{ + pins_spi { pinmux = , , , @@ -712,7 +712,7 @@ }; spi2_pins: spi2 { - pins_spi{ + pins_spi { pinmux = , , ; @@ -725,7 +725,7 @@ }; spi3_pins: spi3 { - pins_spi{ + pins_spi { pinmux = , , , @@ -735,7 +735,7 @@ }; spi4_pins: spi4 { - pins_spi{ + pins_spi { pinmux = , , , @@ -745,7 +745,7 @@ }; spi5_pins: spi5 { - pins_spi{ + pins_spi { pinmux = , , , diff --git a/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts b/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts index 526bcae7a3f8..b5784a60c315 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-pumpkin.dts @@ -193,7 +193,7 @@ &pio { i2c_pins_0: i2c0 { - pins_i2c{ + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -202,7 +202,7 @@ }; i2c_pins_1: i2c1 { - pins_i2c{ + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -211,7 +211,7 @@ }; i2c_pins_2: i2c2 { - pins_i2c{ + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -220,7 +220,7 @@ }; i2c_pins_3: i2c3 { - pins_i2c{ + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -229,7 +229,7 @@ }; i2c_pins_4: i2c4 { - pins_i2c{ + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -238,7 +238,7 @@ }; i2c_pins_5: i2c5 { - pins_i2c{ + pins_i2c { pinmux = , ; mediatek,pull-up-adv = <3>; From 3798c665fe7d783fda2834aa6973303b074b7549 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Thu, 26 Oct 2023 12:09:10 -0700 Subject: [PATCH 0979/1397] arm64: dts: mt8183: kukui: Fix underscores in node names [ Upstream commit 8980c30141d3986beab815d85762b9c67196ed72 ] Replace underscores with hyphens in pinctrl node names both for consistency and to adhere to the bindings. Cc: stable@vger.kernel.org Fixes: cd894e274b74 ("arm64: dts: mt8183: Add krane-sku176 board") Fixes: 1652dbf7363a ("arm64: dts: mt8183: add scp node") Fixes: 27eaf34df364 ("arm64: dts: mt8183: config dsi node") Signed-off-by: Hsin-Yi Wang Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231026191343.3345279-2-hsinyi@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- .../dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 6 +- .../arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 94 +++++++++---------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index 649477af2f41..820260348de9 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -443,20 +443,20 @@ }; touchscreen_pins: touchscreen-pins { - touch_int_odl { + touch-int-odl { pinmux = ; input-enable; bias-pull-up; }; - touch_rst_l { + touch-rst-l { pinmux = ; output-high; }; }; trackpad_pins: trackpad-pins { - trackpad_int { + trackpad-int { pinmux = ; input-enable; bias-disable; /* pulled externally */ diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 9a6bfa5882e3..6f333f5cbeb9 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -432,7 +432,7 @@ &pio { aud_pins_default: audiopins { - pins_bus { + pins-bus { pinmux = , , , @@ -454,7 +454,7 @@ }; aud_pins_tdm_out_on: audiotdmouton { - pins_bus { + pins-bus { pinmux = , , , @@ -466,7 +466,7 @@ }; aud_pins_tdm_out_off: audiotdmoutoff { - pins_bus { + pins-bus { pinmux = , , , @@ -480,13 +480,13 @@ }; bt_pins: bt-pins { - pins_bt_en { + pins-bt-en { pinmux = ; output-low; }; }; - ec_ap_int_odl: ec_ap_int_odl { + ec_ap_int_odl: ec-ap-int-odl { pins1 { pinmux = ; input-enable; @@ -494,7 +494,7 @@ }; }; - h1_int_od_l: h1_int_od_l { + h1_int_od_l: h1-int-od-l { pins1 { pinmux = ; input-enable; @@ -502,7 +502,7 @@ }; i2c0_pins: i2c0 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -511,7 +511,7 @@ }; i2c1_pins: i2c1 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -520,7 +520,7 @@ }; i2c2_pins: i2c2 { - pins_bus { + pins-bus { pinmux = , ; bias-disable; @@ -529,7 +529,7 @@ }; i2c3_pins: i2c3 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -538,7 +538,7 @@ }; i2c4_pins: i2c4 { - pins_bus { + pins-bus { pinmux = , ; bias-disable; @@ -547,7 +547,7 @@ }; i2c5_pins: i2c5 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -556,7 +556,7 @@ }; i2c6_pins: i2c6 { - pins_bus { + pins-bus { pinmux = , ; bias-disable; @@ -564,7 +564,7 @@ }; mmc0_pins_default: mmc0-pins-default { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -579,13 +579,13 @@ mediatek,pull-up-adv = <01>; }; - pins_clk { + pins-clk { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; }; - pins_rst { + pins-rst { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <01>; @@ -593,7 +593,7 @@ }; mmc0_pins_uhs: mmc0-pins-uhs { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -608,19 +608,19 @@ mediatek,pull-up-adv = <01>; }; - pins_clk { + pins-clk { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; }; - pins_ds { + pins-ds { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; }; - pins_rst { + pins-rst { pinmux = ; drive-strength = ; mediatek,pull-up-adv = <01>; @@ -628,7 +628,7 @@ }; mmc1_pins_default: mmc1-pins-default { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -638,7 +638,7 @@ mediatek,pull-up-adv = <10>; }; - pins_clk { + pins-clk { pinmux = ; input-enable; mediatek,pull-down-adv = <10>; @@ -646,7 +646,7 @@ }; mmc1_pins_uhs: mmc1-pins-uhs { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -657,7 +657,7 @@ mediatek,pull-up-adv = <10>; }; - pins_clk { + pins-clk { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; @@ -665,15 +665,15 @@ }; }; - panel_pins_default: panel_pins_default { - panel_reset { + panel_pins_default: panel-pins-default { + panel-reset { pinmux = ; output-low; bias-pull-up; }; }; - pwm0_pin_default: pwm0_pin_default { + pwm0_pin_default: pwm0-pin-default { pins1 { pinmux = ; output-high; @@ -685,14 +685,14 @@ }; scp_pins: scp { - pins_scp_uart { + pins-scp-uart { pinmux = , ; }; }; spi0_pins: spi0 { - pins_spi { + pins-spi { pinmux = , , , @@ -702,7 +702,7 @@ }; spi1_pins: spi1 { - pins_spi { + pins-spi { pinmux = , , , @@ -712,20 +712,20 @@ }; spi2_pins: spi2 { - pins_spi { + pins-spi { pinmux = , , ; bias-disable; }; - pins_spi_mi { + pins-spi-mi { pinmux = ; mediatek,pull-down-adv = <00>; }; }; spi3_pins: spi3 { - pins_spi { + pins-spi { pinmux = , , , @@ -735,7 +735,7 @@ }; spi4_pins: spi4 { - pins_spi { + pins-spi { pinmux = , , , @@ -745,7 +745,7 @@ }; spi5_pins: spi5 { - pins_spi { + pins-spi { pinmux = , , , @@ -755,63 +755,63 @@ }; uart0_pins_default: uart0-pins-default { - pins_rx { + pins-rx { pinmux = ; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = ; }; }; uart1_pins_default: uart1-pins-default { - pins_rx { + pins-rx { pinmux = ; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = ; }; - pins_rts { + pins-rts { pinmux = ; output-enable; }; - pins_cts { + pins-cts { pinmux = ; input-enable; }; }; uart1_pins_sleep: uart1-pins-sleep { - pins_rx { + pins-rx { pinmux = ; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = ; }; - pins_rts { + pins-rts { pinmux = ; output-enable; }; - pins_cts { + pins-cts { pinmux = ; input-enable; }; }; wifi_pins_pwrseq: wifi-pins-pwrseq { - pins_wifi_enable { + pins-wifi-enable { pinmux = ; output-low; }; }; wifi_pins_wakeup: wifi-pins-wakeup { - pins_wifi_wakeup { + pins-wifi-wakeup { pinmux = ; input-enable; }; From c66c479480f6e90dd883cc11c692e1f3b7a95e6a Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Fri, 1 Dec 2023 08:38:15 +0800 Subject: [PATCH 0980/1397] drm/amdgpu: disable MCBP by default [ Upstream commit d6a57588666301acd9d42d3b00d74240964f07f6 ] Disable MCBP(mid command buffer preemption) by default as old Mesa hangs with it. We shall not enable the feature that breaks old usermode driver. Fixes: 50a7c8765ca6 ("drm/amdgpu: enable mcbp by default on gfx9") Signed-off-by: Jiadong Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a164857bdb9f..94e91516952c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3478,10 +3478,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) adev->gfx.mcbp = true; else if (amdgpu_mcbp == 0) adev->gfx.mcbp = false; - else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) && - adev->gfx.num_gfx_rings) - adev->gfx.mcbp = true; if (amdgpu_sriov_vf(adev)) adev->gfx.mcbp = true; From cfe9295db0932f1b8e0d94ffc75521898e5a5a8a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 29 Nov 2023 15:24:52 +0100 Subject: [PATCH 0981/1397] perf: Fix perf_event_validate_size() [ Upstream commit 382c27f4ed28f803b1f1473ac2d8db0afc795a1b ] Budimir noted that perf_event_validate_size() only checks the size of the newly added event, even though the sizes of all existing events can also change due to not all events having the same read_format. When we attach the new event, perf_group_attach(), we do re-compute the size for all events. Fixes: a723968c0ed3 ("perf: Fix u16 overflows") Reported-by: Budimir Markovic Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sasha Levin --- kernel/events/core.c | 61 +++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 6dbb03c53237..252387b6ac8d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1814,31 +1814,34 @@ static inline void perf_event__state_init(struct perf_event *event) PERF_EVENT_STATE_INACTIVE; } -static void __perf_event_read_size(struct perf_event *event, int nr_siblings) +static int __perf_event_read_size(u64 read_format, int nr_siblings) { int entry = sizeof(u64); /* value */ int size = 0; int nr = 1; - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) size += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) size += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_ID) + if (read_format & PERF_FORMAT_ID) entry += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_LOST) + if (read_format & PERF_FORMAT_LOST) entry += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_GROUP) { + if (read_format & PERF_FORMAT_GROUP) { nr += nr_siblings; size += sizeof(u64); } - size += entry * nr; - event->read_size = size; + /* + * Since perf_event_validate_size() limits this to 16k and inhibits + * adding more siblings, this will never overflow. + */ + return size + nr * entry; } static void __perf_event_header_size(struct perf_event *event, u64 sample_type) @@ -1888,8 +1891,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) */ static void perf_event__header_size(struct perf_event *event) { - __perf_event_read_size(event, - event->group_leader->nr_siblings); + event->read_size = + __perf_event_read_size(event->attr.read_format, + event->group_leader->nr_siblings); __perf_event_header_size(event, event->attr.sample_type); } @@ -1920,24 +1924,35 @@ static void perf_event__id_header_size(struct perf_event *event) event->id_header_size = size; } +/* + * Check that adding an event to the group does not result in anybody + * overflowing the 64k event limit imposed by the output buffer. + * + * Specifically, check that the read_size for the event does not exceed 16k, + * read_size being the one term that grows with groups size. Since read_size + * depends on per-event read_format, also (re)check the existing events. + * + * This leaves 48k for the constant size fields and things like callchains, + * branch stacks and register sets. + */ static bool perf_event_validate_size(struct perf_event *event) { - /* - * The values computed here will be over-written when we actually - * attach the event. - */ - __perf_event_read_size(event, event->group_leader->nr_siblings + 1); - __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); - perf_event__id_header_size(event); + struct perf_event *sibling, *group_leader = event->group_leader; - /* - * Sum the lot; should not exceed the 64k limit we have on records. - * Conservative limit to allow for callchains and other variable fields. - */ - if (event->read_size + event->header_size + - event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) + if (__perf_event_read_size(event->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) return false; + if (__perf_event_read_size(group_leader->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) + return false; + + for_each_sibling_event(sibling, group_leader) { + if (__perf_event_read_size(sibling->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) + return false; + } + return true; } From 73bbca1298645b995ce105d0290f05cbe37fc35a Mon Sep 17 00:00:00 2001 From: Ashwin Dayanand Kamat Date: Wed, 29 Nov 2023 16:10:29 +0530 Subject: [PATCH 0982/1397] x86/sev: Fix kernel crash due to late update to read-only ghcb_version [ Upstream commit 27d25348d42161837be08fc63b04a2559d2e781c ] A write-access violation page fault kernel crash was observed while running cpuhotplug LTP testcases on SEV-ES enabled systems. The crash was observed during hotplug, after the CPU was offlined and the process was migrated to different CPU. setup_ghcb() is called again which tries to update ghcb_version in sev_es_negotiate_protocol(). Ideally this is a read_only variable which is initialised during booting. Trying to write it results in a pagefault: BUG: unable to handle page fault for address: ffffffffba556e70 #PF: supervisor write access in kernel mode #PF: error_code(0x0003) - permissions violation [ ...] Call Trace: ? __die_body.cold+0x1a/0x1f ? __die+0x2a/0x35 ? page_fault_oops+0x10c/0x270 ? setup_ghcb+0x71/0x100 ? __x86_return_thunk+0x5/0x6 ? search_exception_tables+0x60/0x70 ? __x86_return_thunk+0x5/0x6 ? fixup_exception+0x27/0x320 ? kernelmode_fixup_or_oops+0xa2/0x120 ? __bad_area_nosemaphore+0x16a/0x1b0 ? kernel_exc_vmm_communication+0x60/0xb0 ? bad_area_nosemaphore+0x16/0x20 ? do_kern_addr_fault+0x7a/0x90 ? exc_page_fault+0xbd/0x160 ? asm_exc_page_fault+0x27/0x30 ? setup_ghcb+0x71/0x100 ? setup_ghcb+0xe/0x100 cpu_init_exception_handling+0x1b9/0x1f0 The fix is to call sev_es_negotiate_protocol() only in the BSP boot phase, and it only needs to be done once in any case. [ mingo: Refined the changelog. ] Fixes: 95d33bfaa3e1 ("x86/sev: Register GHCB memory when SEV-SNP is active") Suggested-by: Tom Lendacky Co-developed-by: Bo Gan Signed-off-by: Bo Gan Signed-off-by: Ashwin Dayanand Kamat Signed-off-by: Ingo Molnar Acked-by: Tom Lendacky Link: https://lore.kernel.org/r/1701254429-18250-1-git-send-email-kashwindayan@vmware.com Signed-off-by: Sasha Levin --- arch/x86/kernel/sev.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 6395bfd87b68..d87c6ff1f513 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1234,10 +1234,6 @@ void setup_ghcb(void) if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) return; - /* First make sure the hypervisor talks a supported protocol. */ - if (!sev_es_negotiate_protocol()) - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); - /* * Check whether the runtime #VC exception handler is active. It uses * the per-CPU GHCB page which is set up by sev_es_init_vc_handling(). @@ -1254,6 +1250,13 @@ void setup_ghcb(void) return; } + /* + * Make sure the hypervisor talks a supported protocol. + * This gets called only in the BSP boot phase. + */ + if (!sev_es_negotiate_protocol()) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); + /* * Clear the boot_ghcb. The first exception comes in before the bss * section is cleared. From 6df51c525a2d4f8cce843ced0da00452dc6f30a7 Mon Sep 17 00:00:00 2001 From: Boerge Struempfel Date: Wed, 29 Nov 2023 16:23:07 +0100 Subject: [PATCH 0983/1397] gpiolib: sysfs: Fix error handling on failed export [ Upstream commit 95dd1e34ff5bbee93a28ff3947eceaf6de811b1a ] If gpio_set_transitory() fails, we should free the GPIO again. Most notably, the flag FLAG_REQUESTED has previously been set in gpiod_request_commit(), and should be reset on failure. To my knowledge, this does not affect any current users, since the gpio_set_transitory() mainly returns 0 and -ENOTSUPP, which is converted to 0. However the gpio_set_transitory() function calles the .set_config() function of the corresponding GPIO chip and there are some GPIO drivers in which some (unlikely) branches return other values like -EPROBE_DEFER, and -EINVAL. In these cases, the above mentioned FLAG_REQUESTED would not be reset, which results in the pin being blocked until the next reboot. Fixes: e10f72bf4b3e ("gpio: gpiolib: Generalise state persistence beyond sleep") Signed-off-by: Boerge Struempfel Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib-sysfs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index 50503a4525eb..c7c5c19ebc66 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -474,14 +474,17 @@ static ssize_t export_store(const struct class *class, goto done; status = gpiod_set_transitory(desc, false); - if (!status) { - status = gpiod_export(desc, true); - if (status < 0) - gpiod_free(desc); - else - set_bit(FLAG_SYSFS, &desc->flags); + if (status) { + gpiod_free(desc); + goto done; } + status = gpiod_export(desc, true); + if (status < 0) + gpiod_free(desc); + else + set_bit(FLAG_SYSFS, &desc->flags); + done: if (status) pr_debug("%s: status %d\n", __func__, status); From 395e52b7a1ad01e1b51adb09854a0aa5347428de Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 4 Dec 2023 12:47:35 +0000 Subject: [PATCH 0984/1397] ASoC: ops: add correct range check for limiting volume commit fb9ad24485087e0f00d84bee7a5914640b2b9024 upstream. Volume can have ranges that start with negative values, ex: -84dB to +40dB. Apply correct range check in snd_soc_limit_volume before setting the platform_max. Without this patch, for example setting a 0dB limit on a volume range of -84dB to +40dB would fail. Signed-off-by: Srinivas Kandagatla Tested-by: Johan Hovold Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20231204124736.132185-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 55b009d3c681..2d25748ca706 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -661,7 +661,7 @@ int snd_soc_limit_volume(struct snd_soc_card *card, kctl = snd_soc_card_get_kcontrol(card, name); if (kctl) { struct soc_mixer_control *mc = (struct soc_mixer_control *)kctl->private_value; - if (max <= mc->max) { + if (max <= mc->max - mc->min) { mc->platform_max = max; ret = 0; } From 95a4c959b99f10b2405356c07e8092aed1cc3bd8 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Fri, 1 Dec 2023 14:53:55 +0900 Subject: [PATCH 0985/1397] kprobes: consistent rcu api usage for kretprobe holder commit d839a656d0f3caca9f96e9bf912fd394ac6a11bc upstream. It seems that the pointer-to-kretprobe "rp" within the kretprobe_holder is RCU-managed, based on the (non-rethook) implementation of get_kretprobe(). The thought behind this patch is to make use of the RCU API where possible when accessing this pointer so that the needed barriers are always in place and to self-document the code. The __rcu annotation to "rp" allows for sparse RCU checking. Plain writes done to the "rp" pointer are changed to make use of the RCU macro for assignment. For the single read, the implementation of get_kretprobe() is simplified by making use of an RCU macro which accomplishes the same, but note that the log warning text will be more generic. I did find that there is a difference in assembly generated between the usage of the RCU macros vs without. For example, on arm64, when using rcu_assign_pointer(), the corresponding store instruction is a store-release (STLR) which has an implicit barrier. When normal assignment is done, a regular store (STR) is found. In the macro case, this seems to be a result of rcu_assign_pointer() using smp_store_release() when the value to write is not NULL. Link: https://lore.kernel.org/all/20231122132058.3359-1-inwardvessel@gmail.com/ Fixes: d741bf41d7c7 ("kprobes: Remove kretprobe hash") Cc: stable@vger.kernel.org Signed-off-by: JP Kobryn Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- include/linux/kprobes.h | 7 ++----- kernel/kprobes.c | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 38a774287bde..8de5d51a0b5e 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -140,7 +140,7 @@ static inline bool kprobe_ftrace(struct kprobe *p) * */ struct kretprobe_holder { - struct kretprobe *rp; + struct kretprobe __rcu *rp; refcount_t ref; }; @@ -248,10 +248,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) { - RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), - "Kretprobe is accessed from instance under preemptive context"); - - return READ_ONCE(ri->rph->rp); + return rcu_dereference_check(ri->rph->rp, rcu_read_lock_any_held()); } static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 0c6185aefaef..b486504766fb 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2253,7 +2253,7 @@ int register_kretprobe(struct kretprobe *rp) if (!rp->rph) return -ENOMEM; - rp->rph->rp = rp; + rcu_assign_pointer(rp->rph->rp, rp); for (i = 0; i < rp->maxactive; i++) { inst = kzalloc(struct_size(inst, data, rp->data_size), GFP_KERNEL); if (inst == NULL) { @@ -2313,7 +2313,7 @@ void unregister_kretprobes(struct kretprobe **rps, int num) #ifdef CONFIG_KRETPROBE_ON_RETHOOK rethook_free(rps[i]->rh); #else - rps[i]->rph->rp = NULL; + rcu_assign_pointer(rps[i]->rph->rp, NULL); #endif } mutex_unlock(&kprobe_mutex); From d9828961e5d77c94ce89379f2e3d7aeb501fff9d Mon Sep 17 00:00:00 2001 From: Konstantin Aladyshev Date: Wed, 6 Dec 2023 11:07:44 +0300 Subject: [PATCH 0986/1397] usb: gadget: f_hid: fix report descriptor allocation commit 61890dc28f7d9e9aac8a9471302613824c22fae4 upstream. The commit 89ff3dfac604 ("usb: gadget: f_hid: fix f_hidg lifetime vs cdev") has introduced a bug that leads to hid device corruption after the replug operation. Reverse device managed memory allocation for the report descriptor to fix the issue. Tested: This change was tested on the AMD EthanolX CRB server with the BMC based on the OpenBMC distribution. The BMC provides KVM functionality via the USB gadget device: - before: KVM page refresh results in a broken USB device, - after: KVM page refresh works without any issues. Fixes: 89ff3dfac604 ("usb: gadget: f_hid: fix f_hidg lifetime vs cdev") Cc: stable@vger.kernel.org Signed-off-by: Konstantin Aladyshev Link: https://lore.kernel.org/r/20231206080744.253-2-aladyshev22@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_hid.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index ea85e2c701a1..3c8a9dd585c0 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -92,6 +92,7 @@ static void hidg_release(struct device *dev) { struct f_hidg *hidg = container_of(dev, struct f_hidg, dev); + kfree(hidg->report_desc); kfree(hidg->set_report_buf); kfree(hidg); } @@ -1287,9 +1288,9 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi) hidg->report_length = opts->report_length; hidg->report_desc_length = opts->report_desc_length; if (opts->report_desc) { - hidg->report_desc = devm_kmemdup(&hidg->dev, opts->report_desc, - opts->report_desc_length, - GFP_KERNEL); + hidg->report_desc = kmemdup(opts->report_desc, + opts->report_desc_length, + GFP_KERNEL); if (!hidg->report_desc) { ret = -ENOMEM; goto err_put_device; From a05c646d3c7db07ba92b50463a29cefbd6a565a2 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 24 Nov 2023 20:38:14 +0100 Subject: [PATCH 0987/1397] nvmem: Do not expect fixed layouts to grab a layout driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b7c1e53751cb3990153084f31c41f25fde3b629c upstream. Two series lived in parallel for some time, which led to this situation: - The nvmem-layout container is used for dynamic layouts - We now expect fixed layouts to also use the nvmem-layout container but this does not require any additional driver, the support is built-in the nvmem core. Ensure we don't refuse to probe for wrong reasons. Fixes: 27f699e578b1 ("nvmem: core: add support for fixed cells *layout*") Cc: stable@vger.kernel.org Reported-by: Luca Ceresoli Signed-off-by: Miquel Raynal Tested-by: Rafał Miłecki Tested-by: Luca Ceresoli Reviewed-by: Luca Ceresoli Link: https://lore.kernel.org/r/20231124193814.360552-1-miquel.raynal@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index eaf6a3fe8ca6..5b3955ad4053 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -796,6 +796,12 @@ static struct nvmem_layout *nvmem_layout_get(struct nvmem_device *nvmem) if (!layout_np) return NULL; + /* Fixed layouts don't have a matching driver */ + if (of_device_is_compatible(layout_np, "fixed-layout")) { + of_node_put(layout_np); + return NULL; + } + /* * In case the nvmem device was built-in while the layout was built as a * module, we shall manually request the layout driver loading otherwise From 977a60f0639fbeea0161e6ce516c8d2f3dbefe78 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 5 Dec 2023 21:55:24 +0200 Subject: [PATCH 0988/1397] serial: 8250_dw: Add ACPI ID for Granite Rapids-D UART commit e92fad024929c79460403acf946bc9c09ce5c3a9 upstream. Granite Rapids-D has an additional UART that is enumerated via ACPI. Add ACPI ID for it. Signed-off-by: Andy Shevchenko Cc: stable Link: https://lore.kernel.org/r/20231205195524.2705965-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index f4cafca1a7da..a1f2259cc9a9 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -798,6 +798,7 @@ static const struct acpi_device_id dw8250_acpi_match[] = { { "INT33C5", (kernel_ulong_t)&dw8250_dw_apb }, { "INT3434", (kernel_ulong_t)&dw8250_dw_apb }, { "INT3435", (kernel_ulong_t)&dw8250_dw_apb }, + { "INTC10EE", (kernel_ulong_t)&dw8250_dw_apb }, { }, }; MODULE_DEVICE_TABLE(acpi, dw8250_acpi_match); From c88a91ca26b102042e816fdf850fa51d10be4749 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Mon, 4 Dec 2023 17:38:03 +0100 Subject: [PATCH 0989/1397] serial: ma35d1: Validate console index before assignment commit f0b9d97a77fa8f18400450713358303a435ab688 upstream. The console is immediately assigned to the ma35d1 port without checking its index. This oversight can lead to out-of-bounds errors when the index falls outside the valid '0' to MA35_UART_NR range. Such scenario trigges ran error like the following: UBSAN: array-index-out-of-bounds in drivers/tty/serial/ma35d1_serial.c:555:51 index -1 is out of range for type 'uart_ma35d1_port [17] Check the index before using it and bail out with a warning. Fixes: 930cbf92db01 ("tty: serial: Add Nuvoton ma35d1 serial driver support") Signed-off-by: Andi Shyti Cc: Jacky Huang Cc: # v6.5+ Link: https://lore.kernel.org/r/20231204163804.1331415-2-andi.shyti@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/ma35d1_serial.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/ma35d1_serial.c b/drivers/tty/serial/ma35d1_serial.c index 465b1def9e11..69da24565b99 100644 --- a/drivers/tty/serial/ma35d1_serial.c +++ b/drivers/tty/serial/ma35d1_serial.c @@ -552,11 +552,19 @@ static void ma35d1serial_console_putchar(struct uart_port *port, unsigned char c */ static void ma35d1serial_console_write(struct console *co, const char *s, u32 count) { - struct uart_ma35d1_port *up = &ma35d1serial_ports[co->index]; + struct uart_ma35d1_port *up; unsigned long flags; int locked = 1; u32 ier; + if ((co->index < 0) || (co->index >= MA35_UART_NR)) { + pr_warn("Failed to write on ononsole port %x, out of range\n", + co->index); + return; + } + + up = &ma35d1serial_ports[co->index]; + if (up->port.sysrq) locked = 0; else if (oops_in_progress) From 67f45db9f2754d10ed7ef43aef2025824ee57360 Mon Sep 17 00:00:00 2001 From: Cameron Williams Date: Thu, 2 Nov 2023 21:10:40 +0000 Subject: [PATCH 0990/1397] parport: Add support for Brainboxes IX/UC/PX parallel cards commit 1a031f6edc460e9562098bdedc3918da07c30a6e upstream. Adds support for Intashield IX-500/IX-550, UC-146/UC-157, PX-146/PX-157, PX-203 and PX-475 (LPT port) Cc: stable@vger.kernel.org Signed-off-by: Cameron Williams Acked-by: Sudip Mukherjee Link: https://lore.kernel.org/r/AS4PR02MB790389C130410BD864C8DCC9C4A6A@AS4PR02MB7903.eurprd02.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/parport/parport_pc.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 1f236aaf7867..f33b5d1ddfc1 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -2658,6 +2658,8 @@ enum parport_pc_pci_cards { asix_ax99100, quatech_sppxp100, wch_ch382l, + brainboxes_uc146, + brainboxes_px203, }; @@ -2737,6 +2739,8 @@ static struct parport_pc_pci { /* asix_ax99100 */ { 1, { { 0, 1 }, } }, /* quatech_sppxp100 */ { 1, { { 0, 1 }, } }, /* wch_ch382l */ { 1, { { 2, -1 }, } }, + /* brainboxes_uc146 */ { 1, { { 3, -1 }, } }, + /* brainboxes_px203 */ { 1, { { 0, -1 }, } }, }; static const struct pci_device_id parport_pc_pci_tbl[] = { @@ -2833,6 +2837,23 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 }, /* WCH CH382L PCI-E single parallel port card */ { 0x1c00, 0x3050, 0x1c00, 0x3050, 0, 0, wch_ch382l }, + /* Brainboxes IX-500/550 */ + { PCI_VENDOR_ID_INTASHIELD, 0x402a, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + /* Brainboxes UC-146/UC-157 */ + { PCI_VENDOR_ID_INTASHIELD, 0x0be1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_uc146 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0be2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_uc146 }, + /* Brainboxes PX-146/PX-257 */ + { PCI_VENDOR_ID_INTASHIELD, 0x401c, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, + /* Brainboxes PX-203 */ + { PCI_VENDOR_ID_INTASHIELD, 0x4007, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, brainboxes_px203 }, + /* Brainboxes PX-475 */ + { PCI_VENDOR_ID_INTASHIELD, 0x401f, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_pcie_pport }, { 0, } /* terminate list */ }; MODULE_DEVICE_TABLE(pci, parport_pc_pci_tbl); From 1bcb08e39d8450728117ea503496290a2bc4d60f Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 4 Dec 2023 14:01:59 +0000 Subject: [PATCH 0991/1397] cifs: Fix non-availability of dedup breaking generic/304 commit 691a41d8da4b34fe72f09393505f55f28a8f34ec upstream. Deduplication isn't supported on cifs, but cifs doesn't reject it, instead treating it as extent duplication/cloning. This can cause generic/304 to go silly and run for hours on end. Fix cifs to indicate EOPNOTSUPP if REMAP_FILE_DEDUP is set in ->remap_file_range(). Note that it's unclear whether or not commit b073a08016a1 is meant to cause cifs to return an error if REMAP_FILE_DEDUP. Fixes: b073a08016a1 ("cifs: fix that return -EINVAL when do dedupe operation") Cc: stable@vger.kernel.org Suggested-by: Dave Chinner cc: Xiaoli Feng cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: Darrick Wong cc: fstests@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/3876191.1701555260@warthog.procyon.org.uk/ Signed-off-by: David Howells Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifsfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index ea3a7a668b45..99497f1b479b 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1208,7 +1208,9 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, unsigned int xid; int rc; - if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + if (remap_flags & REMAP_FILE_DEDUP) + return -EOPNOTSUPP; + if (remap_flags & ~REMAP_FILE_ADVISORY) return -EINVAL; cifs_dbg(FYI, "clone range\n"); From 05668be6020c277fd027baa2282ba559ea73a695 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 5 Dec 2023 11:05:48 +0200 Subject: [PATCH 0992/1397] Revert "xhci: Loosen RPM as default policy to cover for AMD xHC 1.1" commit 24be0b3c40594a14b65141ced486ae327398faf8 upstream. This reverts commit 4baf1218150985ee3ab0a27220456a1f027ea0ac. Enabling runtime pm as default for all AMD xHC 1.1 controllers caused regression. An initial attempt to fix those was done in commit a5d6264b638e ("xhci: Enable RPM on controllers that support low-power states") but new issues are still seen. Revert this to get those AMD xHC 1.1 systems working This patch went to stable an needs to be reverted from there as well. Fixes: 4baf12181509 ("xhci: Loosen RPM as default policy to cover for AMD xHC 1.1") Link: https://lore.kernel.org/linux-usb/55c50bf5-bffb-454e-906e-4408c591cb63@molgen.mpg.de Cc: Mario Limonciello Cc: Basavaraj Natikar Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20231205090548.1377667-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 95ed9404f6f8..d6fc08e5db8f 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -535,8 +535,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) /* xHC spec requires PCI devices to support D3hot and D3cold */ if (xhci->hci_version >= 0x120) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; - else if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version >= 0x110) - xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (xhci->quirks & XHCI_RESET_ON_RESUME) xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, From 5ac34ba42e9aba7fc82997f778802b954efced33 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 5 Dec 2023 21:49:29 -0300 Subject: [PATCH 0993/1397] smb: client: fix potential NULL deref in parse_dfs_referrals() commit 92414333eb375ed64f4ae92d34d579e826936480 upstream. If server returned no data for FSCTL_DFS_GET_REFERRALS, @dfs_rsp will remain NULL and then parse_dfs_referrals() will dereference it. Fix this by returning -EIO when no output data is returned. Besides, we can't fix it in SMB2_ioctl() as some FSCTLs are allowed to return no data as per MS-SMB2 2.2.32. Fixes: 9d49640a21bf ("CIFS: implement get_dfs_refer for SMB2+") Cc: stable@vger.kernel.org Reported-by: Robert Morris Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index b2a60aa6564f..386b62d5c133 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2834,6 +2834,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, usleep_range(512, 2048); } while (++retry_count < 5); + if (!rc && !dfs_rsp) + rc = -EIO; if (rc) { if (!is_retryable_error(rc) && rc != -ENOENT && rc != -EOPNOTSUPP) cifs_tcon_dbg(VFS, "%s: ioctl error: rc=%d\n", __func__, rc); From 2214c3f4e71800087f569e06e633e5265ec4fc0f Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Wed, 29 Nov 2023 19:23:50 +0000 Subject: [PATCH 0994/1397] usb: typec: class: fix typec_altmode_put_partner to put plugs commit b17b7fe6dd5c6ff74b38b0758ca799cdbb79e26e upstream. When typec_altmode_put_partner is called by a plug altmode upon release, the port altmode the plug belongs to will not remove its reference to the plug. The check to see if the altmode being released evaluates against the released altmode's partner instead of the calling altmode itself, so change adev in typec_altmode_put_partner to properly refer to the altmode being released. typec_altmode_set_partner is not run for port altmodes, so also add a check in typec_altmode_release to prevent typec_altmode_put_partner() calls on port altmode release. Fixes: 8a37d87d72f0 ("usb: typec: Bus type for alternate modes") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231129192349.1773623-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 9c1dbf3c00e0..6e80fab11788 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -266,7 +266,7 @@ static void typec_altmode_put_partner(struct altmode *altmode) if (!partner) return; - adev = &partner->adev; + adev = &altmode->adev; if (is_typec_plug(adev->dev.parent)) { struct typec_plug *plug = to_typec_plug(adev->dev.parent); @@ -496,7 +496,8 @@ static void typec_altmode_release(struct device *dev) { struct altmode *alt = to_altmode(to_typec_altmode(dev)); - typec_altmode_put_partner(alt); + if (!is_typec_port(dev->parent)) + typec_altmode_put_partner(alt); altmode_id_remove(alt->adev.dev.parent, alt->id); kfree(alt); From c865b77eb3b50ed73748669b27aec6c4601d266e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Nov 2023 18:15:03 +0100 Subject: [PATCH 0995/1397] ARM: PL011: Fix DMA support commit 58ac1b3799799069d53f5bf95c093f2fe8dd3cc5 upstream. Since there is no guarantee that the memory returned by dma_alloc_coherent() is associated with a 'struct page', using the architecture specific phys_to_page() is wrong, but using virt_to_page() would be as well. Stop using sg lists altogether and just use the *_single() functions instead. This also simplifies the code a bit since the scatterlists in this driver always have only one entry anyway. https://lore.kernel.org/lkml/86db0fe5-930d-4cbb-bd7d-03367da38951@app.fastmail.com/ Use consistent names for dma buffers gc: Add a commit log from the initial thread: https://lore.kernel.org/lkml/86db0fe5-930d-4cbb-bd7d-03367da38951@app.fastmail.com/ Use consistent names for dma buffers Fixes: cb06ff102e2d7 ("ARM: PL011: Add support for Rx DMA buffer polling.") Signed-off-by: Arnd Bergmann Tested-by: Gregory CLEMENT Signed-off-by: Gregory CLEMENT Cc: stable Link: https://lore.kernel.org/r/20231122171503.235649-1-gregory.clement@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 112 +++++++++++++++----------------- 1 file changed, 54 insertions(+), 58 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 3dc9b0fcab1c..cd3913b933c7 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -218,17 +218,18 @@ static struct vendor_data vendor_st = { /* Deals with DMA transactions */ -struct pl011_sgbuf { - struct scatterlist sg; - char *buf; +struct pl011_dmabuf { + dma_addr_t dma; + size_t len; + char *buf; }; struct pl011_dmarx_data { struct dma_chan *chan; struct completion complete; bool use_buf_b; - struct pl011_sgbuf sgbuf_a; - struct pl011_sgbuf sgbuf_b; + struct pl011_dmabuf dbuf_a; + struct pl011_dmabuf dbuf_b; dma_cookie_t cookie; bool running; struct timer_list timer; @@ -241,7 +242,8 @@ struct pl011_dmarx_data { struct pl011_dmatx_data { struct dma_chan *chan; - struct scatterlist sg; + dma_addr_t dma; + size_t len; char *buf; bool queued; }; @@ -366,32 +368,24 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap) #define PL011_DMA_BUFFER_SIZE PAGE_SIZE -static int pl011_sgbuf_init(struct dma_chan *chan, struct pl011_sgbuf *sg, +static int pl011_dmabuf_init(struct dma_chan *chan, struct pl011_dmabuf *db, enum dma_data_direction dir) { - dma_addr_t dma_addr; - - sg->buf = dma_alloc_coherent(chan->device->dev, - PL011_DMA_BUFFER_SIZE, &dma_addr, GFP_KERNEL); - if (!sg->buf) + db->buf = dma_alloc_coherent(chan->device->dev, PL011_DMA_BUFFER_SIZE, + &db->dma, GFP_KERNEL); + if (!db->buf) return -ENOMEM; - - sg_init_table(&sg->sg, 1); - sg_set_page(&sg->sg, phys_to_page(dma_addr), - PL011_DMA_BUFFER_SIZE, offset_in_page(dma_addr)); - sg_dma_address(&sg->sg) = dma_addr; - sg_dma_len(&sg->sg) = PL011_DMA_BUFFER_SIZE; + db->len = PL011_DMA_BUFFER_SIZE; return 0; } -static void pl011_sgbuf_free(struct dma_chan *chan, struct pl011_sgbuf *sg, +static void pl011_dmabuf_free(struct dma_chan *chan, struct pl011_dmabuf *db, enum dma_data_direction dir) { - if (sg->buf) { + if (db->buf) { dma_free_coherent(chan->device->dev, - PL011_DMA_BUFFER_SIZE, sg->buf, - sg_dma_address(&sg->sg)); + PL011_DMA_BUFFER_SIZE, db->buf, db->dma); } } @@ -552,8 +546,8 @@ static void pl011_dma_tx_callback(void *data) spin_lock_irqsave(&uap->port.lock, flags); if (uap->dmatx.queued) - dma_unmap_sg(dmatx->chan->device->dev, &dmatx->sg, 1, - DMA_TO_DEVICE); + dma_unmap_single(dmatx->chan->device->dev, dmatx->dma, + dmatx->len, DMA_TO_DEVICE); dmacr = uap->dmacr; uap->dmacr = dmacr & ~UART011_TXDMAE; @@ -639,18 +633,19 @@ static int pl011_dma_tx_refill(struct uart_amba_port *uap) memcpy(&dmatx->buf[first], &xmit->buf[0], second); } - dmatx->sg.length = count; - - if (dma_map_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE) != 1) { + dmatx->len = count; + dmatx->dma = dma_map_single(dma_dev->dev, dmatx->buf, count, + DMA_TO_DEVICE); + if (dmatx->dma == DMA_MAPPING_ERROR) { uap->dmatx.queued = false; dev_dbg(uap->port.dev, "unable to map TX DMA\n"); return -EBUSY; } - desc = dmaengine_prep_slave_sg(chan, &dmatx->sg, 1, DMA_MEM_TO_DEV, + desc = dmaengine_prep_slave_single(chan, dmatx->dma, dmatx->len, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc) { - dma_unmap_sg(dma_dev->dev, &dmatx->sg, 1, DMA_TO_DEVICE); + dma_unmap_single(dma_dev->dev, dmatx->dma, dmatx->len, DMA_TO_DEVICE); uap->dmatx.queued = false; /* * If DMA cannot be used right now, we complete this @@ -813,8 +808,8 @@ __acquires(&uap->port.lock) dmaengine_terminate_async(uap->dmatx.chan); if (uap->dmatx.queued) { - dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, - DMA_TO_DEVICE); + dma_unmap_single(uap->dmatx.chan->device->dev, uap->dmatx.dma, + uap->dmatx.len, DMA_TO_DEVICE); uap->dmatx.queued = false; uap->dmacr &= ~UART011_TXDMAE; pl011_write(uap->dmacr, uap, REG_DMACR); @@ -828,15 +823,15 @@ static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap) struct dma_chan *rxchan = uap->dmarx.chan; struct pl011_dmarx_data *dmarx = &uap->dmarx; struct dma_async_tx_descriptor *desc; - struct pl011_sgbuf *sgbuf; + struct pl011_dmabuf *dbuf; if (!rxchan) return -EIO; /* Start the RX DMA job */ - sgbuf = uap->dmarx.use_buf_b ? - &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; - desc = dmaengine_prep_slave_sg(rxchan, &sgbuf->sg, 1, + dbuf = uap->dmarx.use_buf_b ? + &uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a; + desc = dmaengine_prep_slave_single(rxchan, dbuf->dma, dbuf->len, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); /* @@ -876,8 +871,8 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, bool readfifo) { struct tty_port *port = &uap->port.state->port; - struct pl011_sgbuf *sgbuf = use_buf_b ? - &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; + struct pl011_dmabuf *dbuf = use_buf_b ? + &uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a; int dma_count = 0; u32 fifotaken = 0; /* only used for vdbg() */ @@ -886,7 +881,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, if (uap->dmarx.poll_rate) { /* The data can be taken by polling */ - dmataken = sgbuf->sg.length - dmarx->last_residue; + dmataken = dbuf->len - dmarx->last_residue; /* Recalculate the pending size */ if (pending >= dmataken) pending -= dmataken; @@ -900,7 +895,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, * Note that tty_insert_flip_buf() tries to take as many chars * as it can. */ - dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, + dma_count = tty_insert_flip_string(port, dbuf->buf + dmataken, pending); uap->port.icount.rx += dma_count; @@ -911,7 +906,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap, /* Reset the last_residue for Rx DMA poll */ if (uap->dmarx.poll_rate) - dmarx->last_residue = sgbuf->sg.length; + dmarx->last_residue = dbuf->len; /* * Only continue with trying to read the FIFO if all DMA chars have @@ -946,8 +941,8 @@ static void pl011_dma_rx_irq(struct uart_amba_port *uap) { struct pl011_dmarx_data *dmarx = &uap->dmarx; struct dma_chan *rxchan = dmarx->chan; - struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? - &dmarx->sgbuf_b : &dmarx->sgbuf_a; + struct pl011_dmabuf *dbuf = dmarx->use_buf_b ? + &dmarx->dbuf_b : &dmarx->dbuf_a; size_t pending; struct dma_tx_state state; enum dma_status dmastat; @@ -969,7 +964,7 @@ static void pl011_dma_rx_irq(struct uart_amba_port *uap) pl011_write(uap->dmacr, uap, REG_DMACR); uap->dmarx.running = false; - pending = sgbuf->sg.length - state.residue; + pending = dbuf->len - state.residue; BUG_ON(pending > PL011_DMA_BUFFER_SIZE); /* Then we terminate the transfer - we now know our residue */ dmaengine_terminate_all(rxchan); @@ -996,8 +991,8 @@ static void pl011_dma_rx_callback(void *data) struct pl011_dmarx_data *dmarx = &uap->dmarx; struct dma_chan *rxchan = dmarx->chan; bool lastbuf = dmarx->use_buf_b; - struct pl011_sgbuf *sgbuf = dmarx->use_buf_b ? - &dmarx->sgbuf_b : &dmarx->sgbuf_a; + struct pl011_dmabuf *dbuf = dmarx->use_buf_b ? + &dmarx->dbuf_b : &dmarx->dbuf_a; size_t pending; struct dma_tx_state state; int ret; @@ -1015,7 +1010,7 @@ static void pl011_dma_rx_callback(void *data) * the DMA irq handler. So we check the residue here. */ rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); - pending = sgbuf->sg.length - state.residue; + pending = dbuf->len - state.residue; BUG_ON(pending > PL011_DMA_BUFFER_SIZE); /* Then we terminate the transfer - we now know our residue */ dmaengine_terminate_all(rxchan); @@ -1067,16 +1062,16 @@ static void pl011_dma_rx_poll(struct timer_list *t) unsigned long flags; unsigned int dmataken = 0; unsigned int size = 0; - struct pl011_sgbuf *sgbuf; + struct pl011_dmabuf *dbuf; int dma_count; struct dma_tx_state state; - sgbuf = dmarx->use_buf_b ? &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a; + dbuf = dmarx->use_buf_b ? &uap->dmarx.dbuf_b : &uap->dmarx.dbuf_a; rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state); if (likely(state.residue < dmarx->last_residue)) { - dmataken = sgbuf->sg.length - dmarx->last_residue; + dmataken = dbuf->len - dmarx->last_residue; size = dmarx->last_residue - state.residue; - dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken, + dma_count = tty_insert_flip_string(port, dbuf->buf + dmataken, size); if (dma_count == size) dmarx->last_residue = state.residue; @@ -1123,7 +1118,7 @@ static void pl011_dma_startup(struct uart_amba_port *uap) return; } - sg_init_one(&uap->dmatx.sg, uap->dmatx.buf, PL011_DMA_BUFFER_SIZE); + uap->dmatx.len = PL011_DMA_BUFFER_SIZE; /* The DMA buffer is now the FIFO the TTY subsystem can use */ uap->port.fifosize = PL011_DMA_BUFFER_SIZE; @@ -1133,7 +1128,7 @@ static void pl011_dma_startup(struct uart_amba_port *uap) goto skip_rx; /* Allocate and map DMA RX buffers */ - ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_a, + ret = pl011_dmabuf_init(uap->dmarx.chan, &uap->dmarx.dbuf_a, DMA_FROM_DEVICE); if (ret) { dev_err(uap->port.dev, "failed to init DMA %s: %d\n", @@ -1141,12 +1136,12 @@ static void pl011_dma_startup(struct uart_amba_port *uap) goto skip_rx; } - ret = pl011_sgbuf_init(uap->dmarx.chan, &uap->dmarx.sgbuf_b, + ret = pl011_dmabuf_init(uap->dmarx.chan, &uap->dmarx.dbuf_b, DMA_FROM_DEVICE); if (ret) { dev_err(uap->port.dev, "failed to init DMA %s: %d\n", "RX buffer B", ret); - pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, + pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_a, DMA_FROM_DEVICE); goto skip_rx; } @@ -1200,8 +1195,9 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap) /* In theory, this should already be done by pl011_dma_flush_buffer */ dmaengine_terminate_all(uap->dmatx.chan); if (uap->dmatx.queued) { - dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, - DMA_TO_DEVICE); + dma_unmap_single(uap->dmatx.chan->device->dev, + uap->dmatx.dma, uap->dmatx.len, + DMA_TO_DEVICE); uap->dmatx.queued = false; } @@ -1212,8 +1208,8 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap) if (uap->using_rx_dma) { dmaengine_terminate_all(uap->dmarx.chan); /* Clean up the RX DMA */ - pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, DMA_FROM_DEVICE); - pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_b, DMA_FROM_DEVICE); + pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_a, DMA_FROM_DEVICE); + pl011_dmabuf_free(uap->dmarx.chan, &uap->dmarx.dbuf_b, DMA_FROM_DEVICE); if (uap->dmarx.poll_rate) del_timer_sync(&uap->dmarx.timer); uap->using_rx_dma = false; From 98b0ca904c804125a9bc56bd188c0cb6c1bae776 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 23 Nov 2023 08:28:18 +0100 Subject: [PATCH 0996/1397] serial: sc16is7xx: address RX timeout interrupt errata commit 08ce9a1b72e38cf44c300a44ac5858533eb3c860 upstream. This device has a silicon bug that makes it report a timeout interrupt but no data in the FIFO. The datasheet states the following in the errata section 18.1.4: "If the host reads the receive FIFO at the same time as a time-out interrupt condition happens, the host might read 0xCC (time-out) in the Interrupt Indication Register (IIR), but bit 0 of the Line Status Register (LSR) is not set (means there is no data in the receive FIFO)." The errata description seems to indicate it concerns only polled mode of operation when reading bit 0 of the LSR register. However, tests have shown and NXP has confirmed that the RXLVL register also yields 0 when the bug is triggered, and hence the IRQ driven implementation in this driver is equally affected. This bug has hit us on production units and when it does, sc16is7xx_irq() would spin forever because sc16is7xx_port_irq() keeps seeing an interrupt in the IIR register that is not cleared because the driver does not call into sc16is7xx_handle_rx() unless the RXLVL register reports at least one byte in the FIFO. Fix this by always reading one byte from the FIFO when this condition is detected in order to clear the interrupt. This approach was confirmed to be correct by NXP through their support channels. Tested by: Hugo Villeneuve Signed-off-by: Daniel Mack Co-Developed-by: Maxim Popov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231123072818.1394539-1-daniel@zonque.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index f61d98e09dc3..6a0a3208d090 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -767,6 +767,18 @@ static bool sc16is7xx_port_irq(struct sc16is7xx_port *s, int portno) case SC16IS7XX_IIR_RTOI_SRC: case SC16IS7XX_IIR_XOFFI_SRC: rxlen = sc16is7xx_port_read(port, SC16IS7XX_RXLVL_REG); + + /* + * There is a silicon bug that makes the chip report a + * time-out interrupt but no data in the FIFO. This is + * described in errata section 18.1.4. + * + * When this happens, read one byte from the FIFO to + * clear the interrupt. + */ + if (iir == SC16IS7XX_IIR_RTOI_SRC && !rxlen) + rxlen = 1; + if (rxlen) sc16is7xx_handle_rx(port, rxlen, iir); break; From 70dc9a482fbbf04c1952c4446434909a8d190ede Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Tue, 31 Oct 2023 12:09:09 +0100 Subject: [PATCH 0997/1397] serial: 8250: 8250_omap: Clear UART_HAS_RHR_IT_DIS bit commit 8973ab7a2441b286218f4a5c4c33680e2f139996 upstream. This fixes commit 439c7183e5b9 ("serial: 8250: 8250_omap: Disable RX interrupt after DMA enable") which unfortunately set the UART_HAS_RHR_IT_DIS bit in the UART_OMAP_IER2 register and never cleared it. Cc: stable@vger.kernel.org Fixes: 439c7183e5b9 ("serial: 8250: 8250_omap: Disable RX interrupt after DMA enable") Signed-off-by: Ronald Wahl Reviewed-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20231031110909.11695-1-rwahl@gmx.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index ca972fd37725..c7ab2963040b 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -914,7 +914,7 @@ static void __dma_rx_do_complete(struct uart_8250_port *p) if (priv->habit & UART_HAS_RHR_IT_DIS) { reg = serial_in(p, UART_OMAP_IER2); reg &= ~UART_OMAP_IER2_RHR_IT_DIS; - serial_out(p, UART_OMAP_IER2, UART_OMAP_IER2_RHR_IT_DIS); + serial_out(p, UART_OMAP_IER2, reg); } dmaengine_tx_status(rxchan, cookie, &state); @@ -1060,7 +1060,7 @@ static int omap_8250_rx_dma(struct uart_8250_port *p) if (priv->habit & UART_HAS_RHR_IT_DIS) { reg = serial_in(p, UART_OMAP_IER2); reg |= UART_OMAP_IER2_RHR_IT_DIS; - serial_out(p, UART_OMAP_IER2, UART_OMAP_IER2_RHR_IT_DIS); + serial_out(p, UART_OMAP_IER2, reg); } dma_async_issue_pending(dma->rxchan); From a2c0b05a8c00e2b8337275dd0fd8a3264d1d1b10 Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Wed, 1 Nov 2023 18:14:31 +0100 Subject: [PATCH 0998/1397] serial: 8250: 8250_omap: Do not start RX DMA on THRI interrupt commit c6bb057418876cdfdd29a6f7b8cef54539ee8811 upstream. Starting RX DMA on THRI interrupt is too early because TX may not have finished yet. This change is inspired by commit 90b8596ac460 ("serial: 8250: Prevent starting up DMA Rx on THRI interrupt") and fixes DMA issues I had with an AM62 SoC that is using the 8250 OMAP variant. Cc: stable@vger.kernel.org Fixes: c26389f998a8 ("serial: 8250: 8250_omap: Add DMA support for UARTs on K3 SoCs") Signed-off-by: Ronald Wahl Reviewed-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20231101171431.16495-1-rwahl@gmx.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index c7ab2963040b..1122f37fe744 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1282,10 +1282,12 @@ static int omap_8250_dma_handle_irq(struct uart_port *port) status = serial_port_in(port, UART_LSR); - if (priv->habit & UART_HAS_EFR2) - am654_8250_handle_rx_dma(up, iir, status); - else - status = omap_8250_handle_rx_dma(up, iir, status); + if ((iir & 0x3f) != UART_IIR_THRI) { + if (priv->habit & UART_HAS_EFR2) + am654_8250_handle_rx_dma(up, iir, status); + else + status = omap_8250_handle_rx_dma(up, iir, status); + } serial8250_modem_status(up); if (status & UART_LSR_THRE && up->dma->tx_err) { From 22fdcaaf7d9d7696ce20a57c27dea8e44bc8c8a7 Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Tue, 31 Oct 2023 14:12:42 +0100 Subject: [PATCH 0999/1397] serial: 8250_omap: Add earlycon support for the AM654 UART controller commit 8e42c301ce64e0dcca547626eb486877d502d336 upstream. Currently there is no support for earlycon on the AM654 UART controller. This commit adds it. Signed-off-by: Ronald Wahl Reviewed-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20231031131242.15516-1-rwahl@gmx.de Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_early.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c index 9837a27739fd..e3f482fd3de4 100644 --- a/drivers/tty/serial/8250/8250_early.c +++ b/drivers/tty/serial/8250/8250_early.c @@ -189,5 +189,6 @@ static int __init early_omap8250_setup(struct earlycon_device *device, OF_EARLYCON_DECLARE(omap8250, "ti,omap2-uart", early_omap8250_setup); OF_EARLYCON_DECLARE(omap8250, "ti,omap3-uart", early_omap8250_setup); OF_EARLYCON_DECLARE(omap8250, "ti,omap4-uart", early_omap8250_setup); +OF_EARLYCON_DECLARE(omap8250, "ti,am654-uart", early_omap8250_setup); #endif From 7d831c68bb92271c49f35ef920c9edcb154193ad Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Fri, 17 Nov 2023 20:19:32 +0530 Subject: [PATCH 1000/1397] devcoredump: Send uevent once devcd is ready commit af54d778a03853801d681c98c0c2a6c316ef9ca7 upstream. dev_coredumpm() creates a devcoredump device and adds it to the core kernel framework which eventually end up sending uevent to the user space and later creates a symbolic link to the failed device. An application running in userspace may be interested in this symbolic link to get the name of the failed device. In a issue scenario, once uevent sent to the user space it start reading '/sys/class/devcoredump/devcdX/failing_device' to get the actual name of the device which might not been created and it is in its path of creation. To fix this, suppress sending uevent till the failing device symbolic link gets created and send uevent once symbolic link is created successfully. Fixes: 833c95456a70 ("device coredump: add new device coredump class") Signed-off-by: Mukesh Ojha Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1700232572-25823-1-git-send-email-quic_mojha@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/devcoredump.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index 91536ee05f14..7e2d1f0d903a 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -362,6 +362,7 @@ void dev_coredumpm(struct device *dev, struct module *owner, devcd->devcd_dev.class = &devcd_class; mutex_lock(&devcd->mutex); + dev_set_uevent_suppress(&devcd->devcd_dev, true); if (device_add(&devcd->devcd_dev)) goto put_device; @@ -376,6 +377,8 @@ void dev_coredumpm(struct device *dev, struct module *owner, "devcoredump")) dev_warn(dev, "devcoredump create_link failed\n"); + dev_set_uevent_suppress(&devcd->devcd_dev, false); + kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); mutex_unlock(&devcd->mutex); From 903a1a1949c6c56beec74a4c2e6f1a51848a7941 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 1 Dec 2023 19:37:27 +0100 Subject: [PATCH 1001/1397] x86/CPU/AMD: Check vendor in the AMD microcode callback commit 9b8493dc43044376716d789d07699f17d538a7c4 upstream. Commit in Fixes added an AMD-specific microcode callback. However, it didn't check the CPU vendor the kernel runs on explicitly. The only reason the Zenbleed check in it didn't run on other x86 vendors hardware was pure coincidental luck: if (!cpu_has_amd_erratum(c, amd_zenbleed)) return; gives true on other vendors because they don't have those families and models. However, with the removal of the cpu_has_amd_erratum() in 05f5f73936fa ("x86/CPU/AMD: Drop now unused CPU erratum checking function") that coincidental condition is gone, leading to the zenbleed check getting executed on other vendors too. Add the explicit vendor check for the whole callback as it should've been done in the first place. Fixes: 522b1d69219d ("x86/cpu/amd: Add a Zenbleed fix") Cc: Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20231201184226.16749-1-bp@alien8.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ece2b5b7b0fe..6e4f23f314ac 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1315,6 +1315,9 @@ static void zenbleed_check_cpu(void *unused) void amd_check_microcode(void) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return; + on_each_cpu(zenbleed_check_cpu, NULL, 1); } From fd728449807e6bed89e8c0f98d9b22760517f40a Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Thu, 30 Nov 2023 12:29:47 +0530 Subject: [PATCH 1002/1397] powerpc/ftrace: Fix stack teardown in ftrace_no_trace commit 4b3338aaa74d7d4ec5b6734dc298f0db94ec83d2 upstream. Commit 41a506ef71eb ("powerpc/ftrace: Create a dummy stackframe to fix stack unwind") added use of a new stack frame on ftrace entry to fix stack unwind. However, the commit missed updating the offset used while tearing down the ftrace stack when ftrace is disabled. Fix the same. In addition, the commit missed saving the correct stack pointer in pt_regs. Update the same. Fixes: 41a506ef71eb ("powerpc/ftrace: Create a dummy stackframe to fix stack unwind") Cc: stable@vger.kernel.org # v6.5+ Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20231130065947.2188860-1-naveen@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/trace/ftrace_entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 90701885762c..40677416d7b2 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -62,7 +62,7 @@ .endif /* Save previous stack pointer (r1) */ - addi r8, r1, SWITCH_FRAME_SIZE + addi r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE PPC_STL r8, GPR1(r1) .if \allregs == 1 @@ -182,7 +182,7 @@ ftrace_no_trace: mflr r3 mtctr r3 REST_GPR(3, r1) - addi r1, r1, SWITCH_FRAME_SIZE + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE mtlr r0 bctr #endif From 9a3e02063016f12287dde3077b787586220b3be6 Mon Sep 17 00:00:00 2001 From: Roy Luo Date: Tue, 28 Nov 2023 22:17:56 +0000 Subject: [PATCH 1003/1397] USB: gadget: core: adjust uevent timing on gadget unbind commit 73ea73affe8622bdf292de898da869d441da6a9d upstream. The KOBJ_CHANGE uevent is sent before gadget unbind is actually executed, resulting in inaccurate uevent emitted at incorrect timing (the uevent would have USB_UDC_DRIVER variable set while it would soon be removed). Move the KOBJ_CHANGE uevent to the end of the unbind function so that uevent is sent only after the change has been made. Fixes: 2ccea03a8f7e ("usb: gadget: introduce UDC Class") Cc: stable@vger.kernel.org Signed-off-by: Roy Luo Link: https://lore.kernel.org/r/20231128221756.2591158-1-royluo@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 7166d1117742..781c8338546f 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1635,8 +1635,6 @@ static void gadget_unbind_driver(struct device *dev) dev_dbg(&udc->dev, "unbinding gadget driver [%s]\n", driver->function); - kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); - udc->allow_connect = false; cancel_work_sync(&udc->vbus_work); mutex_lock(&udc->connect_lock); @@ -1656,6 +1654,8 @@ static void gadget_unbind_driver(struct device *dev) driver->is_bound = false; udc->driver = NULL; mutex_unlock(&udc_lock); + + kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); } /* ------------------------------------------------------------------------- */ From 63c80f574a8ee63739592494cb19884c31785445 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2023 00:22:00 +0000 Subject: [PATCH 1004/1397] cifs: Fix flushing, invalidation and file size with copy_file_range() commit 7b2404a886f8b91250c31855d287e632123e1746 upstream. Fix a number of issues in the cifs filesystem implementation of the copy_file_range() syscall in cifs_file_copychunk_range(). Firstly, the invalidation of the destination range is handled incorrectly: We shouldn't just invalidate the whole file as dirty data in the file may get lost and we can't just call truncate_inode_pages_range() to invalidate the destination range as that will erase parts of a partial folio at each end whilst invalidating and discarding all the folios in the middle. We need to force all the folios covering the range to be reloaded, but we mustn't lose dirty data in them that's not in the destination range. Further, we shouldn't simply round out the range to PAGE_SIZE at each end as cifs should move to support multipage folios. Secondly, there's an issue whereby a write may have extended the file locally, but not have been written back yet. This can leaves the local idea of the EOF at a later point than the server's EOF. If a copy request is issued, this will fail on the server with STATUS_INVALID_VIEW_SIZE (which gets translated to -EIO locally) if the copy source extends past the server's EOF. Fix this by: (0) Flush the source region (already done). The flush does nothing and the EOF isn't moved if the source region has no dirty data. (1) Move the EOF to the end of the source region if it isn't already at least at this point. If we can't do this, for instance if the server doesn't support it, just flush the entire source file. (2) Find the folio (if present) at each end of the range, flushing it and increasing the region-to-be-invalidated to cover those in their entirety. (3) Fully discard all the folios covering the range as we want them to be reloaded. (4) Then perform the copy. Thirdly, set i_size after doing the copychunk_range operation as this value may be used by various things internally. stat() hides the issue because setting ->time to 0 causes cifs_getatr() to revalidate the attributes. These were causing the generic/075 xfstest to fail. Fixes: 620d8745b35d ("Introduce cifs_copy_file_range()") Cc: stable@vger.kernel.org Signed-off-by: David Howells cc: Paulo Alcantara cc: Shyam Prasad N cc: Rohith Surabattula cc: Matthew Wilcox cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: David Howells Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifsfs.c | 102 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 99497f1b479b..56c1826fcc2f 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1196,6 +1196,72 @@ const struct inode_operations cifs_symlink_inode_ops = { .listxattr = cifs_listxattr, }; +/* + * Advance the EOF marker to after the source range. + */ +static int cifs_precopy_set_eof(struct inode *src_inode, struct cifsInodeInfo *src_cifsi, + struct cifs_tcon *src_tcon, + unsigned int xid, loff_t src_end) +{ + struct cifsFileInfo *writeable_srcfile; + int rc = -EINVAL; + + writeable_srcfile = find_writable_file(src_cifsi, FIND_WR_FSUID_ONLY); + if (writeable_srcfile) { + if (src_tcon->ses->server->ops->set_file_size) + rc = src_tcon->ses->server->ops->set_file_size( + xid, src_tcon, writeable_srcfile, + src_inode->i_size, true /* no need to set sparse */); + else + rc = -ENOSYS; + cifsFileInfo_put(writeable_srcfile); + cifs_dbg(FYI, "SetFSize for copychunk rc = %d\n", rc); + } + + if (rc < 0) + goto set_failed; + + netfs_resize_file(&src_cifsi->netfs, src_end); + fscache_resize_cookie(cifs_inode_cookie(src_inode), src_end); + return 0; + +set_failed: + return filemap_write_and_wait(src_inode->i_mapping); +} + +/* + * Flush out either the folio that overlaps the beginning of a range in which + * pos resides or the folio that overlaps the end of a range unless that folio + * is entirely within the range we're going to invalidate. We extend the flush + * bounds to encompass the folio. + */ +static int cifs_flush_folio(struct inode *inode, loff_t pos, loff_t *_fstart, loff_t *_fend, + bool first) +{ + struct folio *folio; + unsigned long long fpos, fend; + pgoff_t index = pos / PAGE_SIZE; + size_t size; + int rc = 0; + + folio = filemap_get_folio(inode->i_mapping, index); + if (IS_ERR(folio)) + return 0; + + size = folio_size(folio); + fpos = folio_pos(folio); + fend = fpos + size - 1; + *_fstart = min_t(unsigned long long, *_fstart, fpos); + *_fend = max_t(unsigned long long, *_fend, fend); + if ((first && pos == fpos) || (!first && pos == fend)) + goto out; + + rc = filemap_write_and_wait_range(inode->i_mapping, fpos, fend); +out: + folio_put(folio); + return rc; +} + static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, loff_t len, unsigned int remap_flags) @@ -1265,10 +1331,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, { struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); + struct cifsInodeInfo *src_cifsi = CIFS_I(src_inode); struct cifsFileInfo *smb_file_src; struct cifsFileInfo *smb_file_target; struct cifs_tcon *src_tcon; struct cifs_tcon *target_tcon; + unsigned long long destend, fstart, fend; ssize_t rc; cifs_dbg(FYI, "copychunk range\n"); @@ -1308,13 +1376,41 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, if (rc) goto unlock; - /* should we flush first and last page first */ - truncate_inode_pages(&target_inode->i_data, 0); + /* The server-side copy will fail if the source crosses the EOF marker. + * Advance the EOF marker after the flush above to the end of the range + * if it's short of that. + */ + if (src_cifsi->server_eof < off + len) { + rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len); + if (rc < 0) + goto unlock; + } + + destend = destoff + len - 1; + + /* Flush the folios at either end of the destination range to prevent + * accidental loss of dirty data outside of the range. + */ + fstart = destoff; + fend = destend; + + rc = cifs_flush_folio(target_inode, destoff, &fstart, &fend, true); + if (rc) + goto unlock; + rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); + if (rc) + goto unlock; + + /* Discard all the folios that overlap the destination region. */ + truncate_inode_pages_range(&target_inode->i_data, fstart, fend); rc = file_modified(dst_file); - if (!rc) + if (!rc) { rc = target_tcon->ses->server->ops->copychunk_range(xid, smb_file_src, smb_file_target, off, len, destoff); + if (rc > 0 && destoff + rc > i_size_read(target_inode)) + truncate_setsize(target_inode, destoff + rc); + } file_accessed(src_file); From 43801359c8971cc49b4a4db5015fba77fbd09c32 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 1 Dec 2023 00:22:01 +0000 Subject: [PATCH 1005/1397] cifs: Fix flushing, invalidation and file size with FICLONE commit c54fc3a4f375663f2361a9cbb2955fb4ef912879 upstream. Fix a number of issues in the cifs filesystem implementation of the FICLONE ioctl in cifs_remap_file_range(). This is analogous to the previously fixed bug in cifs_file_copychunk_range() and can share the helper functions. Firstly, the invalidation of the destination range is handled incorrectly: We shouldn't just invalidate the whole file as dirty data in the file may get lost and we can't just call truncate_inode_pages_range() to invalidate the destination range as that will erase parts of a partial folio at each end whilst invalidating and discarding all the folios in the middle. We need to force all the folios covering the range to be reloaded, but we mustn't lose dirty data in them that's not in the destination range. Further, we shouldn't simply round out the range to PAGE_SIZE at each end as cifs should move to support multipage folios. Secondly, there's an issue whereby a write may have extended the file locally, but not have been written back yet. This can leaves the local idea of the EOF at a later point than the server's EOF. If a clone request is issued, this will fail on the server with STATUS_INVALID_VIEW_SIZE (which gets translated to -EIO locally) if the clone source extends past the server's EOF. Fix this by: (0) Flush the source region (already done). The flush does nothing and the EOF isn't moved if the source region has no dirty data. (1) Move the EOF to the end of the source region if it isn't already at least at this point. If we can't do this, for instance if the server doesn't support it, just flush the entire source file. (2) Find the folio (if present) at each end of the range, flushing it and increasing the region-to-be-invalidated to cover those in their entirety. (3) Fully discard all the folios covering the range as we want them to be reloaded. (4) Then perform the extent duplication. Thirdly, set i_size after doing the duplicate_extents operation as this value may be used by various things internally. stat() hides the issue because setting ->time to 0 causes cifs_getatr() to revalidate the attributes. These were causing the cifs/001 xfstest to fail. Fixes: 04b38d601239 ("vfs: pull btrfs clone API to vfs layer") Signed-off-by: David Howells Cc: stable@vger.kernel.org cc: Christoph Hellwig cc: Paulo Alcantara cc: Shyam Prasad N cc: Rohith Surabattula cc: Matthew Wilcox cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: David Howells Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifsfs.c | 68 +++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 56c1826fcc2f..2131638f26d0 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1268,9 +1268,12 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, { struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); + struct cifsInodeInfo *src_cifsi = CIFS_I(src_inode); + struct cifsInodeInfo *target_cifsi = CIFS_I(target_inode); struct cifsFileInfo *smb_file_src = src_file->private_data; - struct cifsFileInfo *smb_file_target; - struct cifs_tcon *target_tcon; + struct cifsFileInfo *smb_file_target = dst_file->private_data; + struct cifs_tcon *target_tcon, *src_tcon; + unsigned long long destend, fstart, fend, new_size; unsigned int xid; int rc; @@ -1283,13 +1286,13 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, xid = get_xid(); - if (!src_file->private_data || !dst_file->private_data) { + if (!smb_file_src || !smb_file_target) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); goto out; } - smb_file_target = dst_file->private_data; + src_tcon = tlink_tcon(smb_file_src->tlink); target_tcon = tlink_tcon(smb_file_target->tlink); /* @@ -1302,20 +1305,63 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, if (len == 0) len = src_inode->i_size - off; - cifs_dbg(FYI, "about to flush pages\n"); - /* should we flush first and last page first */ - truncate_inode_pages_range(&target_inode->i_data, destoff, - PAGE_ALIGN(destoff + len)-1); + cifs_dbg(FYI, "clone range\n"); + + /* Flush the source buffer */ + rc = filemap_write_and_wait_range(src_inode->i_mapping, off, + off + len - 1); + if (rc) + goto unlock; + + /* The server-side copy will fail if the source crosses the EOF marker. + * Advance the EOF marker after the flush above to the end of the range + * if it's short of that. + */ + if (src_cifsi->netfs.remote_i_size < off + len) { + rc = cifs_precopy_set_eof(src_inode, src_cifsi, src_tcon, xid, off + len); + if (rc < 0) + goto unlock; + } + + new_size = destoff + len; + destend = destoff + len - 1; - if (target_tcon->ses->server->ops->duplicate_extents) + /* Flush the folios at either end of the destination range to prevent + * accidental loss of dirty data outside of the range. + */ + fstart = destoff; + fend = destend; + + rc = cifs_flush_folio(target_inode, destoff, &fstart, &fend, true); + if (rc) + goto unlock; + rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); + if (rc) + goto unlock; + + /* Discard all the folios that overlap the destination region. */ + cifs_dbg(FYI, "about to discard pages %llx-%llx\n", fstart, fend); + truncate_inode_pages_range(&target_inode->i_data, fstart, fend); + + fscache_invalidate(cifs_inode_cookie(target_inode), NULL, + i_size_read(target_inode), 0); + + rc = -EOPNOTSUPP; + if (target_tcon->ses->server->ops->duplicate_extents) { rc = target_tcon->ses->server->ops->duplicate_extents(xid, smb_file_src, smb_file_target, off, len, destoff); - else - rc = -EOPNOTSUPP; + if (rc == 0 && new_size > i_size_read(target_inode)) { + truncate_setsize(target_inode, new_size); + netfs_resize_file(&target_cifsi->netfs, new_size); + fscache_resize_cookie(cifs_inode_cookie(target_inode), + new_size); + } + } /* force revalidate of size and timestamps of target file now that target is updated on the server */ CIFS_I(target_inode)->time = 0; +unlock: /* although unlocking in the reverse order from locking is not strictly necessary here it is a little cleaner to be consistent */ unlock_two_nondirectories(src_inode, target_inode); From 6d9cbae4c032451d41c262cbf8b90018af0dbda0 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 30 Nov 2023 17:36:01 +0100 Subject: [PATCH 1006/1397] MIPS: kernel: Clear FPU states when setting up kernel threads commit a58a173444a68412bb08849bd81c679395f20ca0 upstream. io_uring sets up the io worker kernel thread via a syscall out of an user space prrocess. This process might have used FPU and since copy_thread() didn't clear FPU states for kernel threads a BUG() is triggered for using FPU inside kernel. Move code around to always clear FPU state for user and kernel threads. Cc: stable@vger.kernel.org Reported-by: Aurelien Jarno Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1055021 Suggested-by: Jiaxun Yang Reviewed-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 5387ed0a5186..b630604c577f 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -121,6 +121,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Put the stack after the struct pt_regs. */ childksp = (unsigned long) childregs; p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK; + + /* + * New tasks lose permission to use the fpu. This accelerates context + * switching for most programs since they don't use the fpu. + */ + clear_tsk_thread_flag(p, TIF_USEDFPU); + clear_tsk_thread_flag(p, TIF_USEDMSA); + clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE); + +#ifdef CONFIG_MIPS_MT_FPAFF + clear_tsk_thread_flag(p, TIF_FPUBOUND); +#endif /* CONFIG_MIPS_MT_FPAFF */ + if (unlikely(args->fn)) { /* kernel thread */ unsigned long status = p->thread.cp0_status; @@ -149,20 +162,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.reg29 = (unsigned long) childregs; p->thread.reg31 = (unsigned long) ret_from_fork; - /* - * New tasks lose permission to use the fpu. This accelerates context - * switching for most programs since they don't use the fpu. - */ childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); - clear_tsk_thread_flag(p, TIF_USEDFPU); - clear_tsk_thread_flag(p, TIF_USEDMSA); - clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE); - -#ifdef CONFIG_MIPS_MT_FPAFF - clear_tsk_thread_flag(p, TIF_FPUBOUND); -#endif /* CONFIG_MIPS_MT_FPAFF */ - #ifdef CONFIG_MIPS_FP_SUPPORT atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE); #endif From bb356cc6615c8d924f4ac8ef1bc9642d861a584b Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 9 Nov 2023 13:36:24 +0100 Subject: [PATCH 1007/1397] KVM: s390/mm: Properly reset no-dat commit 27072b8e18a73ffeffb1c140939023915a35134b upstream. When the CMMA state needs to be reset, the no-dat bit also needs to be reset. Failure to do so could cause issues in the guest, since the guest expects the bit to be cleared after a reset. Cc: Reviewed-by: Nico Boehr Message-ID: <20231109123624.37314-1-imbrenda@linux.ibm.com> Signed-off-by: Claudio Imbrenda Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 3bd2ab2a9a34..5cb92941540b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -756,7 +756,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); } if (reset) - pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgste_set_unlock(ptep, pgste); preempt_enable(); } From dea8cfe773b967466590568da0f020ccf41453f8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 7 May 2021 09:59:46 -0700 Subject: [PATCH 1008/1397] KVM: SVM: Update EFER software model on CR0 trap for SEV-ES commit 4cdf351d3630a640ab6a05721ef055b9df62277f upstream. In general, activating long mode involves setting the EFER_LME bit in the EFER register and then enabling the X86_CR0_PG bit in the CR0 register. At this point, the EFER_LMA bit will be set automatically by hardware. In the case of SVM/SEV guests where writes to CR0 are intercepted, it's necessary for the host to set EFER_LMA on behalf of the guest since hardware does not see the actual CR0 write. In the case of SEV-ES guests where writes to CR0 are trapped instead of intercepted, the hardware *does* see/record the write to CR0 before exiting and passing the value on to the host, so as part of enabling SEV-ES support commit f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under SEV-ES") dropped special handling of the EFER_LMA bit with the understanding that it would be set automatically. However, since the guest never explicitly sets the EFER_LMA bit, the host never becomes aware that it has been set. This becomes problematic when userspace tries to get/set the EFER values via KVM_GET_SREGS/KVM_SET_SREGS, since the EFER contents tracked by the host will be missing the EFER_LMA bit, and when userspace attempts to pass the EFER value back via KVM_SET_SREGS it will fail a sanity check that asserts that EFER_LMA should always be set when X86_CR0_PG and EFER_LME are set. Fix this by always inferring the value of EFER_LMA based on X86_CR0_PG and EFER_LME, regardless of whether or not SEV-ES is enabled. Fixes: f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under SEV-ES") Reported-by: Peter Gonda Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210507165947.2502412-2-seanjc@google.com> [A two year old patch that was revived after we noticed the failure in KVM_SET_SREGS and a similar patch was posted by Michael Roth. This is Sean's patch, but with Michael's more complete commit message. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/svm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index beea99c8e8e0..77f1eeefcd34 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1873,15 +1873,17 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) bool old_paging = is_paging(vcpu); #ifdef CONFIG_X86_64 - if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { + if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->arch.efer |= EFER_LMA; - svm->vmcb->save.efer |= EFER_LMA | EFER_LME; + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer |= EFER_LMA | EFER_LME; } if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { vcpu->arch.efer &= ~EFER_LMA; - svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); } } #endif From 1f4d4d2cd6b5ac6bb2d97c8f8bc0f366579ee692 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 29 Nov 2023 13:34:26 -0800 Subject: [PATCH 1009/1397] perf list: Fix JSON segfault by setting the used skip_duplicate_pmus callback commit b1693747487442984050eb0f462b83a3a8307525 upstream. Json output didn't set the skip_duplicate_pmus callback yielding a segfault. Fixes: cd4e1efbbc40 ("perf pmus: Skip duplicate PMUs and don't print list suffix by default") Signed-off-by: Ian Rogers Cc: James Clark Cc: Kan Liang Cc: Athira Rajeev Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231129213428.2227448-2-irogers@google.com [namhyung: updated subject line according to Arnaldo] Signed-off-by: Namhyung Kim Signed-off-by: Greg Kroah-Hartman --- tools/perf/builtin-list.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index a343823c8ddf..61c2c96cc070 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -434,6 +434,11 @@ static void json_print_metric(void *ps __maybe_unused, const char *group, strbuf_release(&buf); } +static bool json_skip_duplicate_pmus(void *ps __maybe_unused) +{ + return false; +} + static bool default_skip_duplicate_pmus(void *ps) { struct print_state *print_state = ps; @@ -503,6 +508,7 @@ int cmd_list(int argc, const char **argv) .print_end = json_print_end, .print_event = json_print_event, .print_metric = json_print_metric, + .skip_duplicate_pmus = json_skip_duplicate_pmus, }; ps = &json_ps; } else { From 114d0e5b99376a489067d1fa3e23167f6e6cfb12 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 4 Dec 2023 10:23:29 -0800 Subject: [PATCH 1010/1397] perf metrics: Avoid segv if default metricgroup isn't set commit e2b005d6ec0e738df584190e21d2c7ada37266a0 upstream. A metric is default by having "Default" within its groups. The default metricgroup name needn't be set and this can result in segv in default_metricgroup_cmp and perf_stat__print_shadow_stats_metricgroup that assume it has a value when there is a Default metric group. To avoid the segv initialize the value to "". Fixes: 1c0e47956a8e ("perf metrics: Sort the Default metricgroup") Signed-off-by: Ian Rogers Reviewed-and-tested-by: Ilkka Koskinen Cc: James Clark Cc: Will Deacon Cc: Leo Yan Cc: Mike Leach Cc: Kajol Jain Cc: Kan Liang Cc: John Garry Cc: stable@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231204182330.654255-1-irogers@google.com Signed-off-by: Namhyung Kim Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/metricgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 6231044a491e..bb5faaa25d51 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -225,7 +225,7 @@ static struct metric *metric__new(const struct pmu_metric *pm, m->pmu = pm->pmu ?: "cpu"; m->metric_name = pm->metric_name; - m->default_metricgroup_name = pm->default_metricgroup_name; + m->default_metricgroup_name = pm->default_metricgroup_name ?: ""; m->modifier = NULL; if (modifier) { m->modifier = strdup(modifier); From ae07e7d338c474cb6daf798618b33b6a346a88a0 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 7 Nov 2023 11:15:18 +0000 Subject: [PATCH 1011/1397] MIPS: Loongson64: Reserve vgabios memory on boot commit 8f7aa77a463f47c9e00592d02747a9fcf2271543 upstream. vgabios is passed from firmware to kernel on Loongson64 systems. Sane firmware will keep this pointer in reserved memory space passed from the firmware but insane firmware keeps it in low memory before kernel entry that is not reserved. Previously kernel won't try to allocate memory from low memory before kernel entry on boot, but after converting to memblock it will do that. Fix by resversing those memory on early boot. Cc: stable@vger.kernel.org Fixes: a94e4f24ec83 ("MIPS: init: Drop boot_mem_map") Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/loongson64/init.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index ee8de1735b7c..d62262f93069 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -88,6 +88,11 @@ void __init szmem(unsigned int node) break; } } + + /* Reserve vgabios if it comes from firmware */ + if (loongson_sysconf.vgabios_addr) + memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr), + SZ_256K); } #ifndef CONFIG_NUMA From 264927e3538169fe2973a5732f03ea01b0f9f9e8 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 7 Nov 2023 11:15:20 +0000 Subject: [PATCH 1012/1397] MIPS: Loongson64: Handle more memory types passed from firmware commit c7206e7bd214ebb3ca6fa474a4423662327d9beb upstream. There are many types of revsered memory passed from firmware that should be reserved in memblock, and UMA memory passed from firmware that should be added to system memory for system to use. Also for memblock there is no need to align those space into page, which actually cause problems. Handle them properly to prevent memory corruption on some systems. Cc: stable@vger.kernel.org Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- .../include/asm/mach-loongson64/boot_param.h | 6 ++- arch/mips/loongson64/init.c | 42 ++++++++++++------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index 035b1a69e2d0..0f67e7efc509 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -14,7 +14,11 @@ #define ADAPTER_ROM 8 #define ACPI_TABLE 9 #define SMBIOS_TABLE 10 -#define MAX_MEMORY_TYPE 11 +#define UMA_VIDEO_RAM 11 +#define VUMA_VIDEO_RAM 12 +#define MAX_MEMORY_TYPE 13 + +#define MEM_SIZE_IS_IN_BYTES (1 << 31) #define LOONGSON3_BOOT_MEM_MAP_MAX 128 struct efi_memory_map_loongson { diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index d62262f93069..f25caa6aa9d3 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -49,8 +49,7 @@ void virtual_early_config(void) void __init szmem(unsigned int node) { u32 i, mem_type; - static unsigned long num_physpages; - u64 node_id, node_psize, start_pfn, end_pfn, mem_start, mem_size; + phys_addr_t node_id, mem_start, mem_size; /* Otherwise come from DTB */ if (loongson_sysconf.fw_interface != LOONGSON_LEFI) @@ -64,27 +63,38 @@ void __init szmem(unsigned int node) mem_type = loongson_memmap->map[i].mem_type; mem_size = loongson_memmap->map[i].mem_size; - mem_start = loongson_memmap->map[i].mem_start; + + /* Memory size comes in MB if MEM_SIZE_IS_IN_BYTES not set */ + if (mem_size & MEM_SIZE_IS_IN_BYTES) + mem_size &= ~MEM_SIZE_IS_IN_BYTES; + else + mem_size = mem_size << 20; + + mem_start = (node_id << 44) | loongson_memmap->map[i].mem_start; switch (mem_type) { case SYSTEM_RAM_LOW: case SYSTEM_RAM_HIGH: - start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT; - node_psize = (mem_size << 20) >> PAGE_SHIFT; - end_pfn = start_pfn + node_psize; - num_physpages += node_psize; - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", - start_pfn, end_pfn, num_physpages); - memblock_add_node(PFN_PHYS(start_pfn), - PFN_PHYS(node_psize), node, + case UMA_VIDEO_RAM: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes usable\n", + (u32)node_id, mem_type, &mem_start, &mem_size); + memblock_add_node(mem_start, mem_size, node, MEMBLOCK_NONE); break; case SYSTEM_RAM_RESERVED: - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - memblock_reserve(((node_id << 44) + mem_start), mem_size << 20); + case VIDEO_ROM: + case ADAPTER_ROM: + case ACPI_TABLE: + case SMBIOS_TABLE: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes reserved\n", + (u32)node_id, mem_type, &mem_start, &mem_size); + memblock_reserve(mem_start, mem_size); + break; + /* We should not reserve VUMA_VIDEO_RAM as it overlaps with MMIO */ + case VUMA_VIDEO_RAM: + default: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes unhandled\n", + (u32)node_id, mem_type, &mem_start, &mem_size); break; } } From 1eed445d53640d614efc7c37c9b354bd6f471cd3 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 7 Nov 2023 11:15:19 +0000 Subject: [PATCH 1013/1397] MIPS: Loongson64: Enable DMA noncoherent support commit edc0378eee00200a5bedf1bb9f00ad390e0d1bd4 upstream. There are some Loongson64 systems come with broken coherent DMA support, firmware will set a bit in boot_param and pass nocoherentio in cmdline. However nonconherent support was missed out when spin off Loongson-2EF form Loongson64, and that boot_param change never made itself into upstream. Support DMA noncoherent properly to get those systems working. Cc: stable@vger.kernel.org Fixes: 71e2f4dd5a65 ("MIPS: Fork loongson2ef from loongson64") Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/Kconfig | 2 ++ arch/mips/include/asm/mach-loongson64/boot_param.h | 3 ++- arch/mips/loongson64/env.c | 10 +++++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index bc8421859006..91c3a502156b 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -482,6 +482,7 @@ config MACH_LOONGSON2EF config MACH_LOONGSON64 bool "Loongson 64-bit family of machines" + select ARCH_DMA_DEFAULT_COHERENT select ARCH_SPARSEMEM_ENABLE select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO @@ -1273,6 +1274,7 @@ config CPU_LOONGSON64 select CPU_SUPPORTS_MSA select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT select CPU_MIPSR2_IRQ_VI + select DMA_NONCOHERENT select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select MIPS_ASID_BITS_VARIABLE diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index 0f67e7efc509..e007edd6b60a 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -121,7 +121,8 @@ struct irq_source_routing_table { u64 pci_io_start_addr; u64 pci_io_end_addr; u64 pci_config_addr; - u32 dma_mask_bits; + u16 dma_mask_bits; + u16 dma_noncoherent; } __packed; struct interface_info { diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c index c961e2999f15..ef3750a6ffac 100644 --- a/arch/mips/loongson64/env.c +++ b/arch/mips/loongson64/env.c @@ -13,6 +13,8 @@ * Copyright (C) 2009 Lemote Inc. * Author: Wu Zhangjin, wuzhangjin@gmail.com */ + +#include #include #include #include @@ -147,8 +149,14 @@ void __init prom_lefi_init_env(void) loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits; if (loongson_sysconf.dma_mask_bits < 32 || - loongson_sysconf.dma_mask_bits > 64) + loongson_sysconf.dma_mask_bits > 64) { loongson_sysconf.dma_mask_bits = 32; + dma_default_coherent = true; + } else { + dma_default_coherent = !eirq_source->dma_noncoherent; + } + + pr_info("Firmware: Coherent DMA: %s\n", dma_default_coherent ? "on" : "off"); loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm; loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown; From c66d39f18e0ef16bac8c8491b657051f531a1eba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Dec 2023 15:47:13 +0100 Subject: [PATCH 1014/1397] netfilter: nft_set_pipapo: skip inactive elements during set walk commit 317eb9685095678f2c9f5a8189de698c5354316a upstream. Otherwise set elements can be deactivated twice which will cause a crash. Reported-by: Xingyuan Mo Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_set_pipapo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index c0dcc40de358..3ff31043f714 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2041,6 +2041,9 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, e = f->mt[r].e; + if (!nft_set_elem_active(&e->ext, iter->genmask)) + goto cont; + elem.priv = e; iter->err = iter->fn(ctx, set, iter, &elem); From ba6e4b997181497e51419fad2b73d229bffa1787 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 4 Dec 2023 12:47:36 +0000 Subject: [PATCH 1015/1397] ASoC: qcom: sc8280xp: Limit speaker digital volumes commit 716d4e5373e9d1ae993485ab2e3b893bf7104fb1 upstream. Limit the speaker digital gains to 0dB so that the users will not damage them. Currently there is a limit in UCM, but this does not stop the user form changing the digital gains from command line. So limit this in driver which makes the speakers more safer without active speaker protection in place. Signed-off-by: Srinivas Kandagatla Reviewed-by: Johan Hovold Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20231204124736.132185-3-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown [ johan: backport to 6.6; rename snd_soc_rtd_to_cpu() ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- sound/soc/qcom/sc8280xp.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c index 14d9fea33d16..c61cad0180c8 100644 --- a/sound/soc/qcom/sc8280xp.c +++ b/sound/soc/qcom/sc8280xp.c @@ -27,6 +27,23 @@ struct sc8280xp_snd_data { static int sc8280xp_snd_init(struct snd_soc_pcm_runtime *rtd) { struct sc8280xp_snd_data *data = snd_soc_card_get_drvdata(rtd->card); + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); + struct snd_soc_card *card = rtd->card; + + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case WSA_CODEC_DMA_RX_1: + /* + * set limit of 0dB on Digital Volume for Speakers, + * this can prevent damage of speakers to some extent without + * active speaker protection + */ + snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 84); + snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 84); + break; + default: + break; + } return qcom_snd_wcd_jack_setup(rtd, &data->jack, &data->jack_setup); } From b79210fa10db4cab09b19965f6cc47bc393e47e9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 25 Nov 2023 15:49:12 -0600 Subject: [PATCH 1016/1397] gcc-plugins: randstruct: Update code comment in relayout_struct() commit d71f22365a9caca82d424f3a33445de46567e198 upstream. Update code comment to clarify that the only element whose layout is not randomized is a proper C99 flexible-array member. This update is complementary to commit 1ee60356c2dc ("gcc-plugins: randstruct: Only warn about true flexible arrays") Signed-off-by: "Gustavo A. R. Silva" Link: https://lore.kernel.org/r/ZWJr2MWDjXLHE8ap@work Fixes: 1ee60356c2dc ("gcc-plugins: randstruct: Only warn about true flexible arrays") Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- scripts/gcc-plugins/randomize_layout_plugin.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 910bd21d08f4..746ff2d272f2 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -339,8 +339,7 @@ static int relayout_struct(tree type) /* * enforce that we don't randomize the layout of the last - * element of a struct if it's a 0 or 1-length array - * or a proper flexible array + * element of a struct if it's a proper flexible array */ if (is_flexible_array(newtree[num_fields - 1])) { has_flexarray = true; From 48411bc4c3fbb01571a827939f829a0a4aad951c Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Thu, 25 May 2023 14:18:36 +0800 Subject: [PATCH 1017/1397] riscv: Kconfig: Add select ARM_AMBA to SOC_STARFIVE commit 78a03b9f8e6b317f7c65738a3fc60e1e85106a64 upstream. Selects ARM_AMBA platform support for StarFive SoCs required by spi and crypto dma engine. Signed-off-by: Jia Jie Ho Acked-by: Palmer Dabbelt Signed-off-by: Conor Dooley Cc: Nam Cao Signed-off-by: Greg Kroah-Hartman --- arch/riscv/Kconfig.socs | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 6833d01e2e70..30fd6a512828 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -29,6 +29,7 @@ config SOC_STARFIVE bool "StarFive SoCs" select PINCTRL select RESET_CONTROLLER + select ARM_AMBA help This enables support for StarFive SoC platform hardware. From bebe0c07b806011ae99cfa50e1bf8fda51b9407f Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 6 Sep 2023 09:21:39 +0530 Subject: [PATCH 1018/1397] drm/amdgpu: Fix refclk reporting for SMU v13.0.6 [ Upstream commit 6b7d211740da2c3a7656be8cbb36f32e6d9c6cbd ] SMU v13.0.6 SOCs have 100MHz reference clock. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Stable-dep-of: 6fce23a4d8c5 ("drm/amdgpu: Restrict extended wait to PSP v13.0.6") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f5be40d7ba36..28094cd7d9c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -325,7 +325,8 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev) u32 reference_clock = adev->clock.spll.reference_freq; if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0) || - adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1)) + adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1) || + adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 6)) return 10000; if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) || adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 1)) From 9f29fe9b6184f6f6e78dd524a40982a3201004a4 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 16 Oct 2023 22:10:34 +0800 Subject: [PATCH 1019/1397] drm/amdgpu: update retry times for psp BL wait [ Upstream commit d8c1925ba8cde2863297728a4c8fbf8fe766757a ] Increase retry time for PSP BL wait, to compensate for longer time to set c2pmsg 35 ready bit during mode1 with RAS Signed-off-by: Asad Kamal Reviewed-by: Hawking Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Stable-dep-of: 6fce23a4d8c5 ("drm/amdgpu: Restrict extended wait to PSP v13.0.6") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 52d80f286b3d..8e4372f24f85 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -168,7 +168,7 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) * If there is an error in processing command, bits[7:0] will be set. * This is applicable for PSP v13.0.6 and newer. */ - for (retry_loop = 0; retry_loop < 10; retry_loop++) { + for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), 0x80000000, 0xffffffff, false); From 71a1ffe22d3382f61a6988a273733ac4212335d6 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Nov 2023 18:06:55 +0530 Subject: [PATCH 1020/1397] drm/amdgpu: Restrict extended wait to PSP v13.0.6 [ Upstream commit 6fce23a4d8c5f93bf80b7f122449fbb97f1e40dd ] Only PSPv13.0.6 SOCs take a longer time to reach steady state. Other PSPv13 based SOCs don't need extended wait. Also, reduce PSPv13.0.6 wait time. Cc: stable@vger.kernel.org Fixes: fc5988907156 ("drm/amdgpu: update retry times for psp vmbx wait") Fixes: d8c1925ba8cd ("drm/amdgpu: update retry times for psp BL wait") Link: https://lore.kernel.org/amd-gfx/34dd4c66-f7bf-44aa-af8f-c82889dd652c@amd.com/ Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 8e4372f24f85..fe1995ed13be 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -60,7 +60,7 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); #define GFX_CMD_USB_PD_USE_LFB 0x480 /* Retry times for vmbx ready wait */ -#define PSP_VMBX_POLLING_LIMIT 20000 +#define PSP_VMBX_POLLING_LIMIT 3000 /* VBIOS gfl defines */ #define MBOX_READY_MASK 0x80000000 @@ -161,14 +161,18 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; - int retry_loop, ret; + int retry_loop, retry_cnt, ret; + retry_cnt = + (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6)) ? + PSP_VMBX_POLLING_LIMIT : + 10; /* Wait for bootloader to signify that it is ready having bit 31 of * C2PMSG_35 set to 1. All other bits are expected to be cleared. * If there is an error in processing command, bits[7:0] will be set. * This is applicable for PSP v13.0.6 and newer. */ - for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) { + for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), 0x80000000, 0xffffffff, false); From ac25535242acb0d0042b920e5232b10e76ad169b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Dec 2023 18:45:36 +0100 Subject: [PATCH 1021/1397] Linux 6.6.7 Link: https://lore.kernel.org/r/20231211182045.784881756@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Ronald Warsow Tested-by: Takeshi Ogasawara Tested-by: Kelsey Steele Tested-by: Bagas Sanjaya Tested-by: Ron Economos Tested-by: Conor Dooley Tested-by: Linux Kernel Functional Testing Tested-by: Shuah Khan Tested-by: Guenter Roeck Tested-by: Justin M. Forbes Tested-by: Jon Hunter Tested-by: SeongJae Park Tested-by: Ricardo B. Marliere Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1eefa893f048..707952172ece 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 6 +SUBLEVEL = 7 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth From da94fb0217e52ef10993135dd67263857b170b63 Mon Sep 17 00:00:00 2001 From: Kelly Kane Date: Sat, 2 Dec 2023 17:17:12 -0800 Subject: [PATCH 1022/1397] r8152: add vendor/device ID pair for ASUS USB-C2500 [ Upstream commit 7037d95a047cd89b1f680eed253c6ab586bef1ed ] The ASUS USB-C2500 is an RTL8156 based 2.5G Ethernet controller. Add the vendor and product ID values to the driver. This makes Ethernet work with the adapter. Signed-off-by: Kelly Kane Link: https://lore.kernel.org/r/20231203011712.6314-1-kelly@hawknetworks.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 1 + include/linux/usb/r8152.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7611c406fdb9..127b34dcc5b3 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10016,6 +10016,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) }, { USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) }, { USB_DEVICE(VENDOR_ID_DLINK, 0xb301) }, + { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, {} }; diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 287e9d83fb8b..33a4c146dc19 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -30,6 +30,7 @@ #define VENDOR_ID_NVIDIA 0x0955 #define VENDOR_ID_TPLINK 0x2357 #define VENDOR_ID_DLINK 0x2001 +#define VENDOR_ID_ASUS 0x0b05 #if IS_REACHABLE(CONFIG_USB_RTL8152) extern u8 rtl8152_get_version(struct usb_interface *intf); From 73dddf9858ff68b6cb6bd3193b1c53b3331a8efb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 30 Nov 2023 10:56:53 +0100 Subject: [PATCH 1023/1397] ext4: fix warning in ext4_dio_write_end_io() [ Upstream commit 619f75dae2cf117b1d07f27b046b9ffb071c4685 ] The syzbot has reported that it can hit the warning in ext4_dio_write_end_io() because i_size < i_disksize. Indeed the reproducer creates a race between DIO IO completion and truncate expanding the file and thus ext4_dio_write_end_io() sees an inconsistent inode state where i_disksize is already updated but i_size is not updated yet. Since we are careful when setting up DIO write and consider it extending (and thus performing the IO synchronously with i_rwsem held exclusively) whenever it goes past either of i_size or i_disksize, we can use the same test during IO completion without risking entering ext4_handle_inode_extension() without i_rwsem held. This way we make it obvious both i_size and i_disksize are large enough when we report DIO completion without relying on unreliable WARN_ON. Reported-by: Fixes: 91562895f803 ("ext4: properly sync file size update after O_SYNC direct IO") Signed-off-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20231130095653.22679-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/file.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 0166bb9ca160..6aa15dafc677 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -349,9 +349,10 @@ static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count) return; } /* - * If i_disksize got extended due to writeback of delalloc blocks while - * the DIO was running we could fail to cleanup the orphan list in - * ext4_handle_inode_extension(). Do it now. + * If i_disksize got extended either due to writeback of delalloc + * blocks or extending truncate while the DIO was running we could fail + * to cleanup the orphan list in ext4_handle_inode_extension(). Do it + * now. */ if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { handle_t *handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); @@ -386,10 +387,11 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size, * blocks. But the code in ext4_iomap_alloc() is careful to use * zeroed/unwritten extents if this is possible; thus we won't leave * uninitialized blocks in a file even if we didn't succeed in writing - * as much as we intended. + * as much as we intended. Also we can race with truncate or write + * expanding the file so we have to be a bit careful here. */ - WARN_ON_ONCE(i_size_read(inode) < READ_ONCE(EXT4_I(inode)->i_disksize)); - if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize)) + if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize) && + pos + size <= i_size_read(inode)) return size; return ext4_handle_inode_extension(inode, pos, size); } From 809d50d36e71bbd56a2b253d94c991e82fe1d2dc Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Fri, 1 Dec 2023 22:50:48 +0800 Subject: [PATCH 1024/1397] ksmbd: fix memory leak in smb2_lock() [ Upstream commit 8f1752723019db900fb60a5b9d0dfd3a2bdea36c ] In smb2_lock(), if setup_async_work() executes successfully, work->cancel_argv will bind the argv that generated by kmalloc(). And release_async_work() is called in ksmbd_conn_try_dequeue_request() or smb2_lock() to release argv. However, when setup_async_work function fails, work->cancel_argv has not been bound to the argv, resulting in the previously allocated argv not being released. Call kfree() to fix it. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Signed-off-by: Zizhi Wo Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 269fbfb3cd67..2b248d45d40a 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -7074,6 +7074,7 @@ int smb2_lock(struct ksmbd_work *work) smb2_remove_blocked_lock, argv); if (rc) { + kfree(argv); err = -ENOMEM; goto out; } From 800f84d8f0de521c5681c0c6df4284b004588202 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 11 Dec 2023 10:00:57 +0100 Subject: [PATCH 1025/1397] efi/x86: Avoid physical KASLR on older Dell systems [ Upstream commit 50d7cdf7a9b1ab6f4f74a69c84e974d5dc0c1bf1 ] River reports boot hangs with v6.6 and v6.7, and the bisect points to commit a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") which moves the memory allocation and kernel decompression from the legacy decompressor (which executes *after* ExitBootServices()) to the EFI stub, using boot services for allocating the memory. The memory allocation succeeds but the subsequent call to decompress_kernel() never returns, resulting in a failed boot and a hanging system. As it turns out, this issue only occurs when physical address randomization (KASLR) is enabled, and given that this is a feature we can live without (virtual KASLR is much more important), let's disable the physical part of KASLR when booting on AMI UEFI firmware claiming to implement revision v2.0 of the specification (which was released in 2006), as this is the version these systems advertise. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218173 Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/libstub/x86-stub.c | 31 +++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 9d5df683f882..70b325a2f1f3 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -307,17 +307,20 @@ static void setup_unaccepted_memory(void) efi_err("Memory acceptance protocol failed\n"); } +static efi_char16_t *efistub_fw_vendor(void) +{ + unsigned long vendor = efi_table_attr(efi_system_table, fw_vendor); + + return (efi_char16_t *)vendor; +} + static const efi_char16_t apple[] = L"Apple"; static void setup_quirks(struct boot_params *boot_params) { - efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) - efi_table_attr(efi_system_table, fw_vendor); - - if (!memcmp(fw_vendor, apple, sizeof(apple))) { - if (IS_ENABLED(CONFIG_APPLE_PROPERTIES)) - retrieve_apple_device_properties(boot_params); - } + if (IS_ENABLED(CONFIG_APPLE_PROPERTIES) && + !memcmp(efistub_fw_vendor(), apple, sizeof(apple))) + retrieve_apple_device_properties(boot_params); } /* @@ -799,11 +802,25 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size; + static const efi_char16_t ami[] = L"American Megatrends"; efi_get_seed(seed, sizeof(seed)); virt_addr += (range * seed[1]) >> 32; virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1); + + /* + * Older Dell systems with AMI UEFI firmware v2.0 may hang + * while decompressing the kernel if physical address + * randomization is enabled. + * + * https://bugzilla.kernel.org/show_bug.cgi?id=218173 + */ + if (efi_system_table->hdr.revision <= EFI_2_00_SYSTEM_TABLE_REVISION && + !memcmp(efistub_fw_vendor(), ami, sizeof(ami))) { + efi_debug("AMI firmware v2.0 or older detected - disabling physical KASLR\n"); + seed[0] = 0; + } } status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, From 8715fe2fc1e8b462b29381d342ab1ecd895d2637 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 11 Dec 2023 21:43:52 +0000 Subject: [PATCH 1026/1397] afs: Fix refcount underflow from error handling race [ Upstream commit 52bf9f6c09fca8c74388cd41cc24e5d1bff812a9 ] If an AFS cell that has an unreachable (eg. ENETUNREACH) server listed (VL server or fileserver), an asynchronous probe to one of its addresses may fail immediately because sendmsg() returns an error. When this happens, a refcount underflow can happen if certain events hit a very small window. The way this occurs is: (1) There are two levels of "call" object, the afs_call and the rxrpc_call. Each of them can be transitioned to a "completed" state in the event of success or failure. (2) Asynchronous afs_calls are self-referential whilst they are active to prevent them from evaporating when they're not being processed. This reference is disposed of when the afs_call is completed. Note that an afs_call may only be completed once; once completed completing it again will do nothing. (3) When a call transmission is made, the app-side rxrpc code queues a Tx buffer for the rxrpc I/O thread to transmit. The I/O thread invokes sendmsg() to transmit it - and in the case of failure, it transitions the rxrpc_call to the completed state. (4) When an rxrpc_call is completed, the app layer is notified. In this case, the app is kafs and it schedules a work item to process events pertaining to an afs_call. (5) When the afs_call event processor is run, it goes down through the RPC-specific handler to afs_extract_data() to retrieve data from rxrpc - and, in this case, it picks up the error from the rxrpc_call and returns it. The error is then propagated to the afs_call and that is completed too. At this point the self-reference is released. (6) If the rxrpc I/O thread manages to complete the rxrpc_call within the window between rxrpc_send_data() queuing the request packet and checking for call completion on the way out, then rxrpc_kernel_send_data() will return the error from sendmsg() to the app. (7) Then afs_make_call() will see an error and will jump to the error handling path which will attempt to clean up the afs_call. (8) The problem comes when the error handling path in afs_make_call() tries to unconditionally drop an async afs_call's self-reference. This self-reference, however, may already have been dropped by afs_extract_data() completing the afs_call (9) The refcount underflows when we return to afs_do_probe_vlserver() and that tries to drop its reference on the afs_call. Fix this by making afs_make_call() attempt to complete the afs_call rather than unconditionally putting it. That way, if afs_extract_data() manages to complete the call first, afs_make_call() won't do anything. The bug can be forced by making do_udp_sendmsg() return -ENETUNREACH and sticking an msleep() in rxrpc_send_data() after the 'success:' label to widen the race window. The error message looks something like: refcount_t: underflow; use-after-free. WARNING: CPU: 3 PID: 720 at lib/refcount.c:28 refcount_warn_saturate+0xba/0x110 ... RIP: 0010:refcount_warn_saturate+0xba/0x110 ... afs_put_call+0x1dc/0x1f0 [kafs] afs_fs_get_capabilities+0x8b/0xe0 [kafs] afs_fs_probe_fileserver+0x188/0x1e0 [kafs] afs_lookup_server+0x3bf/0x3f0 [kafs] afs_alloc_server_list+0x130/0x2e0 [kafs] afs_create_volume+0x162/0x400 [kafs] afs_get_tree+0x266/0x410 [kafs] vfs_get_tree+0x25/0xc0 fc_mount+0xe/0x40 afs_d_automount+0x1b3/0x390 [kafs] __traverse_mounts+0x8f/0x210 step_into+0x340/0x760 path_openat+0x13a/0x1260 do_filp_open+0xaf/0x160 do_sys_openat2+0xaf/0x170 or something like: refcount_t: underflow; use-after-free. ... RIP: 0010:refcount_warn_saturate+0x99/0xda ... afs_put_call+0x4a/0x175 afs_send_vl_probes+0x108/0x172 afs_select_vlserver+0xd6/0x311 afs_do_cell_detect_alias+0x5e/0x1e9 afs_cell_detect_alias+0x44/0x92 afs_validate_fc+0x9d/0x134 afs_get_tree+0x20/0x2e6 vfs_get_tree+0x1d/0xc9 fc_mount+0xe/0x33 afs_d_automount+0x48/0x9d __traverse_mounts+0xe0/0x166 step_into+0x140/0x274 open_last_lookups+0x1c1/0x1df path_openat+0x138/0x1c3 do_filp_open+0x55/0xb4 do_sys_openat2+0x6c/0xb6 Fixes: 34fa47612bfe ("afs: Fix race in async call refcounting") Reported-by: Bill MacAllister Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1052304 Suggested-by: Jeffrey E Altman Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/2633992.1702073229@warthog.procyon.org.uk/ # v1 Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/afs/rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ed1644e7683f..d642d06a453b 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -424,7 +424,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) if (call->async) { if (cancel_work_sync(&call->async_work)) afs_put_call(call); - afs_put_call(call); + afs_set_call_complete(call, ret, 0); } ac->error = ret; From b89b7c7635705e0b1661e0a2a5965560ac37a0f2 Mon Sep 17 00:00:00 2001 From: Mikhail Khvainitski Date: Tue, 12 Dec 2023 15:31:48 +0200 Subject: [PATCH 1027/1397] HID: lenovo: Restrict detection of patched firmware only to USB cptkbd [ Upstream commit 43527a0094c10dfbf0d5a2e7979395a38de3ff65 ] Commit 46a0a2c96f0f ("HID: lenovo: Detect quirk-free fw on cptkbd and stop applying workaround") introduced a regression for ThinkPad TrackPoint Keyboard II which has similar quirks to cptkbd (so it uses the same workarounds) but slightly different so that there are false-positives during detecting well-behaving firmware. This commit restricts detecting well-behaving firmware to the only model which known to have one and have stable enough quirks to not cause false-positives. Fixes: 46a0a2c96f0f ("HID: lenovo: Detect quirk-free fw on cptkbd and stop applying workaround") Link: https://lore.kernel.org/linux-input/ZXRiiPsBKNasioqH@jekhomev/ Link: https://bbs.archlinux.org/viewtopic.php?pid=2135468#p2135468 Signed-off-by: Mikhail Khvainitski Tested-by: Yauhen Kharuzhy Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-lenovo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 7c1b33be9d13..149a3c74346b 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -692,7 +692,8 @@ static int lenovo_event_cptkbd(struct hid_device *hdev, * so set middlebutton_state to 3 * to never apply workaround anymore */ - if (cptkbd_data->middlebutton_state == 1 && + if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD && + cptkbd_data->middlebutton_state == 1 && usage->type == EV_REL && (usage->code == REL_X || usage->code == REL_Y)) { cptkbd_data->middlebutton_state = 3; From 17e600e438c6b597ac1cf8c592b2ab53c680f6e4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 20 Aug 2023 20:58:56 +0300 Subject: [PATCH 1028/1397] net/mlx5e: Honor user choice of IPsec replay window size [ Upstream commit a5e400a985df8041ed4659ed1462aa9134318130 ] Users can configure IPsec replay window size, but mlx5 driver didn't honor their choice and set always 32bits. Fix assignment logic to configure right size from the beginning. Fixes: 7db21ef4566e ("net/mlx5e: Set IPsec replay sequence numbers") Reviewed-by: Patrisious Haddad Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/en_accel/ipsec.c | 21 +++++++++++++++++++ .../mlx5/core/en_accel/ipsec_offload.c | 2 +- include/linux/mlx5/mlx5_ifc.h | 7 +++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 7d4ceb9b9c16..65678e89aea6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -335,6 +335,27 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->replay_esn.esn = sa_entry->esn_state.esn; attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; attrs->replay_esn.overlap = sa_entry->esn_state.overlap; + switch (x->replay_esn->replay_window) { + case 32: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_32BIT; + break; + case 64: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_64BIT; + break; + case 128: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_128BIT; + break; + case 256: + attrs->replay_esn.replay_window = + MLX5_IPSEC_ASO_REPLAY_WIN_256BIT; + break; + default: + WARN_ON(true); + return; + } } attrs->dir = x->xso.dir; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 3245d1c9d539..55b11d8cba53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -94,7 +94,7 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, if (attrs->dir == XFRM_DEV_OFFLOAD_IN) { MLX5_SET(ipsec_aso, aso_ctx, window_sz, - attrs->replay_esn.replay_window / 64); + attrs->replay_esn.replay_window); MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_REPLAY_PROTECTION); } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fc3db401f8a2..f08cd1303145 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -11936,6 +11936,13 @@ enum { MLX5_IPSEC_ASO_INC_SN = 0x2, }; +enum { + MLX5_IPSEC_ASO_REPLAY_WIN_32BIT = 0x0, + MLX5_IPSEC_ASO_REPLAY_WIN_64BIT = 0x1, + MLX5_IPSEC_ASO_REPLAY_WIN_128BIT = 0x2, + MLX5_IPSEC_ASO_REPLAY_WIN_256BIT = 0x3, +}; + struct mlx5_ifc_ipsec_aso_bits { u8 valid[0x1]; u8 reserved_at_201[0x1]; From 80299a1c685fff46755f336faed4b7a29cbd44fb Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 20 Sep 2023 10:07:13 +0300 Subject: [PATCH 1029/1397] net/mlx5e: Ensure that IPsec sequence packet number starts from 1 [ Upstream commit 3d42c8cc67a8fcbff0181f9ed6d03d353edcee07 ] According to RFC4303, section "3.3.3. Sequence Number Generation", the first packet sent using a given SA will contain a sequence number of 1. However if user didn't set seq/oseq, the HW used zero as first sequence packet number. Such misconfiguration causes to drop of first packet if replay window protection was enabled in SA. To fix it, set sequence number to be at least 1. Fixes: 7db21ef4566e ("net/mlx5e: Set IPsec replay sequence numbers") Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 65678e89aea6..0d4b8aef6add 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -121,7 +121,14 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); - sa_entry->esn_state.esn = esn; + if (sa_entry->esn_state.esn_msb) + sa_entry->esn_state.esn = esn; + else + /* According to RFC4303, section "3.3.3. Sequence Number Generation", + * the first packet sent using a given SA will contain a sequence + * number of 1. + */ + sa_entry->esn_state.esn = max_t(u32, esn, 1); sa_entry->esn_state.esn_msb = esn_msb; if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { From 20af7afcd8b85a4cb413072d631bf9a6469eee3a Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 21 Sep 2023 14:06:18 +0300 Subject: [PATCH 1030/1397] net/mlx5e: Unify esw and normal IPsec status table creation/destruction [ Upstream commit 94af50c0a9bb961fe93cf0fdd14eb0883da86721 ] Change normal IPsec flow to use the same creation/destruction functions for status flow table as that of ESW, which first of all refines the code to have less code duplication. And more importantly, the ESW status table handles IPsec syndrome checks at steering by HW, which is more efficient than the previous behaviour we had where it was copied to WQE meta data and checked by the driver. Fixes: 1762f132d542 ("net/mlx5e: Support IPsec packet offload for RX in switchdev mode") Signed-off-by: Patrisious Haddad Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 187 +++++++++++++----- .../mellanox/mlx5/core/esw/ipsec_fs.c | 152 -------------- .../mellanox/mlx5/core/esw/ipsec_fs.h | 15 -- 3 files changed, 141 insertions(+), 213 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 7dba4221993f..fc6aca7c05a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -128,63 +128,166 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, return mlx5_create_auto_grouped_flow_table(ns, &ft_attr); } -static int ipsec_status_rule(struct mlx5_core_dev *mdev, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) +static void ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) { - u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + mlx5_del_flow_rules(rx->status_drop.rule); + mlx5_destroy_flow_group(rx->status_drop.group); + mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); +} + +static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + mlx5_del_flow_rules(rx->status.rule); + + if (rx != ipsec->rx_esw) + return; + +#ifdef CONFIG_MLX5_ESWITCH + mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); +#endif +} + +static int ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table *ft = rx->ft.status; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {}; - struct mlx5_modify_hdr *modify_hdr; - struct mlx5_flow_handle *fte; + struct mlx5_flow_handle *rule; + struct mlx5_fc *flow_counter; struct mlx5_flow_spec *spec; - int err; + struct mlx5_flow_group *g; + u32 *flow_group_in; + int err = 0; + flow_group_in = kvzalloc(inlen, GFP_KERNEL); spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; + if (!flow_group_in || !spec) { + err = -ENOMEM; + goto err_out; + } - /* Action to copy 7 bit ipsec_syndrome to regB[24:30] */ - MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY); - MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME); - MLX5_SET(copy_action_in, action, src_offset, 0); - MLX5_SET(copy_action_in, action, length, 7); - MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); - MLX5_SET(copy_action_in, action, dst_offset, 24); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop flow group, err=%d\n", err); + goto err_out; + } - modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL, - 1, action); + flow_counter = mlx5_fc_create(mdev, false); + if (IS_ERR(flow_counter)) { + err = PTR_ERR(flow_counter); + mlx5_core_err(mdev, + "Failed to add ipsec rx status drop rule counter, err=%d\n", err); + goto err_cnt; + } - if (IS_ERR(modify_hdr)) { - err = PTR_ERR(modify_hdr); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter_id = mlx5_fc_id(flow_counter); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); mlx5_core_err(mdev, - "fail to alloc ipsec copy modify_header_id err=%d\n", err); - goto out_spec; + "Failed to add ipsec rx status drop rule, err=%d\n", err); + goto err_rule; } - /* create fte */ - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + rx->status_drop.group = g; + rx->status_drop.rule = rule; + rx->status_drop_cnt = flow_counter; + + kvfree(flow_group_in); + kvfree(spec); + return 0; + +err_rule: + mlx5_fc_destroy(mdev, flow_counter); +err_cnt: + mlx5_destroy_flow_group(g); +err_out: + kvfree(flow_group_in); + kvfree(spec); + return err; +} + +static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.ipsec_syndrome); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.ipsec_syndrome, 0); + if (rx == ipsec->rx_esw) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + flow_act.flags = FLOW_ACT_NO_APPEND; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - flow_act.modify_hdr = modify_hdr; - fte = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); - if (IS_ERR(fte)) { - err = PTR_ERR(fte); - mlx5_core_err(mdev, "fail to add ipsec rx err copy rule err=%d\n", err); - goto out; + rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(ipsec->mdev, + "Failed to add ipsec rx status pass rule, err=%d\n", err); + goto err_rule; } + rx->status.rule = rule; kvfree(spec); - rx->status.rule = fte; - rx->status.modify_hdr = modify_hdr; return 0; -out: - mlx5_modify_header_dealloc(mdev, modify_hdr); -out_spec: +err_rule: kvfree(spec); return err; } +static void mlx5_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + ipsec_rx_status_pass_destroy(ipsec, rx); + ipsec_rx_status_drop_destroy(ipsec, rx); +} + +static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + int err; + + err = ipsec_rx_status_drop_create(ipsec, rx); + if (err) + return err; + + err = ipsec_rx_status_pass_create(ipsec, rx, dest); + if (err) + goto err_pass_create; + + return 0; + +err_pass_create: + ipsec_rx_status_drop_destroy(ipsec, rx); + return err; +} + static int ipsec_miss_create(struct mlx5_core_dev *mdev, struct mlx5_flow_table *ft, struct mlx5e_ipsec_miss *miss, @@ -256,12 +359,7 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, mlx5_destroy_flow_table(rx->ft.sa); if (rx->allow_tunnel_mode) mlx5_eswitch_unblock_encap(mdev); - if (rx == ipsec->rx_esw) { - mlx5_esw_ipsec_rx_status_destroy(ipsec, rx); - } else { - mlx5_del_flow_rules(rx->status.rule); - mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); - } + mlx5_ipsec_rx_status_destroy(ipsec, rx); mlx5_destroy_flow_table(rx->ft.status); mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family); @@ -351,10 +449,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[1].counter_id = mlx5_fc_id(rx->fc->cnt); - if (rx == ipsec->rx_esw) - err = mlx5_esw_ipsec_rx_status_create(ipsec, rx, dest); - else - err = ipsec_status_rule(mdev, rx, dest); + err = mlx5_ipsec_rx_status_create(ipsec, rx, dest); if (err) goto err_add; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 095f31f380fa..13b5916b64e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -21,158 +21,6 @@ enum { MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL, }; -static void esw_ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - mlx5_del_flow_rules(rx->status_drop.rule); - mlx5_destroy_flow_group(rx->status_drop.group); - mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt); -} - -static void esw_ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - mlx5_del_flow_rules(rx->status.rule); - mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); -} - -static int esw_ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_table *ft = rx->ft.status; - struct mlx5_core_dev *mdev = ipsec->mdev; - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_handle *rule; - struct mlx5_fc *flow_counter; - struct mlx5_flow_spec *spec; - struct mlx5_flow_group *g; - u32 *flow_group_in; - int err = 0; - - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!flow_group_in || !spec) { - err = -ENOMEM; - goto err_out; - } - - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); - g = mlx5_create_flow_group(ft, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - mlx5_core_err(mdev, - "Failed to add ipsec rx status drop flow group, err=%d\n", err); - goto err_out; - } - - flow_counter = mlx5_fc_create(mdev, false); - if (IS_ERR(flow_counter)) { - err = PTR_ERR(flow_counter); - mlx5_core_err(mdev, - "Failed to add ipsec rx status drop rule counter, err=%d\n", err); - goto err_cnt; - } - - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; - dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(flow_counter); - spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; - rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - mlx5_core_err(mdev, - "Failed to add ipsec rx status drop rule, err=%d\n", err); - goto err_rule; - } - - rx->status_drop.group = g; - rx->status_drop.rule = rule; - rx->status_drop_cnt = flow_counter; - - kvfree(flow_group_in); - kvfree(spec); - return 0; - -err_rule: - mlx5_fc_destroy(mdev, flow_counter); -err_cnt: - mlx5_destroy_flow_group(g); -err_out: - kvfree(flow_group_in); - kvfree(spec); - return err; -} - -static int esw_ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) -{ - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - int err; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; - - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, - misc_parameters_2.ipsec_syndrome); - MLX5_SET(fte_match_param, spec->match_value, - misc_parameters_2.ipsec_syndrome, 0); - spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; - flow_act.flags = FLOW_ACT_NO_APPEND; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - mlx5_core_warn(ipsec->mdev, - "Failed to add ipsec rx status pass rule, err=%d\n", err); - goto err_rule; - } - - rx->status.rule = rule; - kvfree(spec); - return 0; - -err_rule: - kvfree(spec); - return err; -} - -void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) -{ - esw_ipsec_rx_status_pass_destroy(ipsec, rx); - esw_ipsec_rx_status_drop_destroy(ipsec, rx); -} - -int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) -{ - int err; - - err = esw_ipsec_rx_status_drop_create(ipsec, rx); - if (err) - return err; - - err = esw_ipsec_rx_status_pass_create(ipsec, rx, dest); - if (err) - goto err_pass_create; - - return 0; - -err_pass_create: - esw_ipsec_rx_status_drop_destroy(ipsec, rx); - return err; -} - void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h index 0c90f7a8b0d3..ac9c65b89166 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h @@ -8,11 +8,6 @@ struct mlx5e_ipsec; struct mlx5e_ipsec_sa_entry; #ifdef CONFIG_MLX5_ESWITCH -void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx); -int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest); void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr); int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, @@ -26,16 +21,6 @@ void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx_create_attr *attr); void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev); #else -static inline void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx) {} - -static inline int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) -{ - return -EINVAL; -} - static inline void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr) {} From 1a0d0e97a750f883b0f4ae6938346092bc41fcdb Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 12 Nov 2023 13:50:00 +0200 Subject: [PATCH 1031/1397] net/mlx5e: Tidy up IPsec NAT-T SA discovery [ Upstream commit c2bf84f1d1a1595dcc45fe867f0e02b331993fee ] IPsec NAT-T packets are UDP encapsulated packets over ESP normal ones. In case they arrive to RX, the SPI and ESP are located in inner header, while the check was performed on outer header instead. That wrong check caused to the situation where received rekeying request was missed and caused to rekey timeout, which "compensated" this failure by completing rekeying. Fixes: d65954934937 ("net/mlx5e: Support IPsec NAT-T functionality") Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 22 ++++++++++++++----- include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index fc6aca7c05a4..6dc60be2a697 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -974,13 +974,22 @@ static void setup_fte_esp(struct mlx5_flow_spec *spec) MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_ESP); } -static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi) +static void setup_fte_spi(struct mlx5_flow_spec *spec, u32 spi, bool encap) { /* SPI number */ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters.outer_esp_spi); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters.outer_esp_spi, spi); + if (encap) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.inner_esp_spi); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.inner_esp_spi, spi); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.outer_esp_spi); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters.outer_esp_spi, spi); + } } static void setup_fte_no_frags(struct mlx5_flow_spec *spec) @@ -1339,8 +1348,9 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) else setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); - setup_fte_spi(spec, attrs->spi); - setup_fte_esp(spec); + setup_fte_spi(spec, attrs->spi, attrs->encap); + if (!attrs->encap) + setup_fte_esp(spec); setup_fte_no_frags(spec); setup_fte_upper_proto_match(spec, &attrs->upspec); @@ -1443,7 +1453,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) switch (attrs->type) { case XFRM_DEV_OFFLOAD_CRYPTO: - setup_fte_spi(spec, attrs->spi); + setup_fte_spi(spec, attrs->spi, false); setup_fte_esp(spec); setup_fte_reg_a(spec); break; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f08cd1303145..8ac6ae79e083 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -620,7 +620,7 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_140[0x8]; u8 bth_dst_qp[0x18]; - u8 reserved_at_160[0x20]; + u8 inner_esp_spi[0x20]; u8 outer_esp_spi[0x20]; u8 reserved_at_1a0[0x60]; }; From 594a306461de8617a4b497b88a0c7670c4d93f2e Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 11 Oct 2023 03:38:29 +0000 Subject: [PATCH 1032/1397] net/mlx5e: Reduce eswitch mode_lock protection context [ Upstream commit baac8351f74c543896b8fd40138b7ad9365587a3 ] Currently eswitch mode_lock is so heavy, for example, it's locked during the whole process of the mode change, which may need to hold other locks. As the mode_lock is also used by IPSec to block mode and encap change now, it is easy to cause lock dependency. Since some of protections are also done by devlink lock, the eswitch mode_lock is not needed at those places, and thus the possibility of lockdep issue is reduced. Fixes: c8e350e62fc5 ("net/mlx5e: Make TC and IPsec offloads mutually exclusive on a netdev") Signed-off-by: Jianbo Liu Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 9 +++-- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 35 ++++++++++------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 + .../mellanox/mlx5/core/eswitch_offloads.c | 38 +++++++++++-------- 4 files changed, 52 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 6dc60be2a697..03f69c485a00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -1834,8 +1834,11 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) struct mlx5_eswitch *esw = mdev->priv.eswitch; int err = 0; - if (esw) - down_write(&esw->mode_lock); + if (esw) { + err = mlx5_esw_lock(esw); + if (err) + return err; + } if (mdev->num_block_ipsec) { err = -EBUSY; @@ -1846,7 +1849,7 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) unlock: if (esw) - up_write(&esw->mode_lock); + mlx5_esw_unlock(esw); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8d0b915a3121..3047d7015c52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1463,7 +1463,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) { int err; - lockdep_assert_held(&esw->mode_lock); + devl_assert_locked(priv_to_devlink(esw->dev)); if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); @@ -1531,7 +1531,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) if (toggle_lag) mlx5_lag_disable_change(esw->dev); - down_write(&esw->mode_lock); if (!mlx5_esw_is_fdb_created(esw)) { ret = mlx5_eswitch_enable_locked(esw, num_vfs); } else { @@ -1554,8 +1553,6 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) } } - up_write(&esw->mode_lock); - if (toggle_lag) mlx5_lag_enable_change(esw->dev); @@ -1569,12 +1566,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) return; devl_assert_locked(priv_to_devlink(esw->dev)); - down_write(&esw->mode_lock); /* If driver is unloaded, this function is called twice by remove_one() * and mlx5_unload(). Prevent the second call. */ if (!esw->esw_funcs.num_vfs && !esw->esw_funcs.num_ec_vfs && !clear_vf) - goto unlock; + return; esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", @@ -1603,9 +1599,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) esw->esw_funcs.num_vfs = 0; else esw->esw_funcs.num_ec_vfs = 0; - -unlock: - up_write(&esw->mode_lock); } /* Free resources for corresponding eswitch mode. It is called by devlink @@ -1647,10 +1640,8 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw) devl_assert_locked(priv_to_devlink(esw->dev)); mlx5_lag_disable_change(esw->dev); - down_write(&esw->mode_lock); mlx5_eswitch_disable_locked(esw); esw->mode = MLX5_ESWITCH_LEGACY; - up_write(&esw->mode_lock); mlx5_lag_enable_change(esw->dev); } @@ -2254,8 +2245,13 @@ bool mlx5_esw_hold(struct mlx5_core_dev *mdev) if (!mlx5_esw_allowed(esw)) return true; - if (down_read_trylock(&esw->mode_lock) != 0) + if (down_read_trylock(&esw->mode_lock) != 0) { + if (esw->eswitch_operation_in_progress) { + up_read(&esw->mode_lock); + return false; + } return true; + } return false; } @@ -2312,7 +2308,8 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw) if (down_write_trylock(&esw->mode_lock) == 0) return -EINVAL; - if (atomic64_read(&esw->user_count) > 0) { + if (esw->eswitch_operation_in_progress || + atomic64_read(&esw->user_count) > 0) { up_write(&esw->mode_lock); return -EBUSY; } @@ -2320,6 +2317,18 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw) return esw->mode; } +int mlx5_esw_lock(struct mlx5_eswitch *esw) +{ + down_write(&esw->mode_lock); + + if (esw->eswitch_operation_in_progress) { + up_write(&esw->mode_lock); + return -EBUSY; + } + + return 0; +} + /** * mlx5_esw_unlock() - Release write lock on esw mode lock * @esw: eswitch device. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 37ab66e7b403..b674b57d05aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -383,6 +383,7 @@ struct mlx5_eswitch { struct xarray paired; struct mlx5_devcom_comp_dev *devcom; u16 enabled_ipsec_vf_count; + bool eswitch_operation_in_progress; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -827,6 +828,7 @@ void mlx5_esw_release(struct mlx5_core_dev *dev); void mlx5_esw_get(struct mlx5_core_dev *dev); void mlx5_esw_put(struct mlx5_core_dev *dev); int mlx5_esw_try_lock(struct mlx5_eswitch *esw); +int mlx5_esw_lock(struct mlx5_eswitch *esw); void mlx5_esw_unlock(struct mlx5_eswitch *esw); void esw_vport_change_handle_locked(struct mlx5_vport *vport); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 88236e75fd90..bf78eeca401b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3733,13 +3733,16 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, goto unlock; } + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + mlx5_eswitch_disable_locked(esw); if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { if (mlx5_devlink_trap_get_num_active(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't change mode while devlink traps are active"); err = -EOPNOTSUPP; - goto unlock; + goto skip; } err = esw_offloads_start(esw, extack); } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { @@ -3749,6 +3752,9 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, err = -EINVAL; } +skip: + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; unlock: mlx5_esw_unlock(esw); enable_lag: @@ -3759,16 +3765,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct mlx5_eswitch *esw; - int err; esw = mlx5_devlink_eswitch_get(devlink); if (IS_ERR(esw)) return PTR_ERR(esw); - down_read(&esw->mode_lock); - err = esw_mode_to_devlink(esw->mode, mode); - up_read(&esw->mode_lock); - return err; + return esw_mode_to_devlink(esw->mode, mode); } static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, @@ -3862,11 +3864,15 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (err) goto out; + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack); - if (err) - goto out; + if (!err) + esw->offloads.inline_mode = mlx5_mode; - esw->offloads.inline_mode = mlx5_mode; + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; up_write(&esw->mode_lock); return 0; @@ -3878,16 +3884,12 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) { struct mlx5_eswitch *esw; - int err; esw = mlx5_devlink_eswitch_get(devlink); if (IS_ERR(esw)) return PTR_ERR(esw); - down_read(&esw->mode_lock); - err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); - up_read(&esw->mode_lock); - return err; + return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) @@ -3969,6 +3971,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, goto unlock; } + esw->eswitch_operation_in_progress = true; + up_write(&esw->mode_lock); + esw_destroy_offloads_fdb_tables(esw); esw->offloads.encap = encap; @@ -3982,6 +3987,9 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, (void)esw_create_offloads_fdb_tables(esw); } + down_write(&esw->mode_lock); + esw->eswitch_operation_in_progress = false; + unlock: up_write(&esw->mode_lock); return err; @@ -3996,9 +4004,7 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, if (IS_ERR(esw)) return PTR_ERR(esw); - down_read(&esw->mode_lock); *encap = esw->offloads.encap; - up_read(&esw->mode_lock); return 0; } From 4a95f412b7ee000b279df4842ad227a6eadec89c Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 12 Oct 2023 02:00:44 +0000 Subject: [PATCH 1033/1397] net/mlx5e: Check the number of elements before walk TC rhashtable [ Upstream commit 4e25b661f484df54b6751b65f9ea2434a3b67539 ] After IPSec TX tables are destroyed, the flow rules in TC rhashtable, which have the destination to IPSec, are restored to the original one, the uplink. However, when the device is in switchdev mode and unload driver with IPSec rules configured, TC rhashtable cleanup is done before IPSec cleanup, which means tc_ht->tbl is already freed when walking TC rhashtable, in order to restore the destination. So add the checking before walking to avoid unexpected behavior. Fixes: d1569537a837 ("net/mlx5e: Modify and restore TC rules for IPSec TX rules") Signed-off-by: Jianbo Liu Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 13b5916b64e2..d5d33c3b3aa2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -152,7 +152,7 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) xa_for_each(&esw->offloads.vport_reps, i, rep) { rpriv = rep->rep_data[REP_ETH].priv; - if (!rpriv || !rpriv->netdev) + if (!rpriv || !rpriv->netdev || !atomic_read(&rpriv->tc_ht.nelems)) continue; rhashtable_walk_enter(&rpriv->tc_ht, &iter); From fdd350fe5e1a8a87b3bda46792a72980b93e72ab Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 21 Sep 2023 15:10:27 +0300 Subject: [PATCH 1034/1397] RDMA/mlx5: Send events from IB driver about device affiliation state [ Upstream commit 0d293714ac32650bfb669ceadf7cc2fad8161401 ] Send blocking events from IB driver whenever the device is done being affiliated or if it is removed from an affiliation. This is useful since now the EN driver can register to those event and know when a device is affiliated or not. Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/a7491c3e483cfd8d962f5f75b9a25f253043384a.1695296682.git.leon@kernel.org Signed-off-by: Leon Romanovsky Stable-dep-of: 762a55a54eec ("net/mlx5e: Disable IPsec offload support if not FW steering") Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/main.c | 17 +++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++++++ include/linux/mlx5/device.h | 2 ++ include/linux/mlx5/driver.h | 2 ++ 4 files changed, 27 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5d963abb7e60..4c4233b9c8b0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -3175,6 +3176,13 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, lockdep_assert_held(&mlx5_ib_multiport_mutex); + mlx5_core_mp_event_replay(ibdev->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, + NULL); + mlx5_core_mp_event_replay(mpi->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, + NULL); + mlx5_ib_cleanup_cong_debugfs(ibdev, port_num); spin_lock(&port->mp.mpi_lock); @@ -3226,6 +3234,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) { u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; + u64 key; int err; lockdep_assert_held(&mlx5_ib_multiport_mutex); @@ -3254,6 +3263,14 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, mlx5_ib_init_cong_debugfs(ibdev, port_num); + key = ibdev->ib_dev.index; + mlx5_core_mp_event_replay(mpi->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + &key); + mlx5_core_mp_event_replay(ibdev->mdev, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + &key); + return true; unbind: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 15561965d2af..6ca91c0e8a6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -361,6 +361,12 @@ void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay); +void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data) +{ + mlx5_blocking_notifier_call_chain(dev, event, data); +} +EXPORT_SYMBOL(mlx5_core_mp_event_replay); + int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, enum mlx5_cap_mode cap_mode) { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4d5be378fa8c..26333d602a50 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -366,6 +366,8 @@ enum mlx5_driver_event { MLX5_DRIVER_EVENT_UPLINK_NETDEV, MLX5_DRIVER_EVENT_MACSEC_SA_ADDED, MLX5_DRIVER_EVENT_MACSEC_SA_DELETED, + MLX5_DRIVER_EVENT_AFFILIATION_DONE, + MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3033bbaeac81..5ca4e085d813 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1027,6 +1027,8 @@ bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); +void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data); + void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); From 7e46db5e2a311ff4b28d8a4e0a3f23d9cdf3e78b Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 30 Oct 2023 15:44:47 +0200 Subject: [PATCH 1035/1397] net/mlx5e: Disable IPsec offload support if not FW steering [ Upstream commit 762a55a54eec4217e4cec9265ab6e5d4c11b61bd ] IPsec FDB offload can only work with FW steering as of now, disable the cap upon non FW steering. And since the IPSec cap is dynamic now based on steering mode. Cleanup the resources if they exist instead of checking the IPsec cap again. Fixes: edd8b295f9e2 ("Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'") Signed-off-by: Chris Mi Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/en_accel/ipsec.c | 26 ++++++++----------- .../mlx5/core/en_accel/ipsec_offload.c | 8 +++++- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 0d4b8aef6add..5834e47e72d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -929,9 +929,11 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) return; mlx5e_accel_ipsec_fs_cleanup(ipsec); - if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) + if (ipsec->netevent_nb.notifier_call) { unregister_netevent_notifier(&ipsec->netevent_nb); - if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) + ipsec->netevent_nb.notifier_call = NULL; + } + if (ipsec->aso) mlx5e_ipsec_aso_cleanup(ipsec); destroy_workqueue(ipsec->wq); kfree(ipsec); @@ -1040,6 +1042,12 @@ static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, } } + if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET && + !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) { + NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported"); + return -EINVAL; + } + return 0; } @@ -1135,14 +1143,6 @@ static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { .xdo_dev_state_free = mlx5e_xfrm_free_state, .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, -}; - -static const struct xfrmdev_ops mlx5e_ipsec_packet_xfrmdev_ops = { - .xdo_dev_state_add = mlx5e_xfrm_add_state, - .xdo_dev_state_delete = mlx5e_xfrm_del_state, - .xdo_dev_state_free = mlx5e_xfrm_free_state, - .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, - .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft, .xdo_dev_policy_add = mlx5e_xfrm_add_policy, @@ -1160,11 +1160,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv) mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n"); - if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD) - netdev->xfrmdev_ops = &mlx5e_ipsec_packet_xfrmdev_ops; - else - netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; - + netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops; netdev->features |= NETIF_F_HW_ESP; netdev->hw_enc_features |= NETIF_F_HW_ESP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 55b11d8cba53..ce29e3172120 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -5,6 +5,8 @@ #include "en.h" #include "ipsec.h" #include "lib/crypto.h" +#include "fs_core.h" +#include "eswitch.h" enum { MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, @@ -37,7 +39,10 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, insert_trailer) && MLX5_CAP_ETH(mdev, swp)) caps |= MLX5_IPSEC_CAP_CRYPTO; - if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload)) { + if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload) && + (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS || + (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS && + is_mdev_legacy_mode(mdev)))) { if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_esp_trasport) && MLX5_CAP_FLOWTABLE_NIC_RX(mdev, @@ -558,6 +563,7 @@ void mlx5e_ipsec_aso_cleanup(struct mlx5e_ipsec *ipsec) dma_unmap_single(pdev, aso->dma_addr, sizeof(aso->ctx), DMA_BIDIRECTIONAL); kfree(aso); + ipsec->aso = NULL; } static void mlx5e_ipsec_aso_copy(struct mlx5_wqe_aso_ctrl_seg *ctrl, From 8ce3d969348a7c7fa3469588eb1319f9f3cc0eaa Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 21 Sep 2022 18:45:11 +0300 Subject: [PATCH 1036/1397] net/mlx5e: Fix possible deadlock on mlx5e_tx_timeout_work [ Upstream commit eab0da38912ebdad922ed0388209f7eb0a5163cd ] Due to the cited patch, devlink health commands take devlink lock and this may result in deadlock for mlx5e_tx_reporter as it takes local state_lock before calling devlink health report and on the other hand devlink health commands such as diagnose for same reporter take local state_lock after taking devlink lock (see kernel log below). To fix it, remove local state_lock from mlx5e_tx_timeout_work() before calling devlink_health_report() and take care to cancel the work before any call to close channels, which may free the SQs that should be handled by the work. Before cancel_work_sync(), use current_work() to check we are not calling it from within the work, as mlx5e_tx_timeout_work() itself may close the channels and reopen as part of recovery flow. While removing state_lock from mlx5e_tx_timeout_work() keep rtnl_lock to ensure no change in netdev->real_num_tx_queues, but use rtnl_trylock() and a flag to avoid deadlock by calling cancel_work_sync() before closing the channels while holding rtnl_lock too. Kernel log: ====================================================== WARNING: possible circular locking dependency detected 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 Not tainted ------------------------------------------------------ kworker/u16:2/65 is trying to acquire lock: ffff888122f6c2f8 (&devlink->lock_key#2){+.+.}-{3:3}, at: devlink_health_report+0x2f1/0x7e0 but task is already holding lock: ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&priv->state_lock){+.+.}-{3:3}: __mutex_lock+0x12c/0x14b0 mlx5e_rx_reporter_diagnose+0x71/0x700 [mlx5_core] devlink_nl_cmd_health_reporter_diagnose_doit+0x212/0xa50 genl_family_rcv_msg_doit+0x1e9/0x2f0 genl_rcv_msg+0x2e9/0x530 netlink_rcv_skb+0x11d/0x340 genl_rcv+0x24/0x40 netlink_unicast+0x438/0x710 netlink_sendmsg+0x788/0xc40 sock_sendmsg+0xb0/0xe0 __sys_sendto+0x1c1/0x290 __x64_sys_sendto+0xdd/0x1b0 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 -> #0 (&devlink->lock_key#2){+.+.}-{3:3}: __lock_acquire+0x2c8a/0x6200 lock_acquire+0x1c1/0x550 __mutex_lock+0x12c/0x14b0 devlink_health_report+0x2f1/0x7e0 mlx5e_health_report+0xc9/0xd7 [mlx5_core] mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core] mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core] process_one_work+0x7c2/0x1340 worker_thread+0x59d/0xec0 kthread+0x28f/0x330 ret_from_fork+0x1f/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&priv->state_lock); lock(&devlink->lock_key#2); lock(&priv->state_lock); lock(&devlink->lock_key#2); *** DEADLOCK *** 4 locks held by kworker/u16:2/65: #0: ffff88811a55b138 ((wq_completion)mlx5e#2){+.+.}-{0:0}, at: process_one_work+0x6e2/0x1340 #1: ffff888101de7db8 ((work_completion)(&priv->tx_timeout_work)){+.+.}-{0:0}, at: process_one_work+0x70f/0x1340 #2: ffffffff84ce8328 (rtnl_mutex){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x53/0x280 [mlx5_core] #3: ffff888121d20be0 (&priv->state_lock){+.+.}-{3:3}, at: mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] stack backtrace: CPU: 1 PID: 65 Comm: kworker/u16:2 Not tainted 6.0.0-rc3_for_upstream_debug_2022_08_30_13_10 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] Call Trace: dump_stack_lvl+0x57/0x7d check_noncircular+0x278/0x300 ? print_circular_bug+0x460/0x460 ? find_held_lock+0x2d/0x110 ? __stack_depot_save+0x24c/0x520 ? alloc_chain_hlocks+0x228/0x700 __lock_acquire+0x2c8a/0x6200 ? register_lock_class+0x1860/0x1860 ? kasan_save_stack+0x1e/0x40 ? kasan_set_free_info+0x20/0x30 ? ____kasan_slab_free+0x11d/0x1b0 ? kfree+0x1ba/0x520 ? devlink_health_do_dump.part.0+0x171/0x3a0 ? devlink_health_report+0x3d5/0x7e0 lock_acquire+0x1c1/0x550 ? devlink_health_report+0x2f1/0x7e0 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? find_held_lock+0x2d/0x110 __mutex_lock+0x12c/0x14b0 ? devlink_health_report+0x2f1/0x7e0 ? devlink_health_report+0x2f1/0x7e0 ? mutex_lock_io_nested+0x1320/0x1320 ? trace_hardirqs_on+0x2d/0x100 ? bit_wait_io_timeout+0x170/0x170 ? devlink_health_do_dump.part.0+0x171/0x3a0 ? kfree+0x1ba/0x520 ? devlink_health_do_dump.part.0+0x171/0x3a0 devlink_health_report+0x2f1/0x7e0 mlx5e_health_report+0xc9/0xd7 [mlx5_core] mlx5e_reporter_tx_timeout+0x2ab/0x3d0 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x400/0x400 ? mlx5e_reporter_tx_err_cqe+0x1b0/0x1b0 [mlx5_core] ? mlx5e_tx_reporter_timeout_dump+0x70/0x70 [mlx5_core] ? mlx5e_tx_reporter_dump_sq+0x320/0x320 [mlx5_core] ? mlx5e_tx_timeout_work+0x70/0x280 [mlx5_core] ? mutex_lock_io_nested+0x1320/0x1320 ? process_one_work+0x70f/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? lock_downgrade+0x6e0/0x6e0 mlx5e_tx_timeout_work+0x1c1/0x280 [mlx5_core] process_one_work+0x7c2/0x1340 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? pwq_dec_nr_in_flight+0x230/0x230 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x59d/0xec0 ? process_one_work+0x1340/0x1340 kthread+0x28f/0x330 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: c90005b5f75c ("devlink: Hold the instance lock in health callbacks") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 27 ++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 86f2690c5e01..20a6bc1a234f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -818,6 +818,7 @@ enum { MLX5E_STATE_DESTROYING, MLX5E_STATE_XDP_TX_ENABLED, MLX5E_STATE_XDP_ACTIVE, + MLX5E_STATE_CHANNELS_ACTIVE, }; struct mlx5e_modify_sq_param { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index acb40770cf0c..c3961c2bbc57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2668,6 +2668,7 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) { int i; + ASSERT_RTNL(); if (chs->ptp) { mlx5e_ptp_close(chs->ptp); chs->ptp = NULL; @@ -2945,17 +2946,29 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) if (mlx5e_is_vport_rep(priv)) mlx5e_rep_activate_channels(priv); + set_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); + mlx5e_wait_channels_min_rx_wqes(&priv->channels); if (priv->rx_res) mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels); } +static void mlx5e_cancel_tx_timeout_work(struct mlx5e_priv *priv) +{ + WARN_ON_ONCE(test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)); + if (current_work() != &priv->tx_timeout_work) + cancel_work_sync(&priv->tx_timeout_work); +} + void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { if (priv->rx_res) mlx5e_rx_res_channels_deactivate(priv->rx_res); + clear_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state); + mlx5e_cancel_tx_timeout_work(priv); + if (mlx5e_is_vport_rep(priv)) mlx5e_rep_deactivate_channels(priv); @@ -4734,8 +4747,17 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) struct net_device *netdev = priv->netdev; int i; - rtnl_lock(); - mutex_lock(&priv->state_lock); + /* Take rtnl_lock to ensure no change in netdev->real_num_tx_queues + * through this flow. However, channel closing flows have to wait for + * this work to finish while holding rtnl lock too. So either get the + * lock or find that channels are being closed for other reason and + * this work is not relevant anymore. + */ + while (!rtnl_trylock()) { + if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) + return; + msleep(20); + } if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto unlock; @@ -4754,7 +4776,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) } unlock: - mutex_unlock(&priv->state_lock); rtnl_unlock(); } From b766f8b8d4d13d358ebc4d169ededd50d1468a74 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 11 Sep 2023 13:28:10 +0300 Subject: [PATCH 1037/1397] net/mlx5e: TC, Don't offload post action rule if not supported [ Upstream commit ccbe33003b109f14c4dde2a4fca9c2a50c423601 ] If post action is not supported, eg. ignore_flow_level is not supported, don't offload post action rule. Otherwise, will hit panic [1]. Fix it by checking if post action table is valid or not. [1] [445537.863880] BUG: unable to handle page fault for address: ffffffffffffffb1 [445537.864617] #PF: supervisor read access in kernel mode [445537.865244] #PF: error_code(0x0000) - not-present page [445537.865860] PGD 70683a067 P4D 70683a067 PUD 70683c067 PMD 0 [445537.866497] Oops: 0000 [#1] PREEMPT SMP NOPTI [445537.867077] CPU: 19 PID: 248742 Comm: tc Kdump: loaded Tainted: G O 6.5.0+ #1 [445537.867888] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [445537.868834] RIP: 0010:mlx5e_tc_post_act_add+0x51/0x130 [mlx5_core] [445537.869635] Code: c0 0d 00 00 e8 20 96 c6 d3 48 85 c0 0f 84 e5 00 00 00 c7 83 b0 01 00 00 00 00 00 00 49 89 c5 31 c0 31 d2 66 89 83 b4 01 00 00 <49> 8b 44 24 10 83 23 df 83 8b d8 01 00 00 04 48 89 83 c0 01 00 00 [445537.871318] RSP: 0018:ffffb98741cef428 EFLAGS: 00010246 [445537.871962] RAX: 0000000000000000 RBX: ffff8df341167000 RCX: 0000000000000001 [445537.872704] RDX: 0000000000000000 RSI: ffffffff954844e1 RDI: ffffffff9546e9cb [445537.873430] RBP: ffffb98741cef448 R08: 0000000000000020 R09: 0000000000000246 [445537.874160] R10: 0000000000000000 R11: ffffffff943f73ff R12: ffffffffffffffa1 [445537.874893] R13: ffff8df36d336c20 R14: ffffffffffffffa1 R15: ffff8df341167000 [445537.875628] FS: 00007fcd6564f800(0000) GS:ffff8dfa9ea00000(0000) knlGS:0000000000000000 [445537.876425] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [445537.877090] CR2: ffffffffffffffb1 CR3: 00000003b5884001 CR4: 0000000000770ee0 [445537.877832] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [445537.878564] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [445537.879300] PKRU: 55555554 [445537.879797] Call Trace: [445537.880263] [445537.880713] ? show_regs+0x6e/0x80 [445537.881232] ? __die+0x29/0x70 [445537.881731] ? page_fault_oops+0x85/0x160 [445537.882276] ? search_exception_tables+0x65/0x70 [445537.882852] ? kernelmode_fixup_or_oops+0xa2/0x120 [445537.883432] ? __bad_area_nosemaphore+0x18b/0x250 [445537.884019] ? bad_area_nosemaphore+0x16/0x20 [445537.884566] ? do_kern_addr_fault+0x8b/0xa0 [445537.885105] ? exc_page_fault+0xf5/0x1c0 [445537.885623] ? asm_exc_page_fault+0x2b/0x30 [445537.886149] ? __kmem_cache_alloc_node+0x1df/0x2a0 [445537.886717] ? mlx5e_tc_post_act_add+0x51/0x130 [mlx5_core] [445537.887431] ? mlx5e_tc_post_act_add+0x30/0x130 [mlx5_core] [445537.888172] alloc_flow_post_acts+0xfb/0x1c0 [mlx5_core] [445537.888849] parse_tc_actions+0x582/0x5c0 [mlx5_core] [445537.889505] parse_tc_fdb_actions+0xd7/0x1f0 [mlx5_core] [445537.890175] __mlx5e_add_fdb_flow+0x1ab/0x2b0 [mlx5_core] [445537.890843] mlx5e_add_fdb_flow+0x56/0x120 [mlx5_core] [445537.891491] ? debug_smp_processor_id+0x1b/0x30 [445537.892037] mlx5e_tc_add_flow+0x79/0x90 [mlx5_core] [445537.892676] mlx5e_configure_flower+0x305/0x450 [mlx5_core] [445537.893341] mlx5e_rep_setup_tc_cls_flower+0x3d/0x80 [mlx5_core] [445537.894037] mlx5e_rep_setup_tc_cb+0x5c/0xa0 [mlx5_core] [445537.894693] tc_setup_cb_add+0xdc/0x220 [445537.895177] fl_hw_replace_filter+0x15f/0x220 [cls_flower] [445537.895767] fl_change+0xe87/0x1190 [cls_flower] [445537.896302] tc_new_tfilter+0x484/0xa50 Fixes: f0da4daa3413 ("net/mlx5e: Refactor ct to use post action infrastructure") Signed-off-by: Chris Mi Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed Reviewed-by: Automatic Verification Reviewed-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Reviewed-by: Shachar Kagan Reviewed-by: Tariq Toukan Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/en/tc/post_act.c | 6 +++++ .../net/ethernet/mellanox/mlx5/core/en_tc.c | 25 ++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 4e923a2874ae..86bf007fd05b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -83,6 +83,9 @@ mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act, struct mlx5_flow_spec *spec; int err; + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM; @@ -111,6 +114,9 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *po struct mlx5e_post_act_handle *handle; int err; + if (IS_ERR(post_act)) + return ERR_CAST(post_act); + handle = kzalloc(sizeof(*handle), GFP_KERNEL); if (!handle) return ERR_PTR(-ENOMEM); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b62fd3749341..1bead98f73bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -444,6 +444,9 @@ mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv, struct mlx5e_flow_meter_handle *meter; enum mlx5e_post_meter_type type; + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params); if (IS_ERR(meter)) { mlx5_core_err(priv->mdev, "Failed to get flow meter\n"); @@ -3736,6 +3739,20 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) return err; } +static int +set_branch_dest_ft(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) +{ + struct mlx5e_post_act *post_act = get_post_action(priv); + + if (IS_ERR(post_act)) + return PTR_ERR(post_act); + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act); + + return 0; +} + static int alloc_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5e_tc_act_branch_ctrl *cond, @@ -3759,8 +3776,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, break; case FLOW_ACTION_ACCEPT: case FLOW_ACTION_PIPE: - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); + if (set_branch_dest_ft(flow->priv, attr)) + goto out_err; break; case FLOW_ACTION_JUMP: if (*jump_count) { @@ -3769,8 +3786,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, goto out_err; } *jump_count = cond->extval; - attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); + if (set_branch_dest_ft(flow->priv, attr)) + goto out_err; break; default: err = -EOPNOTSUPP; From a4839771d7b9d74b98b8209967ffdbfb28ae9104 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 7 Aug 2023 13:11:32 +0300 Subject: [PATCH 1038/1397] net/mlx5: Nack sync reset request when HotPlug is enabled [ Upstream commit 3d7a3f2612d75de5f371a681038b089ded6667eb ] Current sync reset flow is not supported when PCIe bridge connected directly to mlx5 device has HotPlug interrupt enabled and can be triggered on link state change event. Return nack on reset request in such case. Fixes: 92501fa6e421 ("net/mlx5: Ack on sync_reset_request only if PF can do reset_now") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../ethernet/mellanox/mlx5/core/fw_reset.c | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index b568988e92e3..c4e19d627da2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -325,6 +325,29 @@ static void mlx5_fw_live_patch_event(struct work_struct *work) mlx5_core_err(dev, "Failed to reload FW tracer\n"); } +#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) +static int mlx5_check_hotplug_interrupt(struct mlx5_core_dev *dev) +{ + struct pci_dev *bridge = dev->pdev->bus->self; + u16 reg16; + int err; + + if (!bridge) + return -EOPNOTSUPP; + + err = pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, ®16); + if (err) + return err; + + if ((reg16 & PCI_EXP_SLTCTL_HPIE) && (reg16 & PCI_EXP_SLTCTL_DLLSCE)) { + mlx5_core_warn(dev, "FW reset is not supported as HotPlug is enabled\n"); + return -EOPNOTSUPP; + } + + return 0; +} +#endif + static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) { struct pci_bus *bridge_bus = dev->pdev->bus; @@ -357,6 +380,12 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev) return false; } +#if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) + err = mlx5_check_hotplug_interrupt(dev); + if (err) + return false; +#endif + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); if (err) return false; From ef3b2d5f21526a47067ac0ab885fd1c096fcec82 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 31 Aug 2023 05:47:09 +0300 Subject: [PATCH 1039/1397] net/mlx5e: Check netdev pointer before checking its net ns [ Upstream commit 7aaf975238c47b710fcc4eca0da1e7902a53abe2 ] Previously, when comparing the net namespaces, the case where the netdev doesn't exist wasn't taken into account, and therefore can cause a crash. In such a case, the comparing function should return false, as there is no netdev->net to compare the devlink->net to. Furthermore, this will result in an attempt to enter switchdev mode without a netdev to fail, and which is the desired result as there is no meaning in switchdev mode without a net device. Fixes: 662404b24a4c ("net/mlx5e: Block entering switchdev mode with ns inconsistency") Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/eswitch_offloads.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bf78eeca401b..bb8bcb448ae9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3653,14 +3653,18 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) { + struct mlx5_core_dev *dev = devlink_priv(devlink); struct net *devl_net, *netdev_net; - struct mlx5_eswitch *esw; - - esw = mlx5_devlink_eswitch_nocheck_get(devlink); - netdev_net = dev_net(esw->dev->mlx5e_res.uplink_netdev); - devl_net = devlink_net(devlink); + bool ret = false; - return net_eq(devl_net, netdev_net); + mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); + if (dev->mlx5e_res.uplink_netdev) { + netdev_net = dev_net(dev->mlx5e_res.uplink_netdev); + devl_net = devlink_net(devlink); + ret = net_eq(devl_net, netdev_net); + } + mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); + return ret; } int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) From 6cb39c79bca9ed05ba9b33083a6f09211d79f05e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Nov 2023 09:36:20 +0300 Subject: [PATCH 1040/1397] net/mlx5: Fix a NULL vs IS_ERR() check [ Upstream commit ca4ef28d0ad831d2521fa2b16952f37fd9324ca3 ] The mlx5_esw_offloads_devlink_port() function returns error pointers, not NULL. Fixes: 7bef147a6ab6 ("net/mlx5: Don't skip vport check") Signed-off-by: Dan Carpenter Reviewed-by: Wojciech Drewek Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 825f9c687633..007cb167cabc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1503,7 +1503,7 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); - if (dl_port) { + if (!IS_ERR(dl_port)) { SET_NETDEV_DEVLINK_PORT(netdev, dl_port); mlx5e_rep_vnic_reporter_create(priv, dl_port); } From 92d813f73f649a32cb936583030c2edcc309eb37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Wed, 6 Dec 2023 09:36:12 -0800 Subject: [PATCH 1041/1397] net: ipv6: support reporting otherwise unknown prefix flags in RTM_NEWPREFIX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bd4a816752bab609dd6d65ae021387beb9e2ddbd ] Lorenzo points out that we effectively clear all unknown flags from PIO when copying them to userspace in the netlink RTM_NEWPREFIX notification. We could fix this one at a time as new flags are defined, or in one fell swoop - I choose the latter. We could either define 6 new reserved flags (reserved1..6) and handle them individually (and rename them as new flags are defined), or we could simply copy the entire unmodified byte over - I choose the latter. This unfortunately requires some anonymous union/struct magic, so we add a static assert on the struct size for a little extra safety. Cc: David Ahern Cc: Lorenzo Colitti Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Maciej Żenczykowski Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/addrconf.h | 12 ++++++++++-- include/net/if_inet6.h | 4 ---- net/ipv6/addrconf.c | 6 +----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 82da55101b5a..61ebe723ee4d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -31,17 +31,22 @@ struct prefix_info { __u8 length; __u8 prefix_len; + union __packed { + __u8 flags; + struct __packed { #if defined(__BIG_ENDIAN_BITFIELD) - __u8 onlink : 1, + __u8 onlink : 1, autoconf : 1, reserved : 6; #elif defined(__LITTLE_ENDIAN_BITFIELD) - __u8 reserved : 6, + __u8 reserved : 6, autoconf : 1, onlink : 1; #else #error "Please fix " #endif + }; + }; __be32 valid; __be32 prefered; __be32 reserved2; @@ -49,6 +54,9 @@ struct prefix_info { struct in6_addr prefix; }; +/* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */ +static_assert(sizeof(struct prefix_info) == 32); + #include #include #include diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index c8490729b4ae..31bf475eca76 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -22,10 +22,6 @@ #define IF_RS_SENT 0x10 #define IF_READY 0x80000000 -/* prefix flags */ -#define IF_PREFIX_ONLINK 0x01 -#define IF_PREFIX_AUTOCONF 0x02 - enum { INET6_IFADDR_STATE_PREDAD, INET6_IFADDR_STATE_DAD, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0b6ee962c84e..b007d098ffe2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6137,11 +6137,7 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, pmsg->prefix_len = pinfo->prefix_len; pmsg->prefix_type = pinfo->type; pmsg->prefix_pad3 = 0; - pmsg->prefix_flags = 0; - if (pinfo->onlink) - pmsg->prefix_flags |= IF_PREFIX_ONLINK; - if (pinfo->autoconf) - pmsg->prefix_flags |= IF_PREFIX_AUTOCONF; + pmsg->prefix_flags = pinfo->flags; if (nla_put(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix)) goto nla_put_failure; From 21b9dc814d3fe447b8f1fef67efeb7a3bc0f1502 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 6 Dec 2023 15:12:20 +0100 Subject: [PATCH 1042/1397] qca_debug: Prevent crash on TX ring changes [ Upstream commit f4e6064c97c050bd9904925ff7d53d0c9954fc7b ] The qca_spi driver stop and restart the SPI kernel thread (via ndo_stop & ndo_open) in case of TX ring changes. This is a big issue because it allows userspace to prevent restart of the SPI kernel thread (via signals). A subsequent change of TX ring wrongly assume a valid spi_thread pointer which result in a crash. So prevent this by stopping the network traffic handling and temporary park the SPI thread. Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20231206141222.52029-2-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/qualcomm/qca_debug.c | 9 ++++----- drivers/net/ethernet/qualcomm/qca_spi.c | 12 ++++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c index 6f2fa2a42770..a5445252b0c4 100644 --- a/drivers/net/ethernet/qualcomm/qca_debug.c +++ b/drivers/net/ethernet/qualcomm/qca_debug.c @@ -263,7 +263,6 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { - const struct net_device_ops *ops = dev->netdev_ops; struct qcaspi *qca = netdev_priv(dev); if ((ring->rx_pending) || @@ -271,14 +270,14 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, (ring->rx_jumbo_pending)) return -EINVAL; - if (netif_running(dev)) - ops->ndo_stop(dev); + if (qca->spi_thread) + kthread_park(qca->spi_thread); qca->txr.count = max_t(u32, ring->tx_pending, TX_RING_MIN_LEN); qca->txr.count = min_t(u16, qca->txr.count, TX_RING_MAX_LEN); - if (netif_running(dev)) - ops->ndo_open(dev); + if (qca->spi_thread) + kthread_unpark(qca->spi_thread); return 0; } diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index bec723028e96..b0fad69bb755 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -580,6 +580,18 @@ qcaspi_spi_thread(void *data) netdev_info(qca->net_dev, "SPI thread created\n"); while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_park()) { + netif_tx_disable(qca->net_dev); + netif_carrier_off(qca->net_dev); + qcaspi_flush_tx_ring(qca); + kthread_parkme(); + if (qca->sync == QCASPI_SYNC_READY) { + netif_carrier_on(qca->net_dev); + netif_wake_queue(qca->net_dev); + } + continue; + } + if ((qca->intr_req == qca->intr_svc) && !qca->txr.skb[qca->txr.head]) schedule(); From 02296b1d8449980b81cab590717ca10743d93527 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 6 Dec 2023 15:12:21 +0100 Subject: [PATCH 1043/1397] qca_debug: Fix ethtool -G iface tx behavior [ Upstream commit 96a7e861d9e04d07febd3011c30cd84cd141d81f ] After calling ethtool -g it was not possible to adjust the TX ring size again: # ethtool -g eth1 Ring parameters for eth1: Pre-set maximums: RX: 4 RX Mini: n/a RX Jumbo: n/a TX: 10 Current hardware settings: RX: 4 RX Mini: n/a RX Jumbo: n/a TX: 10 # ethtool -G eth1 tx 8 netlink error: Invalid argument The reason for this is that the readonly setting rx_pending get initialized and after that the range check in qcaspi_set_ringparam() fails regardless of the provided parameter. So fix this by accepting the exposed RX defaults. Instead of adding another magic number better use a new define here. Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Suggested-by: Paolo Abeni Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20231206141222.52029-3-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/qualcomm/qca_debug.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c index a5445252b0c4..1822f2ad8f0d 100644 --- a/drivers/net/ethernet/qualcomm/qca_debug.c +++ b/drivers/net/ethernet/qualcomm/qca_debug.c @@ -30,6 +30,8 @@ #define QCASPI_MAX_REGS 0x20 +#define QCASPI_RX_MAX_FRAMES 4 + static const u16 qcaspi_spi_regs[] = { SPI_REG_BFR_SIZE, SPI_REG_WRBUF_SPC_AVA, @@ -252,9 +254,9 @@ qcaspi_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, { struct qcaspi *qca = netdev_priv(dev); - ring->rx_max_pending = 4; + ring->rx_max_pending = QCASPI_RX_MAX_FRAMES; ring->tx_max_pending = TX_RING_MAX_LEN; - ring->rx_pending = 4; + ring->rx_pending = QCASPI_RX_MAX_FRAMES; ring->tx_pending = qca->txr.count; } @@ -265,7 +267,7 @@ qcaspi_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring, { struct qcaspi *qca = netdev_priv(dev); - if ((ring->rx_pending) || + if (ring->rx_pending != QCASPI_RX_MAX_FRAMES || (ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; From f7dac967e17081a84b718990a85e48bc74fb9412 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 6 Dec 2023 15:12:22 +0100 Subject: [PATCH 1044/1397] qca_spi: Fix reset behavior [ Upstream commit 1057812d146dd658c9a9a96d869c2551150207b5 ] In case of a reset triggered by the QCA7000 itself, the behavior of the qca_spi driver was not quite correct: - in case of a pending RX frame decoding the drop counter must be incremented and decoding state machine reseted - also the reset counter must always be incremented regardless of sync state Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20231206141222.52029-4-wahrenst@gmx.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/qualcomm/qca_spi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index b0fad69bb755..5f3c11fb3fa2 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -620,11 +620,17 @@ qcaspi_spi_thread(void *data) if (intr_cause & SPI_INT_CPU_ON) { qcaspi_qca7k_sync(qca, QCASPI_EVENT_CPUON); + /* Frame decoding in progress */ + if (qca->frm_handle.state != qca->frm_handle.init) + qca->net_dev->stats.rx_dropped++; + + qcafrm_fsm_init_spi(&qca->frm_handle); + qca->stats.device_reset++; + /* not synced. */ if (qca->sync != QCASPI_SYNC_READY) continue; - qca->stats.device_reset++; netif_wake_queue(qca->net_dev); netif_carrier_on(qca->net_dev); } From bf9ceb1633621d15e83723ec87c938e4b73b301f Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 7 Dec 2023 16:16:55 -0800 Subject: [PATCH 1045/1397] bnxt_en: Clear resource reservation during resume [ Upstream commit 9ef7c58f5abe41e6d91f37f28fe2d851ffedd92a ] We are issuing HWRM_FUNC_RESET cmd to reset the device including all reserved resources, but not clearing the reservations within the driver struct. As a result, when the driver re-initializes as part of resume, it believes that there is no need to do any resource reservation and goes ahead and tries to allocate rings which will eventually fail beyond a certain number pre-reserved by the firmware. Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.") Reviewed-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Andy Gospodarek Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20231208001658.14230-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7551aa8068f8..4d2296f201ad 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13897,6 +13897,8 @@ static int bnxt_resume(struct device *device) if (rc) goto resume_exit; + bnxt_clear_reservations(bp, true); + if (bnxt_hwrm_func_drv_rgtr(bp, NULL, 0, false)) { rc = -ENODEV; goto resume_exit; From d8ea6b0d549bb1d7522d17124a66d6e52e8c4d9e Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Thu, 7 Dec 2023 16:16:56 -0800 Subject: [PATCH 1046/1397] bnxt_en: Fix skb recycling logic in bnxt_deliver_skb() [ Upstream commit aded5d1feb08e48d544845d3594d70c4d5fe6e54 ] Receive SKBs can go through the VF-rep path or the normal path. skb_mark_for_recycle() is only called for the normal path. Fix it to do it for both paths to fix possible stalled page pool shutdown errors. Fixes: 86b05508f775 ("bnxt_en: Use the unified RX page pool buffers for XDP and non-XDP") Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek Reviewed-by: Vikas Gupta Signed-off-by: Sreekanth Reddy Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20231208001658.14230-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4d2296f201ad..9f52b943fede 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1749,13 +1749,14 @@ static void bnxt_tpa_agg(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi, struct sk_buff *skb) { + skb_mark_for_recycle(skb); + if (skb->dev != bp->dev) { /* this packet belongs to a vf-rep */ bnxt_vf_rep_rx(bp, skb); return; } skb_record_rx_queue(skb, bnapi->index); - skb_mark_for_recycle(skb); napi_gro_receive(&bnapi->napi, skb); } From 909f5a48bf23b0b2e5e45fc68f8b4e136c5153ac Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 7 Dec 2023 16:16:57 -0800 Subject: [PATCH 1047/1397] bnxt_en: Fix wrong return value check in bnxt_close_nic() [ Upstream commit bd6781c18cb5b5e5d8c5873fa9a51668e89ec76e ] The wait_event_interruptible_timeout() function returns 0 if the timeout elapsed, -ERESTARTSYS if it was interrupted by a signal, and the remaining jiffies otherwise if the condition evaluated to true before the timeout elapsed. Driver should have checked for zero return value instead of a positive value. MChan: Print a warning for -ERESTARTSYS. The close operation will proceed anyway when wait_event_interruptible_timeout() returns for any reason. Since we do the close no matter what, we should not return this error code to the caller. Change bnxt_close_nic() to a void function and remove all error handling from some of the callers. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reviewed-by: Andy Gospodarek Reviewed-by: Vikas Gupta Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20231208001658.14230-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- .../net/ethernet/broadcom/bnxt/bnxt_devlink.c | 11 ++--------- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 19 ++++--------------- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 5 ++--- 5 files changed, 16 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9f52b943fede..4ce34a39bb5e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10704,10 +10704,8 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bnxt_free_mem(bp, irq_re_init); } -int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) +void bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { - int rc = 0; - if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { /* If we get here, it means firmware reset is in progress * while we are trying to close. We can safely proceed with @@ -10722,15 +10720,18 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) #ifdef CONFIG_BNXT_SRIOV if (bp->sriov_cfg) { + int rc; + rc = wait_event_interruptible_timeout(bp->sriov_cfg_wait, !bp->sriov_cfg, BNXT_SRIOV_CFG_WAIT_TMO); - if (rc) - netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete!\n"); + if (!rc) + netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete, proceeding to close!\n"); + else if (rc < 0) + netdev_warn(bp->dev, "SRIOV config operation interrupted, proceeding to close!\n"); } #endif __bnxt_close_nic(bp, irq_re_init, link_re_init); - return rc; } static int bnxt_close(struct net_device *dev) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 84cbcfa61bc1..ea0f47eceea7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2362,7 +2362,7 @@ int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); void bnxt_reenable_sriov(struct bnxt *bp); -int bnxt_close_nic(struct bnxt *, bool, bool); +void bnxt_close_nic(struct bnxt *, bool, bool); void bnxt_get_ring_err_stats(struct bnxt *bp, struct bnxt_total_ring_err_stats *stats); int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 8b3e7697390f..9d39f194b260 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -478,15 +478,8 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, return -ENODEV; } bnxt_ulp_stop(bp); - if (netif_running(bp->dev)) { - rc = bnxt_close_nic(bp, true, true); - if (rc) { - NL_SET_ERR_MSG_MOD(extack, "Failed to close"); - dev_close(bp->dev); - rtnl_unlock(); - break; - } - } + if (netif_running(bp->dev)) + bnxt_close_nic(bp, true, true); bnxt_vf_reps_free(bp); rc = bnxt_hwrm_func_drv_unrgtr(bp); if (rc) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 547247d98eba..3c36dd805148 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -164,9 +164,8 @@ static int bnxt_set_coalesce(struct net_device *dev, reset_coalesce: if (test_bit(BNXT_STATE_OPEN, &bp->state)) { if (update_stats) { - rc = bnxt_close_nic(bp, true, false); - if (!rc) - rc = bnxt_open_nic(bp, true, false); + bnxt_close_nic(bp, true, false); + rc = bnxt_open_nic(bp, true, false); } else { rc = bnxt_hwrm_set_coal(bp); } @@ -955,12 +954,7 @@ static int bnxt_set_channels(struct net_device *dev, * before PF unload */ } - rc = bnxt_close_nic(bp, true, false); - if (rc) { - netdev_err(bp->dev, "Set channel failure rc :%x\n", - rc); - return rc; - } + bnxt_close_nic(bp, true, false); } if (sh) { @@ -3737,12 +3731,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, bnxt_run_fw_tests(bp, test_mask, &test_results); } else { bnxt_ulp_stop(bp); - rc = bnxt_close_nic(bp, true, false); - if (rc) { - etest->flags |= ETH_TEST_FL_FAILED; - bnxt_ulp_start(bp, rc); - return; - } + bnxt_close_nic(bp, true, false); bnxt_run_fw_tests(bp, test_mask, &test_results); buf[BNXT_MACLPBK_TEST_IDX] = 1; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index f3886710e778..6e3da3362bd6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -521,9 +521,8 @@ static int bnxt_hwrm_ptp_cfg(struct bnxt *bp) if (netif_running(bp->dev)) { if (ptp->rx_filter == HWTSTAMP_FILTER_ALL) { - rc = bnxt_close_nic(bp, false, false); - if (!rc) - rc = bnxt_open_nic(bp, false, false); + bnxt_close_nic(bp, false, false); + rc = bnxt_open_nic(bp, false, false); } else { bnxt_ptp_cfg_tstamp_filters(bp); } From 9542105eb4ffe5fb498737dd93d57532b3f5ab58 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 7 Dec 2023 16:16:58 -0800 Subject: [PATCH 1048/1397] bnxt_en: Fix HWTSTAMP_FILTER_ALL packet timestamp logic [ Upstream commit c13e268c0768659cdaae4bfe2fb24860bcc8ddb4 ] When the chip is configured to timestamp all receive packets, the timestamp in the RX completion is only valid if the metadata present flag is not set for packets received on the wire. In addition, internal loopback packets will never have a valid timestamp and the timestamp field will always be zero. We must exclude any 0 value in the timestamp field because there is no way to determine if it is a loopback packet or not. Add a new function bnxt_rx_ts_valid() to check for all timestamp valid conditions. Fixes: 66ed81dcedc6 ("bnxt_en: Enable packet timestamping for all RX packets") Reviewed-by: Andy Gospodarek Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/20231208001658.14230-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 20 +++++++++++++++++--- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 8 +++++++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4ce34a39bb5e..f811d59fd71f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1760,6 +1760,21 @@ static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi, napi_gro_receive(&bnapi->napi, skb); } +static bool bnxt_rx_ts_valid(struct bnxt *bp, u32 flags, + struct rx_cmp_ext *rxcmp1, u32 *cmpl_ts) +{ + u32 ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp); + + if (BNXT_PTP_RX_TS_VALID(flags)) + goto ts_valid; + if (!bp->ptp_all_rx_tstamp || !ts || !BNXT_ALL_RX_TS_VALID(flags)) + return false; + +ts_valid: + *cmpl_ts = ts; + return true; +} + /* returns the following: * 1 - 1 packet successfully received * 0 - successful TPA_START, packet not completed yet @@ -1785,6 +1800,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, struct sk_buff *skb; struct xdp_buff xdp; u32 flags, misc; + u32 cmpl_ts; void *data; int rc = 0; @@ -2007,10 +2023,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } } - if (unlikely((flags & RX_CMP_FLAGS_ITYPES_MASK) == - RX_CMP_FLAGS_ITYPE_PTP_W_TS) || bp->ptp_all_rx_tstamp) { + if (bnxt_rx_ts_valid(bp, flags, rxcmp1, &cmpl_ts)) { if (bp->flags & BNXT_FLAG_CHIP_P5) { - u32 cmpl_ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp); u64 ns, ts; if (!bnxt_get_rx_ts_p5(bp, &ts, cmpl_ts)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index ea0f47eceea7..0116f67593e3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -161,7 +161,7 @@ struct rx_cmp { #define RX_CMP_FLAGS_ERROR (1 << 6) #define RX_CMP_FLAGS_PLACEMENT (7 << 7) #define RX_CMP_FLAGS_RSS_VALID (1 << 10) - #define RX_CMP_FLAGS_UNUSED (1 << 11) + #define RX_CMP_FLAGS_PKT_METADATA_PRESENT (1 << 11) #define RX_CMP_FLAGS_ITYPES_SHIFT 12 #define RX_CMP_FLAGS_ITYPES_MASK 0xf000 #define RX_CMP_FLAGS_ITYPE_UNKNOWN (0 << 12) @@ -188,6 +188,12 @@ struct rx_cmp { __le32 rx_cmp_rss_hash; }; +#define BNXT_PTP_RX_TS_VALID(flags) \ + (((flags) & RX_CMP_FLAGS_ITYPES_MASK) == RX_CMP_FLAGS_ITYPE_PTP_W_TS) + +#define BNXT_ALL_RX_TS_VALID(flags) \ + !((flags) & RX_CMP_FLAGS_PKT_METADATA_PRESENT) + #define RX_CMP_HASH_VALID(rxcmp) \ ((rxcmp)->rx_cmp_len_flags_type & cpu_to_le32(RX_CMP_FLAGS_RSS_VALID)) From 4faf39c4252ae5eb5bdacc92c85e087f240dd3e0 Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Thu, 7 Dec 2023 12:34:37 +0000 Subject: [PATCH 1049/1397] atm: solos-pci: Fix potential deadlock on &cli_queue_lock [ Upstream commit d5dba32b8f6cb39be708b726044ba30dbc088b30 ] As &card->cli_queue_lock is acquired under softirq context along the following call chain from solos_bh(), other acquisition of the same lock inside process context should disable at least bh to avoid double lock. console_show() --> spin_lock(&card->cli_queue_lock) --> solos_bh() --> spin_lock(&card->cli_queue_lock) This flaw was found by an experimental static analysis tool I am developing for irq-related deadlock. To prevent the potential deadlock, the patch uses spin_lock_bh() on the card->cli_queue_lock under process context code consistently to prevent the possible deadlock scenario. Fixes: 9c54004ea717 ("atm: Driver for Solos PCI ADSL2+ card.") Signed-off-by: Chengfeng Ye Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/atm/solos-pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 94fbc3abe60e..95f768b28a5e 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -449,9 +449,9 @@ static ssize_t console_show(struct device *dev, struct device_attribute *attr, struct sk_buff *skb; unsigned int len; - spin_lock(&card->cli_queue_lock); + spin_lock_bh(&card->cli_queue_lock); skb = skb_dequeue(&card->cli_queue[SOLOS_CHAN(atmdev)]); - spin_unlock(&card->cli_queue_lock); + spin_unlock_bh(&card->cli_queue_lock); if(skb == NULL) return sprintf(buf, "No data.\n"); From 82102501e08e3ce28ee52f8df97d2ac98a3cea51 Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Thu, 7 Dec 2023 12:34:53 +0000 Subject: [PATCH 1050/1397] atm: solos-pci: Fix potential deadlock on &tx_queue_lock [ Upstream commit 15319a4e8ee4b098118591c6ccbd17237f841613 ] As &card->tx_queue_lock is acquired under softirq context along the following call chain from solos_bh(), other acquisition of the same lock inside process context should disable at least bh to avoid double lock. pclose() --> spin_lock(&card->tx_queue_lock) --> solos_bh() --> fpga_tx() --> spin_lock(&card->tx_queue_lock) This flaw was found by an experimental static analysis tool I am developing for irq-related deadlock. To prevent the potential deadlock, the patch uses spin_lock_bh() on &card->tx_queue_lock under process context code consistently to prevent the possible deadlock scenario. Fixes: 213e85d38912 ("solos-pci: clean up pclose() function") Signed-off-by: Chengfeng Ye Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/atm/solos-pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 95f768b28a5e..d3c30a28c410 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -956,14 +956,14 @@ static void pclose(struct atm_vcc *vcc) struct pkt_hdr *header; /* Remove any yet-to-be-transmitted packets from the pending queue */ - spin_lock(&card->tx_queue_lock); + spin_lock_bh(&card->tx_queue_lock); skb_queue_walk_safe(&card->tx_queue[port], skb, tmpskb) { if (SKB_CB(skb)->vcc == vcc) { skb_unlink(skb, &card->tx_queue[port]); solos_pop(vcc, skb); } } - spin_unlock(&card->tx_queue_lock); + spin_unlock_bh(&card->tx_queue_lock); skb = alloc_skb(sizeof(*header), GFP_KERNEL); if (!skb) { From 4317fba45ff3eb5a4ce31d7781b6eb3e71abcca4 Mon Sep 17 00:00:00 2001 From: Radu Bulie Date: Thu, 7 Dec 2023 16:38:01 +0800 Subject: [PATCH 1051/1397] net: fec: correct queue selection [ Upstream commit 9fc95fe95c3e2a63ced8eeca4b256518ab204b63 ] The old implementation extracted VLAN TCI info from the payload before the VLAN tag has been pushed in the payload. Another problem was that the VLAN TCI was extracted even if the packet did not have VLAN protocol header. This resulted in invalid VLAN TCI and as a consequence a random queue was computed. This patch fixes the above issues and use the VLAN TCI from the skb if it is present or VLAN TCI from payload if present. If no VLAN header is present queue 0 is selected. Fixes: 52c4a1a85f4b ("net: fec: add ndo_select_queue to fix TX bandwidth fluctuations") Signed-off-by: Radu Bulie Signed-off-by: Wei Fang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fec_main.c | 27 +++++++++-------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 77c8e9cfb445..35c95f07fd6d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3710,31 +3710,26 @@ static int fec_set_features(struct net_device *netdev, return 0; } -static u16 fec_enet_get_raw_vlan_tci(struct sk_buff *skb) -{ - struct vlan_ethhdr *vhdr; - unsigned short vlan_TCI = 0; - - if (skb->protocol == htons(ETH_P_ALL)) { - vhdr = (struct vlan_ethhdr *)(skb->data); - vlan_TCI = ntohs(vhdr->h_vlan_TCI); - } - - return vlan_TCI; -} - static u16 fec_enet_select_queue(struct net_device *ndev, struct sk_buff *skb, struct net_device *sb_dev) { struct fec_enet_private *fep = netdev_priv(ndev); - u16 vlan_tag; + u16 vlan_tag = 0; if (!(fep->quirks & FEC_QUIRK_HAS_AVB)) return netdev_pick_tx(ndev, skb, NULL); - vlan_tag = fec_enet_get_raw_vlan_tci(skb); - if (!vlan_tag) + /* VLAN is present in the payload.*/ + if (eth_type_vlan(skb->protocol)) { + struct vlan_ethhdr *vhdr = skb_vlan_eth_hdr(skb); + + vlan_tag = ntohs(vhdr->h_vlan_TCI); + /* VLAN is present in the skb but not yet pushed in the payload.*/ + } else if (skb_vlan_tag_present(skb)) { + vlan_tag = skb->vlan_tci; + } else { return vlan_tag; + } return fec_enet_vlan_pri_to_queue[vlan_tag >> 13]; } From dd75adfdc2865724c156b180c833ea63650c54a7 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 7 Dec 2023 17:49:16 +0800 Subject: [PATCH 1052/1397] octeontx2-af: fix a use-after-free in rvu_nix_register_reporters [ Upstream commit 28a7cb045ab700de5554193a1642917602787784 ] The rvu_dl will be freed in rvu_nix_health_reporters_destroy(rvu_dl) after the create_workqueue fails, and after that free, the rvu_dl will be translate back through the following call chain: rvu_nix_health_reporters_destroy |-> rvu_nix_health_reporters_create |-> rvu_health_reporters_create |-> rvu_register_dl (label err_dl_health) Finally. in the err_dl_health label, rvu_dl being freed again in rvu_health_reporters_destroy(rvu) by rvu_nix_health_reporters_destroy. In the second calls of rvu_nix_health_reporters_destroy, however, it uses rvu_dl->rvu_nix_health_reporter, which is already freed at the end of rvu_nix_health_reporters_destroy in the first call. So this patch prevents the first destroy by instantly returning -ENONMEN when create_workqueue fails. In addition, since the failure of create_workqueue is the only entrence of label err, it has been integrated into the error-handling path of create_workqueue. Fixes: 5ed66306eab6 ("octeontx2-af: Add devlink health reporters for NIX") Signed-off-by: Zhipeng Lu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 058f75dc4c8a..bffe04e6d025 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -642,7 +642,7 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq"); if (!rvu_dl->devlink_wq) - goto err; + return -ENOMEM; INIT_WORK(&rvu_reporters->intr_work, rvu_nix_intr_work); INIT_WORK(&rvu_reporters->gen_work, rvu_nix_gen_work); @@ -650,9 +650,6 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) INIT_WORK(&rvu_reporters->ras_work, rvu_nix_ras_work); return 0; -err: - rvu_nix_health_reporters_destroy(rvu_dl); - return -ENOMEM; } static int rvu_nix_health_reporters_create(struct rvu_devlink *rvu_dl) From 15f300ed1d5e21ac85ab63de504dc246839fdf12 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 5 Dec 2023 18:25:54 +0100 Subject: [PATCH 1053/1397] net/sched: act_ct: Take per-cb reference to tcf_ct_flow_table [ Upstream commit 125f1c7f26ffcdbf96177abe75b70c1a6ceb17bc ] The referenced change added custom cleanup code to act_ct to delete any callbacks registered on the parent block when deleting the tcf_ct_flow_table instance. However, the underlying issue is that the drivers don't obtain the reference to the tcf_ct_flow_table instance when registering callbacks which means that not only driver callbacks may still be on the table when deleting it but also that the driver can still have pointers to its internal nf_flowtable and can use it concurrently which results either warning in netfilter[0] or use-after-free. Fix the issue by taking a reference to the underlying struct tcf_ct_flow_table instance when registering the callback and release the reference when unregistering. Expose new API required for such reference counting by adding two new callbacks to nf_flowtable_type and implementing them for act_ct flowtable_ct type. This fixes the issue by extending the lifetime of nf_flowtable until all users have unregistered. [0]: [106170.938634] ------------[ cut here ]------------ [106170.939111] WARNING: CPU: 21 PID: 3688 at include/net/netfilter/nf_flow_table.h:262 mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.940108] Modules linked in: act_ct nf_flow_table act_mirred act_skbedit act_tunnel_key vxlan cls_matchall nfnetlink_cttimeout act_gact cls_flower sch_ingress mlx5_vdpa vringh vhost_iotlb vdpa bonding openvswitch nsh rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_regis try overlay mlx5_core [106170.943496] CPU: 21 PID: 3688 Comm: kworker/u48:0 Not tainted 6.6.0-rc7_for_upstream_min_debug_2023_11_01_13_02 #1 [106170.944361] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [106170.945292] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core] [106170.945846] RIP: 0010:mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.946413] Code: 89 ef 48 83 05 71 a4 14 00 01 e8 f4 06 04 e1 48 83 05 6c a4 14 00 01 48 83 c4 28 5b 5d 41 5c 41 5d c3 48 83 05 d1 8b 14 00 01 <0f> 0b 48 83 05 d7 8b 14 00 01 e9 96 fe ff ff 48 83 05 a2 90 14 00 [106170.947924] RSP: 0018:ffff88813ff0fcb8 EFLAGS: 00010202 [106170.948397] RAX: 0000000000000000 RBX: ffff88811eabac40 RCX: ffff88811eabad48 [106170.949040] RDX: ffff88811eab8000 RSI: ffffffffa02cd560 RDI: 0000000000000000 [106170.949679] RBP: ffff88811eab8000 R08: 0000000000000001 R09: ffffffffa0229700 [106170.950317] R10: ffff888103538fc0 R11: 0000000000000001 R12: ffff88811eabad58 [106170.950969] R13: ffff888110c01c00 R14: ffff888106b40000 R15: 0000000000000000 [106170.951616] FS: 0000000000000000(0000) GS:ffff88885fd40000(0000) knlGS:0000000000000000 [106170.952329] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [106170.952834] CR2: 00007f1cefd28cb0 CR3: 000000012181b006 CR4: 0000000000370ea0 [106170.953482] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [106170.954121] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [106170.954766] Call Trace: [106170.955057] [106170.955315] ? __warn+0x79/0x120 [106170.955648] ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.956172] ? report_bug+0x17c/0x190 [106170.956537] ? handle_bug+0x3c/0x60 [106170.956891] ? exc_invalid_op+0x14/0x70 [106170.957264] ? asm_exc_invalid_op+0x16/0x20 [106170.957666] ? mlx5_del_flow_rules+0x10/0x310 [mlx5_core] [106170.958172] ? mlx5_tc_ct_block_flow_offload_add+0x1240/0x1240 [mlx5_core] [106170.958788] ? mlx5_tc_ct_del_ft_cb+0x267/0x2b0 [mlx5_core] [106170.959339] ? mlx5_tc_ct_del_ft_cb+0xc6/0x2b0 [mlx5_core] [106170.959854] ? mapping_remove+0x154/0x1d0 [mlx5_core] [106170.960342] ? mlx5e_tc_action_miss_mapping_put+0x4f/0x80 [mlx5_core] [106170.960927] mlx5_tc_ct_delete_flow+0x76/0xc0 [mlx5_core] [106170.961441] mlx5_free_flow_attr_actions+0x13b/0x220 [mlx5_core] [106170.962001] mlx5e_tc_del_fdb_flow+0x22c/0x3b0 [mlx5_core] [106170.962524] mlx5e_tc_del_flow+0x95/0x3c0 [mlx5_core] [106170.963034] mlx5e_flow_put+0x73/0xe0 [mlx5_core] [106170.963506] mlx5e_put_flow_list+0x38/0x70 [mlx5_core] [106170.964002] mlx5e_rep_update_flows+0xec/0x290 [mlx5_core] [106170.964525] mlx5e_rep_neigh_update+0x1da/0x310 [mlx5_core] [106170.965056] process_one_work+0x13a/0x2c0 [106170.965443] worker_thread+0x2e5/0x3f0 [106170.965808] ? rescuer_thread+0x410/0x410 [106170.966192] kthread+0xc6/0xf0 [106170.966515] ? kthread_complete_and_exit+0x20/0x20 [106170.966970] ret_from_fork+0x2d/0x50 [106170.967332] ? kthread_complete_and_exit+0x20/0x20 [106170.967774] ret_from_fork_asm+0x11/0x20 [106170.970466] [106170.970726] ---[ end trace 0000000000000000 ]--- Fixes: 77ac5e40c44e ("net/sched: act_ct: remove and free nf_table callbacks") Signed-off-by: Vlad Buslov Reviewed-by: Paul Blakey Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/netfilter/nf_flow_table.h | 10 ++++++++ net/sched/act_ct.c | 34 ++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index fe1507c1db82..692d5955911c 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -62,6 +62,8 @@ struct nf_flowtable_type { enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule); void (*free)(struct nf_flowtable *ft); + void (*get)(struct nf_flowtable *ft); + void (*put)(struct nf_flowtable *ft); nf_hookfn *hook; struct module *owner; }; @@ -240,6 +242,11 @@ nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, } list_add_tail(&block_cb->list, &block->cb_list); + up_write(&flow_table->flow_block_lock); + + if (flow_table->type->get) + flow_table->type->get(flow_table); + return 0; unlock: up_write(&flow_table->flow_block_lock); @@ -262,6 +269,9 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, WARN_ON(true); } up_write(&flow_table->flow_block_lock); + + if (flow_table->type->put) + flow_table->type->put(flow_table); } void flow_offload_route_init(struct flow_offload *flow, diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 6dcc4585576e..dd710fb9f490 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -286,9 +286,31 @@ static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow) !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); } +static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft); + +static void tcf_ct_nf_get(struct nf_flowtable *ft) +{ + struct tcf_ct_flow_table *ct_ft = + container_of(ft, struct tcf_ct_flow_table, nf_ft); + + tcf_ct_flow_table_get_ref(ct_ft); +} + +static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft); + +static void tcf_ct_nf_put(struct nf_flowtable *ft) +{ + struct tcf_ct_flow_table *ct_ft = + container_of(ft, struct tcf_ct_flow_table, nf_ft); + + tcf_ct_flow_table_put(ct_ft); +} + static struct nf_flowtable_type flowtable_ct = { .gc = tcf_ct_flow_is_outdated, .action = tcf_ct_flow_table_fill_actions, + .get = tcf_ct_nf_get, + .put = tcf_ct_nf_put, .owner = THIS_MODULE, }; @@ -337,9 +359,13 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) return err; } +static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft) +{ + refcount_inc(&ct_ft->ref); +} + static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) { - struct flow_block_cb *block_cb, *tmp_cb; struct tcf_ct_flow_table *ct_ft; struct flow_block *block; @@ -347,13 +373,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) rwork); nf_flow_table_free(&ct_ft->nf_ft); - /* Remove any remaining callbacks before cleanup */ block = &ct_ft->nf_ft.flow_block; down_write(&ct_ft->nf_ft.flow_block_lock); - list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) { - list_del(&block_cb->list); - flow_block_cb_free(block_cb); - } + WARN_ON(!list_empty(&block->cb_list)); up_write(&ct_ft->nf_ft.flow_block_lock); kfree(ct_ft); From 6047060105e44c7108c715535fc92c0226995a40 Mon Sep 17 00:00:00 2001 From: Shinas Rasheed Date: Thu, 7 Dec 2023 21:56:46 -0800 Subject: [PATCH 1054/1397] octeon_ep: explicitly test for firmware ready value [ Upstream commit 284f717622417cb267e344a9174f8e5698d1e3c1 ] The firmware ready value is 1, and get firmware ready status function should explicitly test for that value. The firmware ready value read will be 2 after driver load, and on unbind till firmware rewrites the firmware ready back to 0, the value seen by driver will be 2, which should be regarded as not ready. Fixes: 10c073e40469 ("octeon_ep: defer probe if firmware not ready") Signed-off-by: Shinas Rasheed Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 5b46ca47c8e5..2ee1374db4c0 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1076,7 +1076,8 @@ static bool get_fw_ready_status(struct pci_dev *pdev) pci_read_config_byte(pdev, (pos + 8), &status); dev_info(&pdev->dev, "Firmware ready status = %u\n", status); - return status; +#define FW_STATUS_READY 1ULL + return status == FW_STATUS_READY; } return false; } From 5295d2ad9103381daba63b206b34cfed08bdba69 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 8 Dec 2023 12:26:09 +0530 Subject: [PATCH 1055/1397] octeontx2-pf: Fix promisc mcam entry action [ Upstream commit dbda436824ded8ef6a05bb82cd9baa8d42377a49 ] Current implementation is such that, promisc mcam entry action is set as multicast even when there are no trusted VFs. multicast action causes the hardware to copy packet data, which reduces the performance. This patch fixes this issue by setting the promisc mcam entry action to unicast instead of multicast when there are no trusted VFs. The same change is made for the 'allmulti' mcam entry action. Fixes: ffd2f89ad05c ("octeontx2-pf: Enable promisc/allmulti match MCAM entries.") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 0c17ebdda148..a57455aebff6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1650,6 +1650,21 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) mutex_unlock(&mbox->lock); } +static bool otx2_promisc_use_mce_list(struct otx2_nic *pfvf) +{ + int vf; + + /* The AF driver will determine whether to allow the VF netdev or not */ + if (is_otx2_vf(pfvf->pcifunc)) + return true; + + /* check if there are any trusted VFs associated with the PF netdev */ + for (vf = 0; vf < pci_num_vf(pfvf->pdev); vf++) + if (pfvf->vf_configs[vf].trusted) + return true; + return false; +} + static void otx2_do_set_rx_mode(struct otx2_nic *pf) { struct net_device *netdev = pf->netdev; @@ -1682,7 +1697,8 @@ static void otx2_do_set_rx_mode(struct otx2_nic *pf) if (netdev->flags & (IFF_ALLMULTI | IFF_MULTICAST)) req->mode |= NIX_RX_MODE_ALLMULTI; - req->mode |= NIX_RX_MODE_USE_MCE; + if (otx2_promisc_use_mce_list(pf)) + req->mode |= NIX_RX_MODE_USE_MCE; otx2_sync_mbox_msg(&pf->mbox); mutex_unlock(&pf->mbox.lock); @@ -2691,11 +2707,14 @@ static int otx2_ndo_set_vf_trust(struct net_device *netdev, int vf, pf->vf_configs[vf].trusted = enable; rc = otx2_set_vf_permissions(pf, vf, OTX2_TRUSTED_VF); - if (rc) + if (rc) { pf->vf_configs[vf].trusted = !enable; - else + } else { netdev_info(pf->netdev, "VF %d is %strusted\n", vf, enable ? "" : "not "); + otx2_set_rx_mode(netdev); + } + return rc; } From 6b5de31e372cc79ade47b54d8cbb6d258887497d Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 8 Dec 2023 12:26:10 +0530 Subject: [PATCH 1056/1397] octeontx2-af: Update RSS algorithm index [ Upstream commit 570ba37898ecd9069beb58bf0b6cf84daba6e0fe ] The RSS flow algorithm is not set up correctly for promiscuous or all multi MCAM entries. This has an impact on flow distribution. This patch fixes the issue by updating flow algorithm index in above mentioned MCAM entries. Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet replication feature") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/af/rvu_npc.c | 55 +++++++++++++++---- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index f65805860c8d..0bcf3e559280 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -671,6 +671,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, int blkaddr, ucast_idx, index; struct nix_rx_action action = { 0 }; u64 relaxed_mask; + u8 flow_key_alg; if (!hw->cap.nix_rx_multicast && is_cgx_vf(rvu, pcifunc)) return; @@ -701,6 +702,8 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, action.op = NIX_RX_ACTIONOP_UCAST; } + flow_key_alg = action.flow_key_alg; + /* RX_ACTION set to MCAST for CGX PF's */ if (hw->cap.nix_rx_multicast && pfvf->use_mce_list && is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) { @@ -740,7 +743,7 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, req.vf = pcifunc; req.index = action.index; req.match_id = action.match_id; - req.flow_key_alg = action.flow_key_alg; + req.flow_key_alg = flow_key_alg; rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } @@ -854,6 +857,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u8 mac_addr[ETH_ALEN] = { 0 }; struct nix_rx_action action = { 0 }; struct rvu_pfvf *pfvf; + u8 flow_key_alg; u16 vf_func; /* Only CGX PF/VF can add allmulticast entry */ @@ -888,6 +892,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, ucast_idx); + flow_key_alg = action.flow_key_alg; if (action.op != NIX_RX_ACTIONOP_RSS) { *(u64 *)&action = 0; action.op = NIX_RX_ACTIONOP_UCAST; @@ -924,7 +929,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf, req.vf = pcifunc | vf_func; req.index = action.index; req.match_id = action.match_id; - req.flow_key_alg = action.flow_key_alg; + req.flow_key_alg = flow_key_alg; rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } @@ -990,11 +995,38 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam, mutex_unlock(&mcam->lock); } +static void npc_update_rx_action_with_alg_idx(struct rvu *rvu, struct nix_rx_action action, + struct rvu_pfvf *pfvf, int mcam_index, int blkaddr, + int alg_idx) + +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + struct rvu_hwinfo *hw = rvu->hw; + int bank, op_rss; + + if (!is_mcam_entry_enabled(rvu, mcam, blkaddr, mcam_index)) + return; + + op_rss = (!hw->cap.nix_rx_multicast || !pfvf->use_mce_list); + + bank = npc_get_bank(mcam, mcam_index); + mcam_index &= (mcam->banksize - 1); + + /* If Rx action is MCAST update only RSS algorithm index */ + if (!op_rss) { + *(u64 *)&action = rvu_read64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_ACTION(mcam_index, bank)); + + action.flow_key_alg = alg_idx; + } + rvu_write64(rvu, blkaddr, + NPC_AF_MCAMEX_BANKX_ACTION(mcam_index, bank), *(u64 *)&action); +} + void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, int group, int alg_idx, int mcam_index) { struct npc_mcam *mcam = &rvu->hw->mcam; - struct rvu_hwinfo *hw = rvu->hw; struct nix_rx_action action; int blkaddr, index, bank; struct rvu_pfvf *pfvf; @@ -1050,15 +1082,16 @@ void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, /* If PF's promiscuous entry is enabled, * Set RSS action for that entry as well */ - if ((!hw->cap.nix_rx_multicast || !pfvf->use_mce_list) && - is_mcam_entry_enabled(rvu, mcam, blkaddr, index)) { - bank = npc_get_bank(mcam, index); - index &= (mcam->banksize - 1); + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, blkaddr, + alg_idx); - rvu_write64(rvu, blkaddr, - NPC_AF_MCAMEX_BANKX_ACTION(index, bank), - *(u64 *)&action); - } + index = npc_get_nixlf_mcam_index(mcam, pcifunc, + nixlf, NIXLF_ALLMULTI_ENTRY); + /* If PF's allmulti entry is enabled, + * Set RSS action for that entry as well + */ + npc_update_rx_action_with_alg_idx(rvu, action, pfvf, index, blkaddr, + alg_idx); } void npc_enadis_default_mce_entry(struct rvu *rvu, u16 pcifunc, From f115b31d7e96b4674f99427126e78721ef96a2ae Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 8 Dec 2023 14:57:54 +0530 Subject: [PATCH 1057/1397] octeontx2-af: Fix pause frame configuration [ Upstream commit e307b5a845c5951dabafc48d00b6424ee64716c4 ] The current implementation's default Pause Forward setting is causing unnecessary network traffic. This patch disables Pause Forward to address this issue. Fixes: 1121f6b02e7a ("octeontx2-af: Priority flow control configuration support") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index af21e2030cff..4728ba34b0e3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -373,6 +373,11 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + /* Disable forward pause to driver */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + /* Enable channel mask for all LMACS */ if (is_dev_rpm2(rpm)) rpm_write(rpm, lmac_id, RPM2_CMR_CHAN_MSK_OR, 0xffff); @@ -616,12 +621,10 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p if (rx_pause) { cfg &= ~(RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | - RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | - RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE); } else { cfg |= (RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE | - RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE | - RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_FWD); + RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE); } if (tx_pause) { From 531fd46f92895bcdc41bedd12533266c397196da Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sat, 9 Dec 2023 04:42:10 -0500 Subject: [PATCH 1058/1397] atm: Fix Use-After-Free in do_vcc_ioctl [ Upstream commit 24e90b9e34f9e039f56b5f25f6e6eb92cdd8f4b3 ] Because do_vcc_ioctl() accesses sk->sk_receive_queue without holding a sk->sk_receive_queue.lock, it can cause a race with vcc_recvmsg(). A use-after-free for skb occurs with the following flow. ``` do_vcc_ioctl() -> skb_peek() vcc_recvmsg() -> skb_recv_datagram() -> skb_free_datagram() ``` Add sk->sk_receive_queue.lock to do_vcc_ioctl() to fix this issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Link: https://lore.kernel.org/r/20231209094210.GA403126@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/atm/ioctl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 838ebf0cabbf..f81f8d56f5c0 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -73,14 +73,17 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, case SIOCINQ: { struct sk_buff *skb; + int amount; if (sock->state != SS_CONNECTED) { error = -EINVAL; goto done; } + spin_lock_irq(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - error = put_user(skb ? skb->len : 0, - (int __user *)argp) ? -EFAULT : 0; + amount = skb ? skb->len : 0; + spin_unlock_irq(&sk->sk_receive_queue.lock); + error = put_user(amount, (int __user *)argp) ? -EFAULT : 0; goto done; } case ATM_SETSC: From 63caa51833e8701248a8a89d83effe96f30e4c80 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sat, 9 Dec 2023 05:05:38 -0500 Subject: [PATCH 1059/1397] net/rose: Fix Use-After-Free in rose_ioctl [ Upstream commit 810c38a369a0a0ce625b5c12169abce1dd9ccd53 ] Because rose_ioctl() accesses sk->sk_receive_queue without holding a sk->sk_receive_queue.lock, it can cause a race with rose_accept(). A use-after-free for skb occurs with the following flow. ``` rose_ioctl() -> skb_peek() rose_accept() -> skb_dequeue() -> kfree_skb() ``` Add sk->sk_receive_queue.lock to rose_ioctl() to fix this issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Link: https://lore.kernel.org/r/20231209100538.GA407321@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/rose/af_rose.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 49dafe9ac72f..4a5c2dc8dd7a 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1315,9 +1315,11 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case TIOCINQ: { struct sk_buff *skb; long amount = 0L; - /* These two are safe on a single CPU system as only user tasks fiddle here */ + + spin_lock_irq(&sk->sk_receive_queue.lock); if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; + spin_unlock_irq(&sk->sk_receive_queue.lock); return put_user(amount, (unsigned int __user *) argp); } From 3beb9d66e4422d1c3865da9027eee49863bff9e1 Mon Sep 17 00:00:00 2001 From: Piotr Gardocki Date: Tue, 21 Nov 2023 22:47:15 -0500 Subject: [PATCH 1060/1397] iavf: Introduce new state machines for flow director [ Upstream commit 3a0b5a2929fdeda63fc921c2dbed237059acf732 ] New states introduced: IAVF_FDIR_FLTR_DIS_REQUEST IAVF_FDIR_FLTR_DIS_PENDING IAVF_FDIR_FLTR_INACTIVE Current FDIR state machines (SM) are not adequate to handle a few scenarios in the link DOWN/UP event, reset event and ntuple-feature. For example, when VF link goes DOWN and comes back UP administratively, the expectation is that previously installed filters should also be restored. But with current SM, filters are not restored. So with new SM, during link DOWN filters are marked as INACTIVE in the iavf list but removed from PF. After link UP, SM will transition from INACTIVE to ADD_REQUEST to restore the filter. Similarly, with VF reset, filters will be removed from the PF, but marked as INACTIVE in the iavf list. Filters will be restored after reset completion. Steps to reproduce: ------------------- 1. Create a VF. Here VF is enp8s0. 2. Assign IP addresses to VF and link partner and ping continuously from remote. Here remote IP is 1.1.1.1. 3. Check default RX Queue of traffic. ethtool -S enp8s0 | grep -E "rx-[[:digit:]]+\.packets" 4. Add filter - change default RX Queue (to 15 here) ethtool -U ens8s0 flow-type ip4 src-ip 1.1.1.1 action 15 loc 5 5. Ensure filter gets added and traffic is received on RX queue 15 now. Link event testing: ------------------- 6. Bring VF link down and up. If traffic flows to configured queue 15, test is success, otherwise it is a failure. Reset event testing: -------------------- 7. Reset the VF. If traffic flows to configured queue 15, test is success, otherwise it is a failure. Fixes: 0dbfbabb840d ("iavf: Add framework to enable ethtool ntuple filters") Signed-off-by: Piotr Gardocki Reviewed-by: Larysa Zaremba Signed-off-by: Ranganatha Rao Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf.h | 1 + .../net/ethernet/intel/iavf/iavf_ethtool.c | 27 ++++--- drivers/net/ethernet/intel/iavf/iavf_fdir.h | 15 +++- drivers/net/ethernet/intel/iavf/iavf_main.c | 48 ++++++++++--- .../net/ethernet/intel/iavf/iavf_virtchnl.c | 71 +++++++++++++++++-- 5 files changed, 139 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index d8d7b62ceb24..431d9d62c8c6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -303,6 +303,7 @@ struct iavf_adapter { #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) #define IAVF_FLAG_REINIT_MSIX_NEEDED BIT(20) +#define IAVF_FLAG_FDIR_ENABLED BIT(21) /* duplicates for common code */ #define IAVF_FLAG_DCB_ENABLED 0 /* flags for admin queue service task */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 1b412754aa42..892c6a4f03bb 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -1063,7 +1063,7 @@ iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter, struct iavf_fdir_fltr *rule = NULL; int ret = 0; - if (!FDIR_FLTR_SUPPORT(adapter)) + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) return -EOPNOTSUPP; spin_lock_bh(&adapter->fdir_fltr_lock); @@ -1205,7 +1205,7 @@ iavf_get_fdir_fltr_ids(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd, unsigned int cnt = 0; int val = 0; - if (!FDIR_FLTR_SUPPORT(adapter)) + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) return -EOPNOTSUPP; cmd->data = IAVF_MAX_FDIR_FILTERS; @@ -1397,7 +1397,7 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx int count = 50; int err; - if (!FDIR_FLTR_SUPPORT(adapter)) + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) return -EOPNOTSUPP; if (fsp->flow_type & FLOW_MAC_EXT) @@ -1438,12 +1438,16 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx spin_lock_bh(&adapter->fdir_fltr_lock); iavf_fdir_list_add_fltr(adapter, fltr); adapter->fdir_active_fltr++; - fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; + if (adapter->link_up) { + fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; + } else { + fltr->state = IAVF_FDIR_FLTR_INACTIVE; + } spin_unlock_bh(&adapter->fdir_fltr_lock); - mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); - + if (adapter->link_up) + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); ret: if (err && fltr) kfree(fltr); @@ -1465,7 +1469,7 @@ static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx struct iavf_fdir_fltr *fltr = NULL; int err = 0; - if (!FDIR_FLTR_SUPPORT(adapter)) + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) return -EOPNOTSUPP; spin_lock_bh(&adapter->fdir_fltr_lock); @@ -1474,6 +1478,11 @@ static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx if (fltr->state == IAVF_FDIR_FLTR_ACTIVE) { fltr->state = IAVF_FDIR_FLTR_DEL_REQUEST; adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER; + } else if (fltr->state == IAVF_FDIR_FLTR_INACTIVE) { + list_del(&fltr->list); + kfree(fltr); + adapter->fdir_active_fltr--; + fltr = NULL; } else { err = -EBUSY; } @@ -1782,7 +1791,7 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, ret = 0; break; case ETHTOOL_GRXCLSRLCNT: - if (!FDIR_FLTR_SUPPORT(adapter)) + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) break; spin_lock_bh(&adapter->fdir_fltr_lock); cmd->rule_cnt = adapter->fdir_active_fltr; diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h index 9eb9f73f6adf..d31bd923ba8c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h @@ -6,12 +6,25 @@ struct iavf_adapter; -/* State of Flow Director filter */ +/* State of Flow Director filter + * + * *_REQUEST states are used to mark filter to be sent to PF driver to perform + * an action (either add or delete filter). *_PENDING states are an indication + * that request was sent to PF and the driver is waiting for response. + * + * Both DELETE and DISABLE states are being used to delete a filter in PF. + * The difference is that after a successful response filter in DEL_PENDING + * state is being deleted from VF driver as well and filter in DIS_PENDING state + * is being changed to INACTIVE state. + */ enum iavf_fdir_fltr_state_t { IAVF_FDIR_FLTR_ADD_REQUEST, /* User requests to add filter */ IAVF_FDIR_FLTR_ADD_PENDING, /* Filter pending add by the PF */ IAVF_FDIR_FLTR_DEL_REQUEST, /* User requests to delete filter */ IAVF_FDIR_FLTR_DEL_PENDING, /* Filter pending delete by the PF */ + IAVF_FDIR_FLTR_DIS_REQUEST, /* Filter scheduled to be disabled */ + IAVF_FDIR_FLTR_DIS_PENDING, /* Filter pending disable by the PF */ + IAVF_FDIR_FLTR_INACTIVE, /* Filter inactive on link down */ IAVF_FDIR_FLTR_ACTIVE, /* Filter is active */ }; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 68783a7b7096..5158addc0aa9 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1356,18 +1356,20 @@ static void iavf_clear_cloud_filters(struct iavf_adapter *adapter) **/ static void iavf_clear_fdir_filters(struct iavf_adapter *adapter) { - struct iavf_fdir_fltr *fdir, *fdirtmp; + struct iavf_fdir_fltr *fdir; /* remove all Flow Director filters */ spin_lock_bh(&adapter->fdir_fltr_lock); - list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head, - list) { + list_for_each_entry(fdir, &adapter->fdir_list_head, list) { if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST) { - list_del(&fdir->list); - kfree(fdir); - adapter->fdir_active_fltr--; - } else { - fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST; + /* Cancel a request, keep filter as inactive */ + fdir->state = IAVF_FDIR_FLTR_INACTIVE; + } else if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING || + fdir->state == IAVF_FDIR_FLTR_ACTIVE) { + /* Disable filters which are active or have a pending + * request to PF to be added + */ + fdir->state = IAVF_FDIR_FLTR_DIS_REQUEST; } } spin_unlock_bh(&adapter->fdir_fltr_lock); @@ -4174,6 +4176,33 @@ static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type, } } +/** + * iavf_restore_fdir_filters + * @adapter: board private structure + * + * Restore existing FDIR filters when VF netdev comes back up. + **/ +static void iavf_restore_fdir_filters(struct iavf_adapter *adapter) +{ + struct iavf_fdir_fltr *f; + + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry(f, &adapter->fdir_list_head, list) { + if (f->state == IAVF_FDIR_FLTR_DIS_REQUEST) { + /* Cancel a request, keep filter as active */ + f->state = IAVF_FDIR_FLTR_ACTIVE; + } else if (f->state == IAVF_FDIR_FLTR_DIS_PENDING || + f->state == IAVF_FDIR_FLTR_INACTIVE) { + /* Add filters which are inactive or have a pending + * request to PF to be deleted + */ + f->state = IAVF_FDIR_FLTR_ADD_REQUEST; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); +} + /** * iavf_open - Called when a network interface is made active * @netdev: network interface device structure @@ -4241,8 +4270,9 @@ static int iavf_open(struct net_device *netdev) spin_unlock_bh(&adapter->mac_vlan_list_lock); - /* Restore VLAN filters that were removed with IFF_DOWN */ + /* Restore filters that were removed with IFF_DOWN */ iavf_restore_filters(adapter); + iavf_restore_fdir_filters(adapter); iavf_configure(adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 0b97b424e487..b95a4f903204 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1738,8 +1738,8 @@ void iavf_add_fdir_filter(struct iavf_adapter *adapter) **/ void iavf_del_fdir_filter(struct iavf_adapter *adapter) { + struct virtchnl_fdir_del f = {}; struct iavf_fdir_fltr *fdir; - struct virtchnl_fdir_del f; bool process_fltr = false; int len; @@ -1756,11 +1756,16 @@ void iavf_del_fdir_filter(struct iavf_adapter *adapter) list_for_each_entry(fdir, &adapter->fdir_list_head, list) { if (fdir->state == IAVF_FDIR_FLTR_DEL_REQUEST) { process_fltr = true; - memset(&f, 0, len); f.vsi_id = fdir->vc_add_msg.vsi_id; f.flow_id = fdir->flow_id; fdir->state = IAVF_FDIR_FLTR_DEL_PENDING; break; + } else if (fdir->state == IAVF_FDIR_FLTR_DIS_REQUEST) { + process_fltr = true; + f.vsi_id = fdir->vc_add_msg.vsi_id; + f.flow_id = fdir->flow_id; + fdir->state = IAVF_FDIR_FLTR_DIS_PENDING; + break; } } spin_unlock_bh(&adapter->fdir_fltr_lock); @@ -1904,6 +1909,48 @@ static void iavf_netdev_features_vlan_strip_set(struct net_device *netdev, netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; } +/** + * iavf_activate_fdir_filters - Reactivate all FDIR filters after a reset + * @adapter: private adapter structure + * + * Called after a reset to re-add all FDIR filters and delete some of them + * if they were pending to be deleted. + */ +static void iavf_activate_fdir_filters(struct iavf_adapter *adapter) +{ + struct iavf_fdir_fltr *f, *ftmp; + bool add_filters = false; + + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry_safe(f, ftmp, &adapter->fdir_list_head, list) { + if (f->state == IAVF_FDIR_FLTR_ADD_REQUEST || + f->state == IAVF_FDIR_FLTR_ADD_PENDING || + f->state == IAVF_FDIR_FLTR_ACTIVE) { + /* All filters and requests have been removed in PF, + * restore them + */ + f->state = IAVF_FDIR_FLTR_ADD_REQUEST; + add_filters = true; + } else if (f->state == IAVF_FDIR_FLTR_DIS_REQUEST || + f->state == IAVF_FDIR_FLTR_DIS_PENDING) { + /* Link down state, leave filters as inactive */ + f->state = IAVF_FDIR_FLTR_INACTIVE; + } else if (f->state == IAVF_FDIR_FLTR_DEL_REQUEST || + f->state == IAVF_FDIR_FLTR_DEL_PENDING) { + /* Delete filters that were pending to be deleted, the + * list on PF is already cleared after a reset + */ + list_del(&f->list); + kfree(f); + adapter->fdir_active_fltr--; + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + + if (add_filters) + adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; +} + /** * iavf_virtchnl_completion * @adapter: adapter structure @@ -2081,7 +2128,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_lock_bh(&adapter->fdir_fltr_lock); list_for_each_entry(fdir, &adapter->fdir_list_head, list) { - if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) { + if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING || + fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) { fdir->state = IAVF_FDIR_FLTR_ACTIVE; dev_info(&adapter->pdev->dev, "Failed to del Flow Director filter, error %s\n", iavf_stat_str(&adapter->hw, @@ -2217,6 +2265,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, spin_unlock_bh(&adapter->mac_vlan_list_lock); + iavf_activate_fdir_filters(adapter); + iavf_parse_vf_resource_msg(adapter); /* negotiated VIRTCHNL_VF_OFFLOAD_VLAN_V2, so wait for the @@ -2406,7 +2456,9 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, list_for_each_entry_safe(fdir, fdir_tmp, &adapter->fdir_list_head, list) { if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) { - if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS) { + if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS || + del_fltr->status == + VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST) { dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is deleted\n", fdir->loc); list_del(&fdir->list); @@ -2418,6 +2470,17 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, del_fltr->status); iavf_print_fdir_fltr(adapter, fdir); } + } else if (fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) { + if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS || + del_fltr->status == + VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST) { + fdir->state = IAVF_FDIR_FLTR_INACTIVE; + } else { + fdir->state = IAVF_FDIR_FLTR_ACTIVE; + dev_info(&adapter->pdev->dev, "Failed to disable Flow Director filter with status: %d\n", + del_fltr->status); + iavf_print_fdir_fltr(adapter, fdir); + } } } spin_unlock_bh(&adapter->fdir_fltr_lock); From e768a04908de2d72caaf8bc491472597c922cb39 Mon Sep 17 00:00:00 2001 From: Piotr Gardocki Date: Tue, 21 Nov 2023 22:47:16 -0500 Subject: [PATCH 1061/1397] iavf: Handle ntuple on/off based on new state machines for flow director [ Upstream commit 09d23b8918f9ab0f8114f6b94f2faf8bde3fb52a ] ntuple-filter feature on/off: Default is on. If turned off, the filters will be removed from both PF and iavf list. The removal is irrespective of current filter state. Steps to reproduce: ------------------- 1. Ensure ntuple is on. ethtool -K enp8s0 ntuple-filters on 2. Create a filter to receive the traffic into non-default rx-queue like 15 and ensure traffic is flowing into queue into 15. Now, turn off ntuple. Traffic should not flow to configured queue 15. It should flow to default RX queue. Fixes: 0dbfbabb840d ("iavf: Add framework to enable ethtool ntuple filters") Signed-off-by: Piotr Gardocki Reviewed-by: Larysa Zaremba Signed-off-by: Ranganatha Rao Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 59 +++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 5158addc0aa9..af8eb27a3615 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4409,6 +4409,49 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) return ret; } +/** + * iavf_disable_fdir - disable Flow Director and clear existing filters + * @adapter: board private structure + **/ +static void iavf_disable_fdir(struct iavf_adapter *adapter) +{ + struct iavf_fdir_fltr *fdir, *fdirtmp; + bool del_filters = false; + + adapter->flags &= ~IAVF_FLAG_FDIR_ENABLED; + + /* remove all Flow Director filters */ + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head, + list) { + if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST || + fdir->state == IAVF_FDIR_FLTR_INACTIVE) { + /* Delete filters not registered in PF */ + list_del(&fdir->list); + kfree(fdir); + adapter->fdir_active_fltr--; + } else if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING || + fdir->state == IAVF_FDIR_FLTR_DIS_REQUEST || + fdir->state == IAVF_FDIR_FLTR_ACTIVE) { + /* Filters registered in PF, schedule their deletion */ + fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST; + del_filters = true; + } else if (fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) { + /* Request to delete filter already sent to PF, change + * state to DEL_PENDING to delete filter after PF's + * response, not set as INACTIVE + */ + fdir->state = IAVF_FDIR_FLTR_DEL_PENDING; + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + + if (del_filters) { + adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER; + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); + } +} + #define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \ NETIF_F_HW_VLAN_CTAG_TX | \ NETIF_F_HW_VLAN_STAG_RX | \ @@ -4431,6 +4474,13 @@ static int iavf_set_features(struct net_device *netdev, iavf_set_vlan_offload_features(adapter, netdev->features, features); + if ((netdev->features & NETIF_F_NTUPLE) ^ (features & NETIF_F_NTUPLE)) { + if (features & NETIF_F_NTUPLE) + adapter->flags |= IAVF_FLAG_FDIR_ENABLED; + else + iavf_disable_fdir(adapter); + } + return 0; } @@ -4726,6 +4776,9 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); + if (!FDIR_FLTR_SUPPORT(adapter)) + features &= ~NETIF_F_NTUPLE; + return iavf_fix_netdev_vlan_features(adapter, features); } @@ -4843,6 +4896,12 @@ int iavf_process_config(struct iavf_adapter *adapter) if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + if (FDIR_FLTR_SUPPORT(adapter)) { + netdev->hw_features |= NETIF_F_NTUPLE; + netdev->features |= NETIF_F_NTUPLE; + adapter->flags |= IAVF_FLAG_FDIR_ENABLED; + } + netdev->priv_flags |= IFF_UNICAST_FLT; /* Do not turn on offloads when they are requested to be turned off. From 54f59a242bcfcd7ef7bbb9e1928edd39d908c8d5 Mon Sep 17 00:00:00 2001 From: Slawomir Laba Date: Wed, 29 Nov 2023 10:35:26 -0500 Subject: [PATCH 1062/1397] iavf: Fix iavf_shutdown to call iavf_remove instead iavf_close [ Upstream commit 7ae42ef308ed0f6250b36f43e4eeb182ebbe6215 ] Make the flow for pci shutdown be the same to the pci remove. iavf_shutdown was implementing an incomplete version of iavf_remove. It misses several calls to the kernel like iavf_free_misc_irq, iavf_reset_interrupt_capability, iounmap that might break the system on reboot or hibernation. Implement the call of iavf_remove directly in iavf_shutdown to close this gap. Fixes below error messages (dmesg) during shutdown stress tests - [685814.900917] ice 0000:88:00.0: MAC 02:d0:5f:82:43:5d does not exist for VF 0 [685814.900928] ice 0000:88:00.0: MAC 33:33:00:00:00:01 does not exist for VF 0 Reproduction: 1. Create one VF interface: echo 1 > /sys/class/net//device/sriov_numvfs 2. Run live dmesg on the host: dmesg -wH 3. On SUT, script below steps into vf_namespace_assignment.sh <#!/bin/sh> // Remove <>. Git removes # line if= (edit this per VF name) loop=0 while true; do echo test round $loop let loop++ ip netns add ns$loop ip link set dev $if up ip link set dev $if netns ns$loop ip netns exec ns$loop ip link set dev $if up ip netns exec ns$loop ip link set dev $if netns 1 ip netns delete ns$loop done 4. Run the script for at least 1000 iterations on SUT: ./vf_namespace_assignment.sh Expected result: No errors in dmesg. Fixes: 129cf89e5856 ("iavf: rename functions and structs to new name") Signed-off-by: Slawomir Laba Reviewed-by: Michal Swiatkowski Reviewed-by: Ahmed Zaki Reviewed-by: Jesse Brandeburg Co-developed-by: Ranganatha Rao Signed-off-by: Ranganatha Rao Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 72 ++++++--------------- 1 file changed, 21 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index af8eb27a3615..257865647c86 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -277,27 +277,6 @@ void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem) kfree(mem->va); } -/** - * iavf_lock_timeout - try to lock mutex but give up after timeout - * @lock: mutex that should be locked - * @msecs: timeout in msecs - * - * Returns 0 on success, negative on failure - **/ -static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) -{ - unsigned int wait, delay = 10; - - for (wait = 0; wait < msecs; wait += delay) { - if (mutex_trylock(lock)) - return 0; - - msleep(delay); - } - - return -1; -} - /** * iavf_schedule_reset - Set the flags and schedule a reset event * @adapter: board private structure @@ -4925,34 +4904,6 @@ int iavf_process_config(struct iavf_adapter *adapter) return 0; } -/** - * iavf_shutdown - Shutdown the device in preparation for a reboot - * @pdev: pci device structure - **/ -static void iavf_shutdown(struct pci_dev *pdev) -{ - struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); - struct net_device *netdev = adapter->netdev; - - netif_device_detach(netdev); - - if (netif_running(netdev)) - iavf_close(netdev); - - if (iavf_lock_timeout(&adapter->crit_lock, 5000)) - dev_warn(&adapter->pdev->dev, "%s: failed to acquire crit_lock\n", __func__); - /* Prevent the watchdog from running. */ - iavf_change_state(adapter, __IAVF_REMOVE); - adapter->aq_required = 0; - mutex_unlock(&adapter->crit_lock); - -#ifdef CONFIG_PM - pci_save_state(pdev); - -#endif - pci_disable_device(pdev); -} - /** * iavf_probe - Device Initialization Routine * @pdev: PCI device information struct @@ -5166,17 +5117,22 @@ static int __maybe_unused iavf_resume(struct device *dev_d) **/ static void iavf_remove(struct pci_dev *pdev) { - struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); struct iavf_fdir_fltr *fdir, *fdirtmp; struct iavf_vlan_filter *vlf, *vlftmp; struct iavf_cloud_filter *cf, *cftmp; struct iavf_adv_rss *rss, *rsstmp; struct iavf_mac_filter *f, *ftmp; + struct iavf_adapter *adapter; struct net_device *netdev; struct iavf_hw *hw; int err; - netdev = adapter->netdev; + /* Don't proceed with remove if netdev is already freed */ + netdev = pci_get_drvdata(pdev); + if (!netdev) + return; + + adapter = iavf_pdev_to_adapter(pdev); hw = &adapter->hw; if (test_and_set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) @@ -5304,11 +5260,25 @@ static void iavf_remove(struct pci_dev *pdev) destroy_workqueue(adapter->wq); + pci_set_drvdata(pdev, NULL); + free_netdev(netdev); pci_disable_device(pdev); } +/** + * iavf_shutdown - Shutdown the device in preparation for a reboot + * @pdev: pci device structure + **/ +static void iavf_shutdown(struct pci_dev *pdev) +{ + iavf_remove(pdev); + + if (system_state == SYSTEM_POWER_OFF) + pci_set_power_state(pdev, PCI_D3hot); +} + static SIMPLE_DEV_PM_OPS(iavf_pm_ops, iavf_suspend, iavf_resume); static struct pci_driver iavf_driver = { From 7106a15b96d7debc1da1c16623881ec94ac2b018 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sun, 10 Dec 2023 12:52:55 +0800 Subject: [PATCH 1063/1397] qed: Fix a potential use-after-free in qed_cxt_tables_alloc [ Upstream commit b65d52ac9c085c0c52dee012a210d4e2f352611b ] qed_ilt_shadow_alloc() will call qed_ilt_shadow_free() to free p_hwfn->p_cxt_mngr->ilt_shadow on error. However, qed_cxt_tables_alloc() accesses the freed pointer on failure of qed_ilt_shadow_alloc() through calling qed_cxt_mngr_free(), which may lead to use-after-free. Fix this issue by setting p_mngr->ilt_shadow to NULL in qed_ilt_shadow_free(). Fixes: fe56b9e6a8d9 ("qed: Add module with basic common support") Reviewed-by: Przemek Kitszel Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20231210045255.21383-1-dinghao.liu@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 65e20693c549..33f4f58ee51c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -933,6 +933,7 @@ static void qed_ilt_shadow_free(struct qed_hwfn *p_hwfn) p_dma->virt_addr = NULL; } kfree(p_mngr->ilt_shadow); + p_mngr->ilt_shadow = NULL; } static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn, From d6bef004987084ec0db319b0ec3f087b6f56f880 Mon Sep 17 00:00:00 2001 From: Dong Chenchen Date: Sun, 10 Dec 2023 10:02:00 +0800 Subject: [PATCH 1064/1397] net: Remove acked SYN flag from packet in the transmit queue correctly [ Upstream commit f99cd56230f56c8b6b33713c5be4da5d6766be1f ] syzkaller report: kernel BUG at net/core/skbuff.c:3452! invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.7.0-rc4-00009-gbee0e7762ad2-dirty #135 RIP: 0010:skb_copy_and_csum_bits (net/core/skbuff.c:3452) Call Trace: icmp_glue_bits (net/ipv4/icmp.c:357) __ip_append_data.isra.0 (net/ipv4/ip_output.c:1165) ip_append_data (net/ipv4/ip_output.c:1362 net/ipv4/ip_output.c:1341) icmp_push_reply (net/ipv4/icmp.c:370) __icmp_send (./include/net/route.h:252 net/ipv4/icmp.c:772) ip_fragment.constprop.0 (./include/linux/skbuff.h:1234 net/ipv4/ip_output.c:592 net/ipv4/ip_output.c:577) __ip_finish_output (net/ipv4/ip_output.c:311 net/ipv4/ip_output.c:295) ip_output (net/ipv4/ip_output.c:427) __ip_queue_xmit (net/ipv4/ip_output.c:535) __tcp_transmit_skb (net/ipv4/tcp_output.c:1462) __tcp_retransmit_skb (net/ipv4/tcp_output.c:3387) tcp_retransmit_skb (net/ipv4/tcp_output.c:3404) tcp_retransmit_timer (net/ipv4/tcp_timer.c:604) tcp_write_timer (./include/linux/spinlock.h:391 net/ipv4/tcp_timer.c:716) The panic issue was trigered by tcp simultaneous initiation. The initiation process is as follows: TCP A TCP B 1. CLOSED CLOSED 2. SYN-SENT --> ... 3. SYN-RECEIVED <-- <-- SYN-SENT 4. ... --> SYN-RECEIVED 5. SYN-RECEIVED --> ... // TCP B: not send challenge ack for ack limit or packet loss // TCP A: close tcp_close tcp_send_fin if (!tskb && tcp_under_memory_pressure(sk)) tskb = skb_rb_last(&sk->tcp_rtx_queue); //pick SYN_ACK packet TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; // set FIN flag 6. FIN_WAIT_1 --> ... // TCP B: send challenge ack to SYN_FIN_ACK 7. ... <-- SYN-RECEIVED //challenge ack // TCP A: 8. FIN_WAIT_1 --> ... // retransmit panic __tcp_retransmit_skb //skb->len=0 tcp_trim_head len = tp->snd_una - TCP_SKB_CB(skb)->seq // len=101-100 __pskb_trim_head skb->data_len -= len // skb->len=-1, wrap around ... ... ip_fragment icmp_glue_bits //BUG_ON If we use tcp_trim_head() to remove acked SYN from packet that contains data or other flags, skb->len will be incorrectly decremented. We can remove SYN flag that has been acked from rtx_queue earlier than tcp_trim_head(), which can fix the problem mentioned above. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Co-developed-by: Eric Dumazet Signed-off-by: Eric Dumazet Signed-off-by: Dong Chenchen Link: https://lore.kernel.org/r/20231210020200.1539875-1-dongchenchen2@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_output.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9ccfdc825004..cab3c1162c3a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3263,7 +3263,13 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) if (skb_still_in_host_queue(sk, skb)) return -EBUSY; +start: if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { + if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; + TCP_SKB_CB(skb)->seq++; + goto start; + } if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { WARN_ON_ONCE(1); return -EINVAL; From c22877fafd6b905e2714701fb3611953ff1b869e Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Mon, 11 Dec 2023 06:27:58 +0000 Subject: [PATCH 1065/1397] net: ena: Destroy correct number of xdp queues upon failure [ Upstream commit 41db6f99b5489a0d2ef26afe816ef0c6118d1d47 ] The ena_setup_and_create_all_xdp_queues() function freed all the resources upon failure, after creating only xdp_num_queues queues, instead of freeing just the created ones. In this patch, the only resources that are freed, are the ones allocated right before the failure occurs. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: Shahar Itzko Signed-off-by: David Arinzon Link: https://lore.kernel.org/r/20231211062801.27891-2-darinzon@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index f955bde10cf9..098025d29247 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -74,6 +74,8 @@ static void ena_unmap_tx_buff(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info); static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, int first_index, int count); +static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count); /* Increase a stat by cnt while holding syncp seqlock on 32bit machines */ static void ena_increase_stat(u64 *statp, u64 cnt, @@ -457,23 +459,22 @@ static void ena_init_all_xdp_queues(struct ena_adapter *adapter) static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) { + u32 xdp_first_ring = adapter->xdp_first_ring; + u32 xdp_num_queues = adapter->xdp_num_queues; int rc = 0; - rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring, - adapter->xdp_num_queues); + rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); if (rc) goto setup_err; - rc = ena_create_io_tx_queues_in_range(adapter, - adapter->xdp_first_ring, - adapter->xdp_num_queues); + rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues); if (rc) goto create_err; return 0; create_err: - ena_free_all_io_tx_resources(adapter); + ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); setup_err: return rc; } From 0cb2021b968e701582c984382b9a9efa55c36466 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Mon, 11 Dec 2023 06:27:59 +0000 Subject: [PATCH 1066/1397] net: ena: Fix xdp drops handling due to multibuf packets [ Upstream commit 505b1a88d311ff6f8c44a34f94e3be21745cce6f ] Current xdp code drops packets larger than ENA_XDP_MAX_MTU. This is an incorrect condition since the problem is not the size of the packet, rather the number of buffers it contains. This commit: 1. Identifies and drops XDP multi-buffer packets at the beginning of the function. 2. Increases the xdp drop statistic when this drop occurs. 3. Adds a one-time print that such drops are happening to give better indication to the user. Fixes: 838c93dc5449 ("net: ena: implement XDP drop support") Signed-off-by: Arthur Kiyanovski Signed-off-by: David Arinzon Link: https://lore.kernel.org/r/20231211062801.27891-3-darinzon@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 098025d29247..b638e1d3d151 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1672,20 +1672,23 @@ static void ena_set_rx_hash(struct ena_ring *rx_ring, } } -static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) +static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u16 num_descs) { struct ena_rx_buffer *rx_info; int ret; + /* XDP multi-buffer packets not supported */ + if (unlikely(num_descs > 1)) { + netdev_err_once(rx_ring->adapter->netdev, + "xdp: dropped unsupported multi-buffer packets\n"); + ena_increase_stat(&rx_ring->rx_stats.xdp_drop, 1, &rx_ring->syncp); + return ENA_XDP_DROP; + } + rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; xdp_prepare_buff(xdp, page_address(rx_info->page), rx_info->buf_offset, rx_ring->ena_bufs[0].len, false); - /* If for some reason we received a bigger packet than - * we expect, then we simply drop it - */ - if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) - return ENA_XDP_DROP; ret = ena_xdp_execute(rx_ring, xdp); @@ -1754,7 +1757,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, ena_rx_ctx.l4_proto, ena_rx_ctx.hash); if (ena_xdp_present_ring(rx_ring)) - xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); + xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs); /* allocate skb and fill it */ if (xdp_verdict == ENA_XDP_PASS) From 0116e02cee5a1a6d304f64faa22eea8cccb4ca9f Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Mon, 11 Dec 2023 06:28:00 +0000 Subject: [PATCH 1067/1397] net: ena: Fix DMA syncing in XDP path when SWIOTLB is on [ Upstream commit d760117060cf2e90b5c59c5492cab179a4dbce01 ] This patch fixes two issues: Issue 1 ------- Description ``````````` Current code does not call dma_sync_single_for_cpu() to sync data from the device side memory to the CPU side memory before the XDP code path uses the CPU side data. This causes the XDP code path to read the unset garbage data in the CPU side memory, resulting in incorrect handling of the packet by XDP. Solution ```````` 1. Add a call to dma_sync_single_for_cpu() before the XDP code starts to use the data in the CPU side memory. 2. The XDP code verdict can be XDP_PASS, in which case there is a fallback to the non-XDP code, which also calls dma_sync_single_for_cpu(). To avoid calling dma_sync_single_for_cpu() twice: 2.1. Put the dma_sync_single_for_cpu() in the code in such a place where it happens before XDP and non-XDP code. 2.2. Remove the calls to dma_sync_single_for_cpu() in the non-XDP code for the first buffer only (rx_copybreak and non-rx_copybreak cases), since the new call that was added covers these cases. The call to dma_sync_single_for_cpu() for the second buffer and on stays because only the first buffer is handled by the newly added dma_sync_single_for_cpu(). And there is no need for special handling of the second buffer and on for the XDP path since currently the driver supports only single buffer packets. Issue 2 ------- Description ``````````` In case the XDP code forwarded the packet (ENA_XDP_FORWARDED), ena_unmap_rx_buff_attrs() is called with attrs set to 0. This means that before unmapping the buffer, the internal function dma_unmap_page_attrs() will also call dma_sync_single_for_cpu() on the whole buffer (not only on the data part of it). This sync is both wasteful (since a sync was already explicitly called before) and also causes a bug, which will be explained using the below diagram. The following diagram shows the flow of events causing the bug. The order of events is (1)-(4) as shown in the diagram. CPU side memory area (3)convert_to_xdp_frame() initializes the headroom with xdpf metadata || \/ ___________________________________ | | 0 | V 4K --------------------------------------------------------------------- | xdpf->data | other xdpf | < data > | tailroom ||...| | | fields | | GARBAGE || | --------------------------------------------------------------------- /\ /\ || || (4)ena_unmap_rx_buff_attrs() calls (2)dma_sync_single_for_cpu() dma_sync_single_for_cpu() on the copies data from device whole buffer page, overwriting side to CPU side memory the xdpf->data with GARBAGE. || 0 4K --------------------------------------------------------------------- | headroom | < data > | tailroom ||...| | GARBAGE | | GARBAGE || | --------------------------------------------------------------------- Device side memory area /\ || (1) device writes RX packet data After the call to ena_unmap_rx_buff_attrs() in (4), the xdpf->data becomes corrupted, and so when it is later accessed in ena_clean_xdp_irq()->xdp_return_frame(), it causes a page fault, crashing the kernel. Solution ```````` Explicitly tell ena_unmap_rx_buff_attrs() not to call dma_sync_single_for_cpu() by passing it the ENA_DMA_ATTR_SKIP_CPU_SYNC flag. Fixes: f7d625adeb7b ("net: ena: Add dynamic recycling mechanism for rx buffers") Signed-off-by: Arthur Kiyanovski Signed-off-by: David Arinzon Link: https://lore.kernel.org/r/20231211062801.27891-4-darinzon@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 23 ++++++++------------ 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index b638e1d3d151..14e41eb57731 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1493,11 +1493,6 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, if (unlikely(!skb)) return NULL; - /* sync this buffer for CPU use */ - dma_sync_single_for_cpu(rx_ring->dev, - dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, - len, - DMA_FROM_DEVICE); skb_copy_to_linear_data(skb, buf_addr + buf_offset, len); dma_sync_single_for_device(rx_ring->dev, dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, @@ -1516,17 +1511,10 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom); - pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr); - /* If XDP isn't loaded try to reuse part of the RX buffer */ reuse_rx_buf_page = !is_xdp_loaded && ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset); - dma_sync_single_for_cpu(rx_ring->dev, - pre_reuse_paddr + pkt_offset, - len, - DMA_FROM_DEVICE); - if (!reuse_rx_buf_page) ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC); @@ -1723,6 +1711,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, int xdp_flags = 0; int total_len = 0; int xdp_verdict; + u8 pkt_offset; int rc = 0; int i; @@ -1749,13 +1738,19 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, /* First descriptor might have an offset set by the device */ rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; - rx_info->buf_offset += ena_rx_ctx.pkt_offset; + pkt_offset = ena_rx_ctx.pkt_offset; + rx_info->buf_offset += pkt_offset; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + dma_sync_single_for_cpu(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, + rx_ring->ena_bufs[0].len, + DMA_FROM_DEVICE); + if (ena_xdp_present_ring(rx_ring)) xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs); @@ -1781,7 +1776,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, if (xdp_verdict & ENA_XDP_FORWARDED) { ena_unmap_rx_buff_attrs(rx_ring, &rx_ring->rx_buffer_info[req_id], - 0); + DMA_ATTR_SKIP_CPU_SYNC); rx_ring->rx_buffer_info[req_id].page = NULL; } } From 2cc8ffc3ad31b5618ad6b34450274d3a9817ce1f Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Mon, 11 Dec 2023 06:28:01 +0000 Subject: [PATCH 1068/1397] net: ena: Fix XDP redirection error [ Upstream commit 4ab138ca0a340e6d6e7a6a9bd5004bd8f83127ca ] When sending TX packets, the meta descriptor can be all zeroes as no meta information is required (as in XDP). This patch removes the validity check, as when `disable_meta_caching` is enabled, such TX packets will be dropped otherwise. Fixes: 0e3a3f6dacf0 ("net: ena: support new LLQ acceleration mode") Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Link: https://lore.kernel.org/r/20231211062801.27891-5-darinzon@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_eth_com.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index 3d6f0a466a9e..f9f886289b97 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -328,9 +328,6 @@ static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, * compare it to the stored version, just create the meta */ if (io_sq->disable_meta_caching) { - if (unlikely(!ena_tx_ctx->meta_valid)) - return -EINVAL; - *have_meta = true; return ena_com_create_meta(io_sq, ena_meta); } From e0069c26c552c5c7f1009918eedac31a1eaa141b Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Mon, 11 Dec 2023 18:33:11 +0800 Subject: [PATCH 1069/1397] stmmac: dwmac-loongson: Make sure MDIO is initialized before use [ Upstream commit e87d3a1370ce9f04770d789bcf7cce44865d2e8d ] Generic code will use mdio. If it is not initialized before use, the kernel will Oops. Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson") Signed-off-by: Yanteng Si Signed-off-by: Feiyang Chen Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/stmicro/stmmac/dwmac-loongson.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 2cd6fce5c993..e7701326adc6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -68,17 +68,15 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id if (!plat) return -ENOMEM; + plat->mdio_bus_data = devm_kzalloc(&pdev->dev, + sizeof(*plat->mdio_bus_data), + GFP_KERNEL); + if (!plat->mdio_bus_data) + return -ENOMEM; + plat->mdio_node = of_get_child_by_name(np, "mdio"); if (plat->mdio_node) { dev_info(&pdev->dev, "Found MDIO subnode\n"); - - plat->mdio_bus_data = devm_kzalloc(&pdev->dev, - sizeof(*plat->mdio_bus_data), - GFP_KERNEL); - if (!plat->mdio_bus_data) { - ret = -ENOMEM; - goto err_put_node; - } plat->mdio_bus_data->needs_reset = true; } From f18ac4bae15ea7821716c8d46707bdeaa594b64e Mon Sep 17 00:00:00 2001 From: Yusong Gao Date: Wed, 13 Dec 2023 10:31:10 +0000 Subject: [PATCH 1070/1397] sign-file: Fix incorrect return values check [ Upstream commit 829649443e78d85db0cff0c37cadb28fbb1a5f6f ] There are some wrong return values check in sign-file when call OpenSSL API. The ERR() check cond is wrong because of the program only check the return value is < 0 which ignored the return val is 0. For example: 1. CMS_final() return 1 for success or 0 for failure. 2. i2d_CMS_bio_stream() returns 1 for success or 0 for failure. 3. i2d_TYPEbio() return 1 for success and 0 for failure. 4. BIO_free() return 1 for success and 0 for failure. Link: https://www.openssl.org/docs/manmaster/man3/ Fixes: e5a2e3c84782 ("scripts/sign-file.c: Add support for signing with a raw signature") Signed-off-by: Yusong Gao Reviewed-by: Juerg Haefliger Signed-off-by: David Howells Link: https://lore.kernel.org/r/20231213024405.624692-1-a869920004@gmail.com/ # v5 Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- scripts/sign-file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/sign-file.c b/scripts/sign-file.c index 598ef5465f82..3edb156ae52c 100644 --- a/scripts/sign-file.c +++ b/scripts/sign-file.c @@ -322,7 +322,7 @@ int main(int argc, char **argv) CMS_NOSMIMECAP | use_keyid | use_signed_attrs), "CMS_add1_signer"); - ERR(CMS_final(cms, bm, NULL, CMS_NOCERTS | CMS_BINARY) < 0, + ERR(CMS_final(cms, bm, NULL, CMS_NOCERTS | CMS_BINARY) != 1, "CMS_final"); #else @@ -341,10 +341,10 @@ int main(int argc, char **argv) b = BIO_new_file(sig_file_name, "wb"); ERR(!b, "%s", sig_file_name); #ifndef USE_PKCS7 - ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) < 0, + ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) != 1, "%s", sig_file_name); #else - ERR(i2d_PKCS7_bio(b, pkcs7) < 0, + ERR(i2d_PKCS7_bio(b, pkcs7) != 1, "%s", sig_file_name); #endif BIO_free(b); @@ -374,9 +374,9 @@ int main(int argc, char **argv) if (!raw_sig) { #ifndef USE_PKCS7 - ERR(i2d_CMS_bio_stream(bd, cms, NULL, 0) < 0, "%s", dest_name); + ERR(i2d_CMS_bio_stream(bd, cms, NULL, 0) != 1, "%s", dest_name); #else - ERR(i2d_PKCS7_bio(bd, pkcs7) < 0, "%s", dest_name); + ERR(i2d_PKCS7_bio(bd, pkcs7) != 1, "%s", dest_name); #endif } else { BIO *b; @@ -396,7 +396,7 @@ int main(int argc, char **argv) ERR(BIO_write(bd, &sig_info, sizeof(sig_info)) < 0, "%s", dest_name); ERR(BIO_write(bd, magic_number, sizeof(magic_number) - 1) < 0, "%s", dest_name); - ERR(BIO_free(bd) < 0, "%s", dest_name); + ERR(BIO_free(bd) != 1, "%s", dest_name); /* Finally, if we're signing in place, replace the original. */ if (replace_orig) From fa634779ffcc6938882bb6ac28308973c4975eab Mon Sep 17 00:00:00 2001 From: Nikolay Kuratov Date: Mon, 11 Dec 2023 19:23:17 +0300 Subject: [PATCH 1071/1397] vsock/virtio: Fix unsigned integer wrap around in virtio_transport_has_space() [ Upstream commit 60316d7f10b17a7ebb1ead0642fee8710e1560e0 ] We need to do signed arithmetic if we expect condition `if (bytes < 0)` to be possible Found by Linux Verification Center (linuxtesting.org) with SVACE Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") Signed-off-by: Nikolay Kuratov Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20231211162317.4116625-1-kniv@yandex-team.ru Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/vmw_vsock/virtio_transport_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 8bc272b6003b..4084578b0b91 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -679,7 +679,7 @@ static s64 virtio_transport_has_space(struct vsock_sock *vsk) struct virtio_vsock_sock *vvs = vsk->trans; s64 bytes; - bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); + bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); if (bytes < 0) bytes = 0; From 77e566c8813024d9e75ce6b8e9618d94eb9f05fa Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 12 Dec 2023 18:43:25 +0200 Subject: [PATCH 1072/1397] dpaa2-switch: fix size of the dma_unmap [ Upstream commit 2aad7d4189a923b24efa8ea6ad09059882b1bfe4 ] The size of the DMA unmap was wrongly put as a sizeof of a pointer. Change the value of the DMA unmap to be the actual macro used for the allocation and the DMA map. Fixes: 1110318d83e8 ("dpaa2-switch: add tc flower hardware offload on ingress traffic") Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20231212164326.2753457-2-ioana.ciornei@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c index 4798fb7fe35d..b6a534a3e0b1 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c @@ -139,7 +139,8 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, err = dpsw_acl_add_entry(ethsw->mc_io, 0, ethsw->dpsw_handle, filter_block->acl_id, acl_entry_cfg); - dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff), + dma_unmap_single(dev, acl_entry_cfg->key_iova, + DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, DMA_TO_DEVICE); if (err) { dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err); @@ -181,8 +182,8 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, err = dpsw_acl_remove_entry(ethsw->mc_io, 0, ethsw->dpsw_handle, block->acl_id, acl_entry_cfg); - dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff), - DMA_TO_DEVICE); + dma_unmap_single(dev, acl_entry_cfg->key_iova, + DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, DMA_TO_DEVICE); if (err) { dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err); kfree(cmd_buff); From da8732cb38eac4461d7cbbfe51f6b43db18965c7 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 12 Dec 2023 18:43:26 +0200 Subject: [PATCH 1073/1397] dpaa2-switch: do not ask for MDB, VLAN and FDB replay [ Upstream commit f24a49a375f65e8e75ee1b19d806f46dbaae57fd ] Starting with commit 4e51bf44a03a ("net: bridge: move the switchdev object replay helpers to "push" mode") the switchdev_bridge_port_offload() helper was extended with the intention to provide switchdev drivers easy access to object addition and deletion replays. This works by calling the replay helpers with non-NULL notifier blocks. In the same commit, the dpaa2-switch driver was updated so that it passes valid notifier blocks to the helper. At that moment, no regression was identified through testing. In the meantime, the blamed commit changed the behavior in terms of which ports get hit by the replay. Before this commit, only the initial port which identified itself as offloaded through switchdev_bridge_port_offload() got a replay of all port objects and FDBs. After this, the newly joining port will trigger a replay of objects on all bridge ports and on the bridge itself. This behavior leads to errors in dpaa2_switch_port_vlans_add() when a VLAN gets installed on the same interface multiple times. The intended mechanism to address this is to pass a non-NULL ctx to the switchdev_bridge_port_offload() helper and then check it against the port's private structure. But since the driver does not have any use for the replayed port objects and FDBs until it gains support for LAG offload, it's better to fix the issue by reverting the dpaa2-switch driver to not ask for replay. The pointers will be added back when we are prepared to ignore replays on unrelated ports. Fixes: b28d580e2939 ("net: bridge: switchdev: replay all VLAN groups") Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20231212164326.2753457-3-ioana.ciornei@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 97d3151076d5..e01a246124ac 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -1998,9 +1998,6 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev, return notifier_from_errno(err); } -static struct notifier_block dpaa2_switch_port_switchdev_nb; -static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb; - static int dpaa2_switch_port_bridge_join(struct net_device *netdev, struct net_device *upper_dev, struct netlink_ext_ack *extack) @@ -2043,9 +2040,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev, goto err_egress_flood; err = switchdev_bridge_port_offload(netdev, netdev, NULL, - &dpaa2_switch_port_switchdev_nb, - &dpaa2_switch_port_switchdev_blocking_nb, - false, extack); + NULL, NULL, false, extack); if (err) goto err_switchdev_offload; @@ -2079,9 +2074,7 @@ static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, vo static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev) { - switchdev_bridge_port_unoffload(netdev, NULL, - &dpaa2_switch_port_switchdev_nb, - &dpaa2_switch_port_switchdev_blocking_nb); + switchdev_bridge_port_unoffload(netdev, NULL, NULL, NULL); } static int dpaa2_switch_port_bridge_leave(struct net_device *netdev) From ad531dfcc648eb24d512a4acfeec76000eabaf42 Mon Sep 17 00:00:00 2001 From: Sneh Shah Date: Tue, 12 Dec 2023 14:52:08 +0530 Subject: [PATCH 1074/1397] net: stmmac: dwmac-qcom-ethqos: Fix drops in 10M SGMII RX [ Upstream commit 981d947bcd382c3950a593690e0e13d194d65b1c ] In 10M SGMII mode all the packets are being dropped due to wrong Rx clock. SGMII 10MBPS mode needs RX clock divider programmed to avoid drops in Rx. Update configure SGMII function with Rx clk divider programming. Fixes: 463120c31c58 ("net: stmmac: dwmac-qcom-ethqos: add support for SGMII") Tested-by: Andrew Halaney Signed-off-by: Sneh Shah Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20231212092208.22393-1-quic_snehshah@quicinc.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index d3bf42d0fceb..31631e3f89d0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -34,6 +34,7 @@ #define RGMII_CONFIG_LOOPBACK_EN BIT(2) #define RGMII_CONFIG_PROG_SWAP BIT(1) #define RGMII_CONFIG_DDR_MODE BIT(0) +#define RGMII_CONFIG_SGMII_CLK_DVDR GENMASK(18, 10) /* SDCC_HC_REG_DLL_CONFIG fields */ #define SDCC_DLL_CONFIG_DLL_RST BIT(30) @@ -78,6 +79,8 @@ #define ETHQOS_MAC_CTRL_SPEED_MODE BIT(14) #define ETHQOS_MAC_CTRL_PORT_SEL BIT(15) +#define SGMII_10M_RX_CLK_DVDR 0x31 + struct ethqos_emac_por { unsigned int offset; unsigned int value; @@ -598,6 +601,9 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) return 0; } +/* On interface toggle MAC registers gets reset. + * Configure MAC block for SGMII on ethernet phy link up + */ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos) { int val; @@ -617,6 +623,10 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos) case SPEED_10: val |= ETHQOS_MAC_CTRL_PORT_SEL; val &= ~ETHQOS_MAC_CTRL_SPEED_MODE; + rgmii_updatel(ethqos, RGMII_CONFIG_SGMII_CLK_DVDR, + FIELD_PREP(RGMII_CONFIG_SGMII_CLK_DVDR, + SGMII_10M_RX_CLK_DVDR), + RGMII_IO_MACRO_CONFIG); break; } From 58c556661641c5ecbfc064067a31729d8d2c193d Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Tue, 12 Dec 2023 16:18:33 -0600 Subject: [PATCH 1075/1397] net: stmmac: Handle disabled MDIO busses from devicetree [ Upstream commit e23c0d21ce9234fbc31ece35663ababbb83f9347 ] Many hardware configurations have the MDIO bus disabled, and are instead using some other MDIO bus to talk to the MAC's phy. of_mdiobus_register() returns -ENODEV in this case. Let's handle it gracefully instead of failing to probe the MAC. Fixes: 47dd7a540b8a ("net: add support for STMicroelectronics Ethernet controllers.") Signed-off-by: Andrew Halaney Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20231212-b4-stmmac-handle-mdio-enodev-v2-1-600171acf79f@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index fa9e7e7040b9..0542cfd1817e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -591,7 +591,11 @@ int stmmac_mdio_register(struct net_device *ndev) new_bus->parent = priv->device; err = of_mdiobus_register(new_bus, mdio_node); - if (err != 0) { + if (err == -ENODEV) { + err = 0; + dev_info(dev, "MDIO bus is disabled\n"); + goto bus_register_fail; + } else if (err) { dev_err_probe(dev, err, "Cannot register the MDIO bus\n"); goto bus_register_fail; } From e15ded324a3911358e8541a1b573665f99f216ef Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Tue, 12 Dec 2023 23:10:56 -0500 Subject: [PATCH 1076/1397] appletalk: Fix Use-After-Free in atalk_ioctl [ Upstream commit 189ff16722ee36ced4d2a2469d4ab65a8fee4198 ] Because atalk_ioctl() accesses sk->sk_receive_queue without holding a sk->sk_receive_queue.lock, it can cause a race with atalk_recvmsg(). A use-after-free for skb occurs with the following flow. ``` atalk_ioctl() -> skb_peek() atalk_recvmsg() -> skb_recv_datagram() -> skb_free_datagram() ``` Add sk->sk_receive_queue.lock to atalk_ioctl() to fix this issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Link: https://lore.kernel.org/r/20231213041056.GA519680@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/appletalk/ddp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 8978fb6212ff..b070a8991200 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1811,15 +1811,14 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } case TIOCINQ: { - /* - * These two are safe on a single CPU system as only - * user tasks fiddle here - */ - struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + struct sk_buff *skb; long amount = 0; + spin_lock_irq(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); if (skb) amount = skb->len - sizeof(struct ddpehdr); + spin_unlock_irq(&sk->sk_receive_queue.lock); rc = put_user(amount, (int __user *)argp); break; } From 3b5daf20c426b04d9278c0d3d6e3c7e0d6f5c852 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Wed, 13 Dec 2023 10:40:44 +0100 Subject: [PATCH 1077/1397] net: atlantic: fix double free in ring reinit logic [ Upstream commit 7bb26ea74aa86fdf894b7dbd8c5712c5b4187da7 ] Driver has a logic leak in ring data allocation/free, where double free may happen in aq_ring_free if system is under stress and driver init/deinit is happening. The probability is higher to get this during suspend/resume cycle. Verification was done simulating same conditions with stress -m 2000 --vm-bytes 20M --vm-hang 10 --backoff 1000 while true; do sudo ifconfig enp1s0 down; sudo ifconfig enp1s0 up; done Fixed by explicitly clearing pointers to NULL on deallocation Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Reported-by: Linus Torvalds Closes: https://lore.kernel.org/netdev/CAHk-=wiZZi7FcvqVSUirHBjx0bBUZ4dFrMDVLc3+3HCrtq0rBA@mail.gmail.com/ Signed-off-by: Igor Russkikh Link: https://lore.kernel.org/r/20231213094044.22988-1-irusskikh@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 694daeaf3e61..e1885c1eb100 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -938,11 +938,14 @@ void aq_ring_free(struct aq_ring_s *self) return; kfree(self->buff_ring); + self->buff_ring = NULL; - if (self->dx_ring) + if (self->dx_ring) { dma_free_coherent(aq_nic_get_dev(self->aq_nic), self->size * self->dx_size, self->dx_ring, self->dx_ring_pa); + self->dx_ring = NULL; + } } unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) From f6a7ce5ae416925954a23be83dee621dc8a4b98a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Dec 2023 13:24:10 -0700 Subject: [PATCH 1078/1397] cred: switch to using atomic_long_t commit f8fa5d76925991976b3e7076f9d1052515ec1fca upstream. There are multiple ways to grab references to credentials, and the only protection we have against overflowing it is the memory required to do so. With memory sizes only moving in one direction, let's bump the reference count to 64-bit and move it outside the realm of feasibly overflowing. Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/cred.h | 8 +++--- kernel/cred.c | 64 ++++++++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index f923528d5cc4..e7502be578dc 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -108,7 +108,7 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) * same context as task->real_cred. */ struct cred { - atomic_t usage; + atomic_long_t usage; #ifdef CONFIG_DEBUG_CREDENTIALS atomic_t subscribers; /* number of processes subscribed */ void *put_addr; @@ -227,7 +227,7 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred(struct cred *cred) { - atomic_inc(&cred->usage); + atomic_long_inc(&cred->usage); return cred; } @@ -259,7 +259,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; - if (!atomic_inc_not_zero(&nonconst_cred->usage)) + if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; validate_creds(cred); nonconst_cred->non_rcu = 0; @@ -283,7 +283,7 @@ static inline void put_cred(const struct cred *_cred) if (cred) { validate_creds(cred); - if (atomic_dec_and_test(&(cred)->usage)) + if (atomic_long_dec_and_test(&(cred)->usage)) __put_cred(cred); } } diff --git a/kernel/cred.c b/kernel/cred.c index 98cb4eca23fb..20f590c97cdb 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -102,17 +102,17 @@ static void put_cred_rcu(struct rcu_head *rcu) #ifdef CONFIG_DEBUG_CREDENTIALS if (cred->magic != CRED_MAGIC_DEAD || - atomic_read(&cred->usage) != 0 || + atomic_long_read(&cred->usage) != 0 || read_cred_subscribers(cred) != 0) panic("CRED: put_cred_rcu() sees %p with" - " mag %x, put %p, usage %d, subscr %d\n", + " mag %x, put %p, usage %ld, subscr %d\n", cred, cred->magic, cred->put_addr, - atomic_read(&cred->usage), + atomic_long_read(&cred->usage), read_cred_subscribers(cred)); #else - if (atomic_read(&cred->usage) != 0) - panic("CRED: put_cred_rcu() sees %p with usage %d\n", - cred, atomic_read(&cred->usage)); + if (atomic_long_read(&cred->usage) != 0) + panic("CRED: put_cred_rcu() sees %p with usage %ld\n", + cred, atomic_long_read(&cred->usage)); #endif security_cred_free(cred); @@ -137,11 +137,11 @@ static void put_cred_rcu(struct rcu_head *rcu) */ void __put_cred(struct cred *cred) { - kdebug("__put_cred(%p{%d,%d})", cred, - atomic_read(&cred->usage), + kdebug("__put_cred(%p{%ld,%d})", cred, + atomic_long_read(&cred->usage), read_cred_subscribers(cred)); - BUG_ON(atomic_read(&cred->usage) != 0); + BUG_ON(atomic_long_read(&cred->usage) != 0); #ifdef CONFIG_DEBUG_CREDENTIALS BUG_ON(read_cred_subscribers(cred) != 0); cred->magic = CRED_MAGIC_DEAD; @@ -164,8 +164,8 @@ void exit_creds(struct task_struct *tsk) { struct cred *cred; - kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred, - atomic_read(&tsk->cred->usage), + kdebug("exit_creds(%u,%p,%p,{%ld,%d})", tsk->pid, tsk->real_cred, tsk->cred, + atomic_long_read(&tsk->cred->usage), read_cred_subscribers(tsk->cred)); cred = (struct cred *) tsk->real_cred; @@ -224,7 +224,7 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; - atomic_set(&new->usage, 1); + atomic_long_set(&new->usage, 1); #ifdef CONFIG_DEBUG_CREDENTIALS new->magic = CRED_MAGIC; #endif @@ -270,7 +270,7 @@ struct cred *prepare_creds(void) memcpy(new, old, sizeof(struct cred)); new->non_rcu = 0; - atomic_set(&new->usage, 1); + atomic_long_set(&new->usage, 1); set_cred_subscribers(new, 0); get_group_info(new->group_info); get_uid(new->user); @@ -358,8 +358,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) p->real_cred = get_cred(p->cred); get_cred(p->cred); alter_cred_subscribers(p->cred, 2); - kdebug("share_creds(%p{%d,%d})", - p->cred, atomic_read(&p->cred->usage), + kdebug("share_creds(%p{%ld,%d})", + p->cred, atomic_long_read(&p->cred->usage), read_cred_subscribers(p->cred)); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); return 0; @@ -452,8 +452,8 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old = task->real_cred; - kdebug("commit_creds(%p{%d,%d})", new, - atomic_read(&new->usage), + kdebug("commit_creds(%p{%ld,%d})", new, + atomic_long_read(&new->usage), read_cred_subscribers(new)); BUG_ON(task->cred != old); @@ -462,7 +462,7 @@ int commit_creds(struct cred *new) validate_creds(old); validate_creds(new); #endif - BUG_ON(atomic_read(&new->usage) < 1); + BUG_ON(atomic_long_read(&new->usage) < 1); get_cred(new); /* we will require a ref for the subj creds too */ @@ -535,14 +535,14 @@ EXPORT_SYMBOL(commit_creds); */ void abort_creds(struct cred *new) { - kdebug("abort_creds(%p{%d,%d})", new, - atomic_read(&new->usage), + kdebug("abort_creds(%p{%ld,%d})", new, + atomic_long_read(&new->usage), read_cred_subscribers(new)); #ifdef CONFIG_DEBUG_CREDENTIALS BUG_ON(read_cred_subscribers(new) != 0); #endif - BUG_ON(atomic_read(&new->usage) < 1); + BUG_ON(atomic_long_read(&new->usage) < 1); put_cred(new); } EXPORT_SYMBOL(abort_creds); @@ -558,8 +558,8 @@ const struct cred *override_creds(const struct cred *new) { const struct cred *old = current->cred; - kdebug("override_creds(%p{%d,%d})", new, - atomic_read(&new->usage), + kdebug("override_creds(%p{%ld,%d})", new, + atomic_long_read(&new->usage), read_cred_subscribers(new)); validate_creds(old); @@ -581,8 +581,8 @@ const struct cred *override_creds(const struct cred *new) rcu_assign_pointer(current->cred, new); alter_cred_subscribers(old, -1); - kdebug("override_creds() = %p{%d,%d}", old, - atomic_read(&old->usage), + kdebug("override_creds() = %p{%ld,%d}", old, + atomic_long_read(&old->usage), read_cred_subscribers(old)); return old; } @@ -599,8 +599,8 @@ void revert_creds(const struct cred *old) { const struct cred *override = current->cred; - kdebug("revert_creds(%p{%d,%d})", old, - atomic_read(&old->usage), + kdebug("revert_creds(%p{%ld,%d})", old, + atomic_long_read(&old->usage), read_cred_subscribers(old)); validate_creds(old); @@ -731,7 +731,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) *new = *old; new->non_rcu = 0; - atomic_set(&new->usage, 1); + atomic_long_set(&new->usage, 1); set_cred_subscribers(new, 0); get_uid(new->user); get_user_ns(new->user_ns); @@ -845,8 +845,8 @@ static void dump_invalid_creds(const struct cred *cred, const char *label, cred == tsk->cred ? "[eff]" : ""); pr_err("->magic=%x, put_addr=%p\n", cred->magic, cred->put_addr); - pr_err("->usage=%d, subscr=%d\n", - atomic_read(&cred->usage), + pr_err("->usage=%ld, subscr=%d\n", + atomic_long_read(&cred->usage), read_cred_subscribers(cred)); pr_err("->*uid = { %d,%d,%d,%d }\n", from_kuid_munged(&init_user_ns, cred->uid), @@ -918,9 +918,9 @@ EXPORT_SYMBOL(__validate_process_creds); */ void validate_creds_for_do_exit(struct task_struct *tsk) { - kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})", + kdebug("validate_creds_for_do_exit(%p,%p{%ld,%d})", tsk->real_cred, tsk->cred, - atomic_read(&tsk->cred->usage), + atomic_long_read(&tsk->cred->usage), read_cred_subscribers(tsk->cred)); __validate_process_creds(tsk, __FILE__, __LINE__); From 207f135d819344c03333246f784f6666e652e081 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Dec 2023 13:40:57 -0700 Subject: [PATCH 1079/1397] cred: get rid of CONFIG_DEBUG_CREDENTIALS commit ae1914174a63a558113e80d24ccac2773f9f7b2b upstream. This code is rarely (never?) enabled by distros, and it hasn't caught anything in decades. Let's kill off this legacy debug code. Suggested-by: Linus Torvalds Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/configs/skiroot_defconfig | 1 - arch/s390/configs/debug_defconfig | 1 - fs/nfsd/auth.c | 4 - fs/nfsd/nfssvc.c | 1 - fs/nfsd/vfs.c | 9 +- fs/open.c | 3 - include/linux/cred.h | 50 ----- kernel/cred.c | 230 ++-------------------- kernel/exit.c | 3 - lib/Kconfig.debug | 15 -- net/sunrpc/auth.c | 3 - security/selinux/hooks.c | 6 - tools/objtool/noreturns.h | 1 - tools/testing/selftests/bpf/config.x86_64 | 1 - tools/testing/selftests/hid/config.common | 1 - 15 files changed, 17 insertions(+), 312 deletions(-) diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 8d3eacb50d56..9d44e6630908 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -301,7 +301,6 @@ CONFIG_WQ_WATCHDOG=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_DEBUG_CREDENTIALS=y # CONFIG_FTRACE is not set CONFIG_XMON=y # CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 438cd92e6080..dd0608629310 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -834,7 +834,6 @@ CONFIG_DEBUG_IRQFLAGS=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index fdf2aad73470..e6beaaf4f170 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -26,8 +26,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) int i; int flags = nfsexp_flags(rqstp, exp); - validate_process_creds(); - /* discard any old override before preparing the new set */ revert_creds(get_cred(current_real_cred())); new = prepare_creds(); @@ -81,10 +79,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - validate_process_creds(); put_cred(override_creds(new)); put_cred(new); - validate_process_creds(); return 0; oom: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index a87e9ef61386..211458203d14 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -962,7 +962,6 @@ nfsd(void *vrqstp) rqstp->rq_server->sv_maxconn = nn->max_connections; svc_recv(rqstp); - validate_process_creds(); } atomic_dec(&nfsdstats.th_cnt); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b24462efa178..d0fdf70ab20d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -884,7 +884,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int host_err; bool retried = false; - validate_process_creds(); /* * If we get here, then the client has already done an "open", * and (hopefully) checked permission - so allow OWNER_OVERRIDE @@ -909,7 +908,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, } err = nfserrno(host_err); } - validate_process_creds(); return err; } @@ -926,12 +924,7 @@ int nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, struct file **filp) { - int err; - - validate_process_creds(); - err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); - validate_process_creds(); - return err; + return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); } /* diff --git a/fs/open.c b/fs/open.c index 98f6601fbac6..54723fceb776 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1069,8 +1069,6 @@ struct file *dentry_open(const struct path *path, int flags, int error; struct file *f; - validate_creds(cred); - /* We must always pass in a valid mount pointer. */ BUG_ON(!path->mnt); @@ -1109,7 +1107,6 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, struct file *f; int error; - validate_creds(cred); f = alloc_empty_file(flags, cred); if (IS_ERR(f)) return f; diff --git a/include/linux/cred.h b/include/linux/cred.h index e7502be578dc..bb55703e1166 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -109,13 +109,6 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) */ struct cred { atomic_long_t usage; -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_t subscribers; /* number of processes subscribed */ - void *put_addr; - unsigned magic; -#define CRED_MAGIC 0x43736564 -#define CRED_MAGIC_DEAD 0x44656144 -#endif kuid_t uid; /* real UID of the task */ kgid_t gid; /* real GID of the task */ kuid_t suid; /* saved UID of the task */ @@ -171,46 +164,6 @@ extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); extern int set_cred_ucounts(struct cred *); -/* - * check for validity of credentials - */ -#ifdef CONFIG_DEBUG_CREDENTIALS -extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); -extern void __validate_process_creds(struct task_struct *, - const char *, unsigned); - -extern bool creds_are_invalid(const struct cred *cred); - -static inline void __validate_creds(const struct cred *cred, - const char *file, unsigned line) -{ - if (unlikely(creds_are_invalid(cred))) - __invalid_creds(cred, file, line); -} - -#define validate_creds(cred) \ -do { \ - __validate_creds((cred), __FILE__, __LINE__); \ -} while(0) - -#define validate_process_creds() \ -do { \ - __validate_process_creds(current, __FILE__, __LINE__); \ -} while(0) - -extern void validate_creds_for_do_exit(struct task_struct *); -#else -static inline void validate_creds(const struct cred *cred) -{ -} -static inline void validate_creds_for_do_exit(struct task_struct *tsk) -{ -} -static inline void validate_process_creds(void) -{ -} -#endif - static inline bool cap_ambient_invariant_ok(const struct cred *cred) { return cap_issubset(cred->cap_ambient, @@ -249,7 +202,6 @@ static inline const struct cred *get_cred(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; - validate_creds(cred); nonconst_cred->non_rcu = 0; return get_new_cred(nonconst_cred); } @@ -261,7 +213,6 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) return NULL; if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; - validate_creds(cred); nonconst_cred->non_rcu = 0; return cred; } @@ -282,7 +233,6 @@ static inline void put_cred(const struct cred *_cred) struct cred *cred = (struct cred *) _cred; if (cred) { - validate_creds(cred); if (atomic_long_dec_and_test(&(cred)->usage)) __put_cred(cred); } diff --git a/kernel/cred.c b/kernel/cred.c index 20f590c97cdb..64404d51c052 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -43,10 +43,6 @@ static struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; */ struct cred init_cred = { .usage = ATOMIC_INIT(4), -#ifdef CONFIG_DEBUG_CREDENTIALS - .subscribers = ATOMIC_INIT(2), - .magic = CRED_MAGIC, -#endif .uid = GLOBAL_ROOT_UID, .gid = GLOBAL_ROOT_GID, .suid = GLOBAL_ROOT_UID, @@ -66,31 +62,6 @@ struct cred init_cred = { .ucounts = &init_ucounts, }; -static inline void set_cred_subscribers(struct cred *cred, int n) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_set(&cred->subscribers, n); -#endif -} - -static inline int read_cred_subscribers(const struct cred *cred) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - return atomic_read(&cred->subscribers); -#else - return 0; -#endif -} - -static inline void alter_cred_subscribers(const struct cred *_cred, int n) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - struct cred *cred = (struct cred *) _cred; - - atomic_add(n, &cred->subscribers); -#endif -} - /* * The RCU callback to actually dispose of a set of credentials */ @@ -100,20 +71,9 @@ static void put_cred_rcu(struct rcu_head *rcu) kdebug("put_cred_rcu(%p)", cred); -#ifdef CONFIG_DEBUG_CREDENTIALS - if (cred->magic != CRED_MAGIC_DEAD || - atomic_long_read(&cred->usage) != 0 || - read_cred_subscribers(cred) != 0) - panic("CRED: put_cred_rcu() sees %p with" - " mag %x, put %p, usage %ld, subscr %d\n", - cred, cred->magic, cred->put_addr, - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); -#else if (atomic_long_read(&cred->usage) != 0) panic("CRED: put_cred_rcu() sees %p with usage %ld\n", cred, atomic_long_read(&cred->usage)); -#endif security_cred_free(cred); key_put(cred->session_keyring); @@ -137,16 +97,10 @@ static void put_cred_rcu(struct rcu_head *rcu) */ void __put_cred(struct cred *cred) { - kdebug("__put_cred(%p{%ld,%d})", cred, - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); + kdebug("__put_cred(%p{%ld})", cred, + atomic_long_read(&cred->usage)); BUG_ON(atomic_long_read(&cred->usage) != 0); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(cred) != 0); - cred->magic = CRED_MAGIC_DEAD; - cred->put_addr = __builtin_return_address(0); -#endif BUG_ON(cred == current->cred); BUG_ON(cred == current->real_cred); @@ -164,20 +118,15 @@ void exit_creds(struct task_struct *tsk) { struct cred *cred; - kdebug("exit_creds(%u,%p,%p,{%ld,%d})", tsk->pid, tsk->real_cred, tsk->cred, - atomic_long_read(&tsk->cred->usage), - read_cred_subscribers(tsk->cred)); + kdebug("exit_creds(%u,%p,%p,{%ld})", tsk->pid, tsk->real_cred, tsk->cred, + atomic_long_read(&tsk->cred->usage)); cred = (struct cred *) tsk->real_cred; tsk->real_cred = NULL; - validate_creds(cred); - alter_cred_subscribers(cred, -1); put_cred(cred); cred = (struct cred *) tsk->cred; tsk->cred = NULL; - validate_creds(cred); - alter_cred_subscribers(cred, -1); put_cred(cred); #ifdef CONFIG_KEYS_REQUEST_CACHE @@ -225,9 +174,6 @@ struct cred *cred_alloc_blank(void) return NULL; atomic_long_set(&new->usage, 1); -#ifdef CONFIG_DEBUG_CREDENTIALS - new->magic = CRED_MAGIC; -#endif if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; @@ -258,8 +204,6 @@ struct cred *prepare_creds(void) const struct cred *old; struct cred *new; - validate_process_creds(); - new = kmem_cache_alloc(cred_jar, GFP_KERNEL); if (!new) return NULL; @@ -271,7 +215,6 @@ struct cred *prepare_creds(void) new->non_rcu = 0; atomic_long_set(&new->usage, 1); - set_cred_subscribers(new, 0); get_group_info(new->group_info); get_uid(new->user); get_user_ns(new->user_ns); @@ -294,7 +237,6 @@ struct cred *prepare_creds(void) if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; - validate_creds(new); return new; error: @@ -357,10 +299,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) ) { p->real_cred = get_cred(p->cred); get_cred(p->cred); - alter_cred_subscribers(p->cred, 2); - kdebug("share_creds(%p{%ld,%d})", - p->cred, atomic_long_read(&p->cred->usage), - read_cred_subscribers(p->cred)); + kdebug("share_creds(%p{%ld})", + p->cred, atomic_long_read(&p->cred->usage)); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); return 0; } @@ -399,8 +339,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) p->cred = p->real_cred = get_cred(new); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); - alter_cred_subscribers(new, 2); - validate_creds(new); return 0; error_put: @@ -452,16 +390,10 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old = task->real_cred; - kdebug("commit_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); + kdebug("commit_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); BUG_ON(task->cred != old); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(old) < 2); - validate_creds(old); - validate_creds(new); -#endif BUG_ON(atomic_long_read(&new->usage) < 1); get_cred(new); /* we will require a ref for the subj creds too */ @@ -497,14 +429,12 @@ int commit_creds(struct cred *new) * RLIMIT_NPROC limits on user->processes have already been checked * in set_user(). */ - alter_cred_subscribers(new, 2); if (new->user != old->user || new->user_ns != old->user_ns) inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1); rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user || new->user_ns != old->user_ns) dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1); - alter_cred_subscribers(old, -2); /* send notifications */ if (!uid_eq(new->uid, old->uid) || @@ -535,13 +465,9 @@ EXPORT_SYMBOL(commit_creds); */ void abort_creds(struct cred *new) { - kdebug("abort_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); + kdebug("abort_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(new) != 0); -#endif BUG_ON(atomic_long_read(&new->usage) < 1); put_cred(new); } @@ -558,12 +484,8 @@ const struct cred *override_creds(const struct cred *new) { const struct cred *old = current->cred; - kdebug("override_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); - - validate_creds(old); - validate_creds(new); + kdebug("override_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); /* * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'. @@ -572,18 +494,12 @@ const struct cred *override_creds(const struct cred *new) * we are only installing the cred into the thread-synchronous * '->cred' pointer, not the '->real_cred' pointer that is * visible to other threads under RCU. - * - * Also note that we did validate_creds() manually, not depending - * on the validation in 'get_cred()'. */ get_new_cred((struct cred *)new); - alter_cred_subscribers(new, 1); rcu_assign_pointer(current->cred, new); - alter_cred_subscribers(old, -1); - kdebug("override_creds() = %p{%ld,%d}", old, - atomic_long_read(&old->usage), - read_cred_subscribers(old)); + kdebug("override_creds() = %p{%ld}", old, + atomic_long_read(&old->usage)); return old; } EXPORT_SYMBOL(override_creds); @@ -599,15 +515,10 @@ void revert_creds(const struct cred *old) { const struct cred *override = current->cred; - kdebug("revert_creds(%p{%ld,%d})", old, - atomic_long_read(&old->usage), - read_cred_subscribers(old)); + kdebug("revert_creds(%p{%ld})", old, + atomic_long_read(&old->usage)); - validate_creds(old); - validate_creds(override); - alter_cred_subscribers(old, 1); rcu_assign_pointer(current->cred, old); - alter_cred_subscribers(override, -1); put_cred(override); } EXPORT_SYMBOL(revert_creds); @@ -727,12 +638,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) kdebug("prepare_kernel_cred() alloc %p", new); old = get_task_cred(daemon); - validate_creds(old); *new = *old; new->non_rcu = 0; atomic_long_set(&new->usage, 1); - set_cred_subscribers(new, 0); get_uid(new->user); get_user_ns(new->user_ns); get_group_info(new->group_info); @@ -756,7 +665,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) goto error; put_cred(old); - validate_creds(new); return new; error: @@ -821,109 +729,3 @@ int set_create_files_as(struct cred *new, struct inode *inode) return security_kernel_create_files_as(new, inode); } EXPORT_SYMBOL(set_create_files_as); - -#ifdef CONFIG_DEBUG_CREDENTIALS - -bool creds_are_invalid(const struct cred *cred) -{ - if (cred->magic != CRED_MAGIC) - return true; - return false; -} -EXPORT_SYMBOL(creds_are_invalid); - -/* - * dump invalid credentials - */ -static void dump_invalid_creds(const struct cred *cred, const char *label, - const struct task_struct *tsk) -{ - pr_err("%s credentials: %p %s%s%s\n", - label, cred, - cred == &init_cred ? "[init]" : "", - cred == tsk->real_cred ? "[real]" : "", - cred == tsk->cred ? "[eff]" : ""); - pr_err("->magic=%x, put_addr=%p\n", - cred->magic, cred->put_addr); - pr_err("->usage=%ld, subscr=%d\n", - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); - pr_err("->*uid = { %d,%d,%d,%d }\n", - from_kuid_munged(&init_user_ns, cred->uid), - from_kuid_munged(&init_user_ns, cred->euid), - from_kuid_munged(&init_user_ns, cred->suid), - from_kuid_munged(&init_user_ns, cred->fsuid)); - pr_err("->*gid = { %d,%d,%d,%d }\n", - from_kgid_munged(&init_user_ns, cred->gid), - from_kgid_munged(&init_user_ns, cred->egid), - from_kgid_munged(&init_user_ns, cred->sgid), - from_kgid_munged(&init_user_ns, cred->fsgid)); -#ifdef CONFIG_SECURITY - pr_err("->security is %p\n", cred->security); - if ((unsigned long) cred->security >= PAGE_SIZE && - (((unsigned long) cred->security & 0xffffff00) != - (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))) - pr_err("->security {%x, %x}\n", - ((u32*)cred->security)[0], - ((u32*)cred->security)[1]); -#endif -} - -/* - * report use of invalid credentials - */ -void __noreturn __invalid_creds(const struct cred *cred, const char *file, unsigned line) -{ - pr_err("Invalid credentials\n"); - pr_err("At %s:%u\n", file, line); - dump_invalid_creds(cred, "Specified", current); - BUG(); -} -EXPORT_SYMBOL(__invalid_creds); - -/* - * check the credentials on a process - */ -void __validate_process_creds(struct task_struct *tsk, - const char *file, unsigned line) -{ - if (tsk->cred == tsk->real_cred) { - if (unlikely(read_cred_subscribers(tsk->cred) < 2 || - creds_are_invalid(tsk->cred))) - goto invalid_creds; - } else { - if (unlikely(read_cred_subscribers(tsk->real_cred) < 1 || - read_cred_subscribers(tsk->cred) < 1 || - creds_are_invalid(tsk->real_cred) || - creds_are_invalid(tsk->cred))) - goto invalid_creds; - } - return; - -invalid_creds: - pr_err("Invalid process credentials\n"); - pr_err("At %s:%u\n", file, line); - - dump_invalid_creds(tsk->real_cred, "Real", tsk); - if (tsk->cred != tsk->real_cred) - dump_invalid_creds(tsk->cred, "Effective", tsk); - else - pr_err("Effective creds == Real creds\n"); - BUG(); -} -EXPORT_SYMBOL(__validate_process_creds); - -/* - * check creds for do_exit() - */ -void validate_creds_for_do_exit(struct task_struct *tsk) -{ - kdebug("validate_creds_for_do_exit(%p,%p{%ld,%d})", - tsk->real_cred, tsk->cred, - atomic_long_read(&tsk->cred->usage), - read_cred_subscribers(tsk->cred)); - - __validate_process_creds(tsk, __FILE__, __LINE__); -} - -#endif /* CONFIG_DEBUG_CREDENTIALS */ diff --git a/kernel/exit.c b/kernel/exit.c index edb50b4c9972..21a59a6e1f2e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -824,8 +824,6 @@ void __noreturn do_exit(long code) ptrace_event(PTRACE_EVENT_EXIT, code); user_events_exit(tsk); - validate_creds_for_do_exit(tsk); - io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ @@ -912,7 +910,6 @@ void __noreturn do_exit(long code) if (tsk->task_frag.page) put_page(tsk->task_frag.page); - validate_creds_for_do_exit(tsk); exit_task_stack_account(tsk); check_stack_usage(); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index fa307f93fa2e..1331f3186f2a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1730,21 +1730,6 @@ config DEBUG_MAPLE_TREE endmenu -config DEBUG_CREDENTIALS - bool "Debug credential management" - depends on DEBUG_KERNEL - help - Enable this to turn on some debug checking for credential - management. The additional code keeps track of the number of - pointers from task_structs to any given cred struct, and checks to - see that this number never exceeds the usage count of the cred - struct. - - Furthermore, if SELinux is enabled, this also checks that the - security pointer in the cred struct is never seen to be invalid. - - If unsure, say N. - source "kernel/rcu/Kconfig.debug" config DEBUG_WQ_FORCE_RR_CPU diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 814b0169f972..ec41b26af76e 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -40,9 +40,6 @@ static unsigned long number_cred_unused; static struct cred machine_cred = { .usage = ATOMIC_INIT(1), -#ifdef CONFIG_DEBUG_CREDENTIALS - .magic = CRED_MAGIC, -#endif }; /* diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2aa0e219d721..208503340721 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1660,8 +1660,6 @@ static int inode_has_perm(const struct cred *cred, struct inode_security_struct *isec; u32 sid; - validate_creds(cred); - if (unlikely(IS_PRIVATE(inode))) return 0; @@ -3056,8 +3054,6 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct inode *inode, struct inode_security_struct *isec; u32 sid; - validate_creds(cred); - ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = dentry; sid = cred_sid(cred); @@ -3101,8 +3097,6 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (!mask) return 0; - validate_creds(cred); - if (unlikely(IS_PRIVATE(inode))) return 0; diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index e45c7cb1d5bc..80a3e6acf31e 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -6,7 +6,6 @@ * * Yes, this is unfortunate. A better solution is in the works. */ -NORETURN(__invalid_creds) NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) NORETURN(__reiserfs_panic) diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 2e70a6048278..49a29dbc1910 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -50,7 +50,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 0617275d93cc..0f456dbab62f 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -46,7 +46,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEFAULT_FQ_CODEL=y From fea8562f51b001de81a8115e34d138386b525afb Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 2 Dec 2023 21:24:30 -0600 Subject: [PATCH 1080/1397] HID: i2c-hid: Add IDEA5002 to i2c_hid_acpi_blacklist[] commit a9f68ffe1170ca4bc17ab29067d806a354a026e0 upstream. Users have reported problems with recent Lenovo laptops that contain an IDEA5002 I2C HID device. Reports include fans turning on and running even at idle and spurious wakeups from suspend. Presumably in the Windows ecosystem there is an application that uses the HID device. Maybe that puts it into a lower power state so it doesn't cause spurious events. This device doesn't serve any functional purpose in Linux as nothing interacts with it so blacklist it from being probed. This will prevent the GPIO driver from setting up the GPIO and the spurious interrupts and wake events will not occur. Cc: stable@vger.kernel.org # 6.1 Reported-and-tested-by: Marcus Aram Reported-and-tested-by: Mark Herbert Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2812 Signed-off-by: Mario Limonciello Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid-acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-acpi.c b/drivers/hid/i2c-hid/i2c-hid-acpi.c index ac918a9ea8d3..1b49243adb16 100644 --- a/drivers/hid/i2c-hid/i2c-hid-acpi.c +++ b/drivers/hid/i2c-hid/i2c-hid-acpi.c @@ -40,6 +40,11 @@ static const struct acpi_device_id i2c_hid_acpi_blacklist[] = { * ICN8505 controller, has a _CID of PNP0C50 but is not HID compatible. */ { "CHPN0001" }, + /* + * The IDEA5002 ACPI device causes high interrupt usage and spurious + * wakeups from suspend. + */ + { "IDEA5002" }, { } }; From 6e5782b1e18b9dfe01fe220f14d5d4f25292a40e Mon Sep 17 00:00:00 2001 From: Sebastian Parschauer Date: Mon, 27 Nov 2023 23:49:37 +0100 Subject: [PATCH 1081/1397] HID: Add quirk for Labtec/ODDOR/aikeec handbrake commit 31e52523267faab5ed8569b9d5c22c9a2283872f upstream. This device needs ALWAYS_POLL quirk, otherwise it keeps reconnecting indefinitely. It is a handbrake for sim racing detected as joystick. Reported and tested by GitHub user N0th1ngM4tt3rs. Link: https://github.com/sriemer/fix-linux-mouse issue 22 Signed-off-by: Sebastian Parschauer Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index d10ccfa17e16..97ab317570dd 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -744,6 +744,7 @@ #define USB_VENDOR_ID_LABTEC 0x1020 #define USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD 0x0006 +#define USB_DEVICE_ID_LABTEC_ODDOR_HANDBRAKE 0x8888 #define USB_VENDOR_ID_LCPOWER 0x1241 #define USB_DEVICE_ID_LCPOWER_LC1000 0xf767 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 5a48fcaa32f0..d9a4f8f7bbb0 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -120,6 +120,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_T609A), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_ODDOR_HANDBRAKE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019), HID_QUIRK_ALWAYS_POLL }, From 9f36c1c5132f3a03f420e88924b8829b508c59b8 Mon Sep 17 00:00:00 2001 From: Tyler Fanelli Date: Tue, 19 Sep 2023 22:40:00 -0400 Subject: [PATCH 1082/1397] fuse: Rename DIRECT_IO_RELAX to DIRECT_IO_ALLOW_MMAP commit c55e0a55b165202f18cbc4a20650d2e1becd5507 upstream. Although DIRECT_IO_RELAX's initial usage is to allow shared mmap, its description indicates a purpose of reducing memory footprint. This may imply that it could be further used to relax other DIRECT_IO operations in the future. Replace it with a flag DIRECT_IO_ALLOW_MMAP which does only one thing, allow shared mmap of DIRECT_IO files while still bypassing the cache on regular reads and writes. [Miklos] Also Keep DIRECT_IO_RELAX definition for backward compatibility. Signed-off-by: Tyler Fanelli Fixes: e78662e818f9 ("fuse: add a new fuse init flag to relax restrictions in no cache mode") Cc: # v6.6 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 6 +++--- fs/fuse/fuse_i.h | 4 ++-- fs/fuse/inode.c | 6 +++--- include/uapi/linux/fuse.h | 10 ++++++---- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1cdb6327511e..89e870d1a526 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1448,7 +1448,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (!ia) return -ENOMEM; - if (fopen_direct_io && fc->direct_io_relax) { + if (fopen_direct_io && fc->direct_io_allow_mmap) { res = filemap_write_and_wait_range(mapping, pos, pos + count - 1); if (res) { fuse_io_free(ia); @@ -2466,9 +2466,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) if (ff->open_flags & FOPEN_DIRECT_IO) { /* Can't provide the coherency needed for MAP_SHARED - * if FUSE_DIRECT_IO_RELAX isn't set. + * if FUSE_DIRECT_IO_ALLOW_MMAP isn't set. */ - if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_relax) + if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_allow_mmap) return -ENODEV; invalidate_inode_pages2(file->f_mapping); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index bf0b85d0b95c..405252bb51f2 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -797,8 +797,8 @@ struct fuse_conn { /* Is tmpfile not implemented by fs? */ unsigned int no_tmpfile:1; - /* relax restrictions in FOPEN_DIRECT_IO mode */ - unsigned int direct_io_relax:1; + /* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */ + unsigned int direct_io_allow_mmap:1; /* Is statx not implemented by fs? */ unsigned int no_statx:1; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2e4eb7cf26fb..444418e240c8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1232,8 +1232,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->init_security = 1; if (flags & FUSE_CREATE_SUPP_GROUP) fc->create_supp_group = 1; - if (flags & FUSE_DIRECT_IO_RELAX) - fc->direct_io_relax = 1; + if (flags & FUSE_DIRECT_IO_ALLOW_MMAP) + fc->direct_io_allow_mmap = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1280,7 +1280,7 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | - FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_RELAX; + FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index db92a7202b34..e7418d15fe39 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -209,7 +209,7 @@ * - add FUSE_HAS_EXPIRE_ONLY * * 7.39 - * - add FUSE_DIRECT_IO_RELAX + * - add FUSE_DIRECT_IO_ALLOW_MMAP * - add FUSE_STATX and related structures */ @@ -409,8 +409,7 @@ struct fuse_file_lock { * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir, * symlink and mknod (single group that matches parent) * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation - * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now - * allow shared mmap + * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -449,7 +448,10 @@ struct fuse_file_lock { #define FUSE_HAS_INODE_DAX (1ULL << 33) #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) -#define FUSE_DIRECT_IO_RELAX (1ULL << 36) +#define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) + +/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ +#define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP /** * CUSE INIT request/reply flags From 2939dd306b1f00faf71472f2b16dead077f74c36 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Fri, 3 Nov 2023 10:39:47 -0700 Subject: [PATCH 1083/1397] fuse: share lookup state between submount and its parent commit c4d361f66ac91db8fc65061a9671682f61f4ca9d upstream. Fuse submounts do not perform a lookup for the nodeid that they inherit from their parent. Instead, the code decrements the nlookup on the submount's fuse_inode when it is instantiated, and no forget is performed when a submount root is evicted. Trouble arises when the submount's parent is evicted despite the submount itself being in use. In this author's case, the submount was in a container and deatched from the initial mount namespace via a MNT_DEATCH operation. When memory pressure triggered the shrinker, the inode from the parent was evicted, which triggered enough forgets to render the submount's nodeid invalid. Since submounts should still function, even if their parent goes away, solve this problem by sharing refcounted state between the parent and its submount. When all of the references on this shared state reach zero, it's safe to forget the final lookup of the fuse nodeid. Signed-off-by: Krister Johansen Cc: stable@vger.kernel.org Fixes: 1866d779d5d2 ("fuse: Allow fuse_fill_super_common() for submounts") Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/fuse_i.h | 15 ++++++++++ fs/fuse/inode.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 3 deletions(-) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 405252bb51f2..9377c46f14c4 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -63,6 +63,19 @@ struct fuse_forget_link { struct fuse_forget_link *next; }; +/* Submount lookup tracking */ +struct fuse_submount_lookup { + /** Refcount */ + refcount_t count; + + /** Unique ID, which identifies the inode between userspace + * and kernel */ + u64 nodeid; + + /** The request used for sending the FORGET message */ + struct fuse_forget_link *forget; +}; + /** FUSE inode */ struct fuse_inode { /** Inode data */ @@ -158,6 +171,8 @@ struct fuse_inode { */ struct fuse_inode_dax *dax; #endif + /** Submount specific lookup tracking */ + struct fuse_submount_lookup *submount_lookup; }; /** FUSE inode state bits */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 444418e240c8..59743813563e 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -68,6 +68,24 @@ struct fuse_forget_link *fuse_alloc_forget(void) return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); } +static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void) +{ + struct fuse_submount_lookup *sl; + + sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT); + if (!sl) + return NULL; + sl->forget = fuse_alloc_forget(); + if (!sl->forget) + goto out_free; + + return sl; + +out_free: + kfree(sl); + return NULL; +} + static struct inode *fuse_alloc_inode(struct super_block *sb) { struct fuse_inode *fi; @@ -83,6 +101,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->attr_version = 0; fi->orig_ino = 0; fi->state = 0; + fi->submount_lookup = NULL; mutex_init(&fi->mutex); spin_lock_init(&fi->lock); fi->forget = fuse_alloc_forget(); @@ -113,6 +132,17 @@ static void fuse_free_inode(struct inode *inode) kmem_cache_free(fuse_inode_cachep, fi); } +static void fuse_cleanup_submount_lookup(struct fuse_conn *fc, + struct fuse_submount_lookup *sl) +{ + if (!refcount_dec_and_test(&sl->count)) + return; + + fuse_queue_forget(fc, sl->forget, sl->nodeid, 1); + sl->forget = NULL; + kfree(sl); +} + static void fuse_evict_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); @@ -132,6 +162,11 @@ static void fuse_evict_inode(struct inode *inode) fi->nlookup); fi->forget = NULL; } + + if (fi->submount_lookup) { + fuse_cleanup_submount_lookup(fc, fi->submount_lookup); + fi->submount_lookup = NULL; + } } if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); @@ -332,6 +367,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, fuse_dax_dontcache(inode, attr->flags); } +static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl, + u64 nodeid) +{ + sl->nodeid = nodeid; + refcount_set(&sl->count, 1); +} + static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr, struct fuse_conn *fc) { @@ -395,12 +437,22 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, */ if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && S_ISDIR(attr->mode)) { + struct fuse_inode *fi; + inode = new_inode(sb); if (!inode) return NULL; fuse_init_inode(inode, attr, fc); - get_fuse_inode(inode)->nodeid = nodeid; + fi = get_fuse_inode(inode); + fi->nodeid = nodeid; + fi->submount_lookup = fuse_alloc_submount_lookup(); + if (!fi->submount_lookup) { + iput(inode); + return NULL; + } + /* Sets nlookup = 1 on fi->submount_lookup->nlookup */ + fuse_init_submount_lookup(fi->submount_lookup, nodeid); inode->i_flags |= S_AUTOMOUNT; goto done; } @@ -423,11 +475,11 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, iput(inode); goto retry; } -done: fi = get_fuse_inode(inode); spin_lock(&fi->lock); fi->nlookup++; spin_unlock(&fi->lock); +done: fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); return inode; @@ -1465,6 +1517,8 @@ static int fuse_fill_super_submount(struct super_block *sb, struct super_block *parent_sb = parent_fi->inode.i_sb; struct fuse_attr root_attr; struct inode *root; + struct fuse_submount_lookup *sl; + struct fuse_inode *fi; fuse_sb_defaults(sb); fm->sb = sb; @@ -1487,12 +1541,27 @@ static int fuse_fill_super_submount(struct super_block *sb, * its nlookup should not be incremented. fuse_iget() does * that, though, so undo it here. */ - get_fuse_inode(root)->nlookup--; + fi = get_fuse_inode(root); + fi->nlookup--; + sb->s_d_op = &fuse_dentry_operations; sb->s_root = d_make_root(root); if (!sb->s_root) return -ENOMEM; + /* + * Grab the parent's submount_lookup pointer and take a + * reference on the shared nlookup from the parent. This is to + * prevent the last forget for this nodeid from getting + * triggered until all users have finished with it. + */ + sl = parent_fi->submount_lookup; + WARN_ON(!sl); + if (sl) { + refcount_inc(&sl->count); + fi->submount_lookup = sl; + } + return 0; } From fbcddc7410625f90a3c1352c503d286bb23c1a60 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 3 Dec 2023 09:42:33 +0200 Subject: [PATCH 1084/1397] fuse: disable FOPEN_PARALLEL_DIRECT_WRITES with FUSE_DIRECT_IO_ALLOW_MMAP commit 3f29f1c336c0e8a4bec52f1e5217f88835553e5b upstream. The new fuse init flag FUSE_DIRECT_IO_ALLOW_MMAP breaks assumptions made by FOPEN_PARALLEL_DIRECT_WRITES and causes test generic/095 to hit BUG_ON(fi->writectr < 0) assertions in fuse_set_nowrite(): generic/095 5s ... kernel BUG at fs/fuse/dir.c:1756! ... ? fuse_set_nowrite+0x3d/0xdd ? do_raw_spin_unlock+0x88/0x8f ? _raw_spin_unlock+0x2d/0x43 ? fuse_range_is_writeback+0x71/0x84 fuse_sync_writes+0xf/0x19 fuse_direct_io+0x167/0x5bd fuse_direct_write_iter+0xf0/0x146 Auto disable FOPEN_PARALLEL_DIRECT_WRITES when server negotiated FUSE_DIRECT_IO_ALLOW_MMAP. Fixes: e78662e818f9 ("fuse: add a new fuse init flag to relax restrictions in no cache mode") Cc: # v6.6 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 89e870d1a526..a660f1f21540 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1574,6 +1574,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t res; bool exclusive_lock = !(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES) || + get_fuse_conn(inode)->direct_io_allow_mmap || iocb->ki_flags & IOCB_APPEND || fuse_direct_write_extending_i_size(iocb, from); @@ -1581,6 +1582,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) * Take exclusive lock if * - Parallel direct writes are disabled - a user space decision * - Parallel direct writes are enabled and i_size is being extended. + * - Shared mmap on direct_io file is supported (FUSE_DIRECT_IO_ALLOW_MMAP). * This might not be needed at all, but needs further investigation. */ if (exclusive_lock) From ce5a6df21a00a02c155f2cf269f0e258aa522346 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Thu, 16 Nov 2023 15:57:26 +0800 Subject: [PATCH 1085/1397] fuse: dax: set fc->dax to NULL in fuse_dax_conn_free() commit 7f8ed28d1401320bcb02dda81b3c23ab2dc5a6d8 upstream. fuse_dax_conn_free() will be called when fuse_fill_super_common() fails after fuse_dax_conn_alloc(). Then deactivate_locked_super() in virtio_fs_get_tree() will call virtio_kill_sb() to release the discarded superblock. This will call fuse_dax_conn_free() again in fuse_conn_put(), resulting in a possible double free. Fixes: 1dd539577c42 ("virtiofs: add a mount option to enable dax") Signed-off-by: Hangyu Hua Acked-by: Vivek Goyal Reviewed-by: Jingbo Xu Cc: # v5.10 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dax.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 23904a6a9a96..12ef91d170bb 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -1222,6 +1222,7 @@ void fuse_dax_conn_free(struct fuse_conn *fc) if (fc->dax) { fuse_free_dax_mem_ranges(&fc->dax->free_ranges); kfree(fc->dax); + fc->dax = NULL; } } From 9566ef570cc4c9a69af493fdba5f8b4ea101426f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Dec 2023 21:34:08 +0000 Subject: [PATCH 1086/1397] io_uring/cmd: fix breakage in SOCKET_URING_OP_SIOC* implementation commit 1ba0e9d69b2000e95267c888cbfa91d823388d47 upstream. In 8e9fad0e70b7 "io_uring: Add io_uring command support for sockets" you've got an include of asm-generic/ioctls.h done in io_uring/uring_cmd.c. That had been done for the sake of this chunk - + ret = prot->ioctl(sk, SIOCINQ, &arg); + if (ret) + return ret; + return arg; + case SOCKET_URING_OP_SIOCOUTQ: + ret = prot->ioctl(sk, SIOCOUTQ, &arg); SIOC{IN,OUT}Q are defined to symbols (FIONREAD and TIOCOUTQ) that come from ioctls.h, all right, but the values vary by the architecture. FIONREAD is 0x467F on mips 0x4004667F on alpha, powerpc and sparc 0x8004667F on sh and xtensa 0x541B everywhere else TIOCOUTQ is 0x7472 on mips 0x40047473 on alpha, powerpc and sparc 0x80047473 on sh and xtensa 0x5411 everywhere else ->ioctl() expects the same values it would've gotten from userland; all places where we compare with SIOC{IN,OUT}Q are using asm/ioctls.h, so they pick the correct values. io_uring_cmd_sock(), OTOH, ends up passing the default ones. Fixes: 8e9fad0e70b7 ("io_uring: Add io_uring command support for sockets") Cc: Signed-off-by: Al Viro Link: https://lore.kernel.org/r/20231214213408.GT1674809@ZenIV Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/uring_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 537795fddc87..5fa19861cda5 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include "io_uring.h" #include "rsrc.h" From c52ebaf742734113b41b4b6bf8bd9a86e34e4445 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 8 Dec 2023 15:21:26 +0200 Subject: [PATCH 1087/1397] ALSA: hda/hdmi: add force-connect quirk for NUC5CPYB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3b1ff57e24a7bcd2e2a8426dd2013a80d1fa96eb upstream. Add one more older NUC model that requires quirk to force all pins to be connected. The display codec pins are not registered properly without the force-connect quirk. The codec will report only one pin as having external connectivity, but i915 finds all three connectors on the system, so the two drivers are not in sync. Issue found with DRM igt-gpu-tools test kms_hdmi_inject@inject-audio. Link: https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/issues/3 Cc: Ville Syrjälä Cc: Jani Saarinen Signed-off-by: Kai Vehmanen Cc: Link: https://lore.kernel.org/r/20231208132127.2438067-2-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 1cde2a69bdb4..b152c941414f 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1994,6 +1994,7 @@ static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8715, "HP", 1), SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), + SND_PCI_QUIRK(0x8086, 0x2060, "Intel NUC5CPYB", 1), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), {} }; From 7ec57c10b01832d699bb61d4049e57031592625d Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 8 Dec 2023 15:21:27 +0200 Subject: [PATCH 1088/1397] ALSA: hda/hdmi: add force-connect quirks for ASUSTeK Z170 variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 924f5ca2975b2993ee81a7ecc3c809943a70f334 upstream. On ASUSTeK Z170M PLUS and Z170 PRO GAMING systems, the display codec pins are not registered properly without the force-connect quirk. The codec will report only one pin as having external connectivity, but i915 finds all three connectors on the system, so the two drivers are not in sync. Issue found with DRM igt-gpu-tools test kms_hdmi_inject@inject-audio. Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9801 Cc: Ville Syrjälä Cc: Jani Saarinen Signed-off-by: Kai Vehmanen Cc: Link: https://lore.kernel.org/r/20231208132127.2438067-3-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index b152c941414f..78cee53fee02 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1993,6 +1993,8 @@ static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8715, "HP", 1), + SND_PCI_QUIRK(0x1043, 0x86ae, "ASUS", 1), /* Z170 PRO */ + SND_PCI_QUIRK(0x1043, 0x86c7, "ASUS", 1), /* Z170M PLUS */ SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), SND_PCI_QUIRK(0x8086, 0x2060, "Intel NUC5CPYB", 1), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), From ffd1fe12d4c95c5644152fcbc85696a4195190ae Mon Sep 17 00:00:00 2001 From: Hartmut Knaack Date: Sat, 9 Dec 2023 15:47:07 +0100 Subject: [PATCH 1089/1397] ALSA: hda/realtek: Apply mute LED quirk for HP15-db commit 9b726bf6ae11add6a7a52883a21f90ff9cbca916 upstream. The HP laptop 15-db0403ng uses the ALC236 codec and controls the mute LED using COEF 0x07 index 1. Sound card subsystem: Hewlett-Packard Company Device [103c:84ae] Use the existing quirk for this model. Signed-off-by: Hartmut Knaack Cc: Link: https://lore.kernel.org/r/e61815d0-f1c7-b164-e49d-6ca84771476a@gmx.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5c609c1150c4..2590879f0a84 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9705,6 +9705,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x841c, "HP Pavilion 15-CK0xx", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x84ae, "HP 15-db0403ng", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), From 7fc8bfdb7007ccd013eb786c76f6619da56a98f2 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Sat, 9 Dec 2023 22:18:29 +0100 Subject: [PATCH 1090/1397] ALSA: hda/tas2781: leave hda_component in usable state commit 75a25d31b80770485641ad2789a854955f5c1e40 upstream. Unloading then loading the module causes a NULL ponter dereference. The hda_unbind zeroes the hda_component, later the hda_bind tries to dereference the codec field. The hda_component is only initialized once by tas2781_generic_fixup. Set only previously modified fields to NULL. BUG: kernel NULL pointer dereference, address: 0000000000000322 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? tas2781_hda_bind+0x59/0x140 [snd_hda_scodec_tas2781_i2c] component_bind_all+0xf3/0x240 try_to_bring_up_aggregate_device+0x1c3/0x270 __component_add+0xbc/0x1a0 tas2781_hda_i2c_probe+0x289/0x3a0 [snd_hda_scodec_tas2781_i2c] i2c_device_probe+0x136/0x2e0 Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") Cc: stable@vger.kernel.org Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/8b8ed2bd5f75fbb32e354a3226c2f966fa85b46b.1702156522.git.soyer@irl.hu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/tas2781_hda_i2c.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index fb802802939e..b42837105c22 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -612,9 +612,13 @@ static void tas2781_hda_unbind(struct device *dev, { struct tasdevice_priv *tas_priv = dev_get_drvdata(dev); struct hda_component *comps = master_data; + comps = &comps[tas_priv->index]; - if (comps[tas_priv->index].dev == dev) - memset(&comps[tas_priv->index], 0, sizeof(*comps)); + if (comps->dev == dev) { + comps->dev = NULL; + memset(comps->name, 0, sizeof(comps->name)); + comps->playback_hook = NULL; + } tasdevice_config_info_remove(tas_priv); tasdevice_dsp_remove(tas_priv); From 795e91c599c29ab9b6b6cd57548b497d0d78e0d6 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Mon, 11 Dec 2023 00:37:33 +0100 Subject: [PATCH 1091/1397] ALSA: hda/tas2781: handle missing EFI calibration data commit 33071422714a4c9587753b0ccc130ca59323bf42 upstream. The code does not properly check whether the calibration variable is available in the EFI. If it is not available, it causes a NULL pointer dereference. Check the return value of the first get_variable call also. BUG: kernel NULL pointer dereference, address: 0000000000000000 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? srso_alias_return_thunk+0x5/0x7f ? schedule+0x5e/0xd0 ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? crc32_body+0x2c/0x120 ? tas2781_save_calibration+0xe4/0x220 [snd_hda_scodec_tas2781_i2c] tasdev_fw_ready+0x1af/0x280 [snd_hda_scodec_tas2781_i2c] request_firmware_work_func+0x59/0xa0 Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") CC: stable@vger.kernel.org Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/f1f6583bda918f78556f67d522ca7b3b91cebbd5.1702251102.git.soyer@irl.hu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/tas2781_hda_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index b42837105c22..d3dafc9d150b 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -455,9 +455,9 @@ static int tas2781_save_calibration(struct tasdevice_priv *tas_priv) status = efi.get_variable(efi_name, &efi_guid, &attr, &tas_priv->cali_data.total_sz, tas_priv->cali_data.data); - if (status != EFI_SUCCESS) - return -EINVAL; } + if (status != EFI_SUCCESS) + return -EINVAL; tmp_val = (unsigned int *)tas_priv->cali_data.data; From d94fad04a64b2a58157940ec7ab59d4ccd400a1c Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Thu, 14 Dec 2023 00:28:16 +0100 Subject: [PATCH 1092/1397] ALSA: hda/tas2781: call cleanup functions only once commit 6c6fa2641402e8e753262fb61ed9a15a7cb225ad upstream. If the module can load the RCA but not the firmware binary, it will call the cleanup functions. Then unloading the module causes general protection fault due to double free. Do not call the cleanup functions in tasdev_fw_ready. general protection fault, probably for non-canonical address 0x6f2b8a2bff4c8fec: 0000 [#1] PREEMPT SMP NOPTI Call Trace: ? die_addr+0x36/0x90 ? exc_general_protection+0x1c5/0x430 ? asm_exc_general_protection+0x26/0x30 ? tasdevice_config_info_remove+0x6d/0xd0 [snd_soc_tas2781_fmwlib] tas2781_hda_unbind+0xaa/0x100 [snd_hda_scodec_tas2781_i2c] component_unbind+0x2e/0x50 component_unbind_all+0x92/0xa0 component_del+0xa8/0x140 tas2781_hda_remove.isra.0+0x32/0x60 [snd_hda_scodec_tas2781_i2c] i2c_device_remove+0x26/0xb0 Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") CC: stable@vger.kernel.org Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/1a0885c424bb21172702d254655882b59ef6477a.1702510018.git.soyer@irl.hu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/tas2781_hda_i2c.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index d3dafc9d150b..c8ee5f809c38 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -550,11 +550,6 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) tas2781_save_calibration(tas_priv); out: - if (tas_priv->fw_state == TASDEVICE_DSP_FW_FAIL) { - /*If DSP FW fail, kcontrol won't be created */ - tasdevice_config_info_remove(tas_priv); - tasdevice_dsp_remove(tas_priv); - } mutex_unlock(&tas_priv->codec_lock); if (fmw) release_firmware(fmw); From 56e22123449cf0be89db7bbf7c4667eb9c6bb953 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Thu, 14 Dec 2023 00:49:20 +0100 Subject: [PATCH 1093/1397] ALSA: hda/tas2781: reset the amp before component_add commit 315deab289924c83ab1ded50022e8db95d6e428b upstream. Calling component_add starts loading the firmware, the callback function writes the program to the amplifiers. If the module resets the amplifiers after component_add, it happens that one of the amplifiers does not work because the reset and program writing are interleaving. Call tas2781_reset before component_add to ensure reliable initialization. Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") CC: stable@vger.kernel.org Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/4d23bf58558e23ee8097de01f70f1eb8d9de2d15.1702511246.git.soyer@irl.hu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/tas2781_hda_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index c8ee5f809c38..63a90c7e8976 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -674,14 +674,14 @@ static int tas2781_hda_i2c_probe(struct i2c_client *clt) pm_runtime_put_autosuspend(tas_priv->dev); + tas2781_reset(tas_priv); + ret = component_add(tas_priv->dev, &tas2781_hda_comp_ops); if (ret) { dev_err(tas_priv->dev, "Register component failed: %d\n", ret); pm_runtime_disable(tas_priv->dev); - goto err; } - tas2781_reset(tas_priv); err: if (ret) tas2781_hda_remove(&clt->dev); From 5cc8d88a1b94b900fd74abda744c29ff5845430b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 14 Dec 2023 09:08:56 -0600 Subject: [PATCH 1094/1397] Revert "PCI: acpiphp: Reassign resources on bridge if necessary" commit 5df12742b7e3aae2594a30a9d14d5d6e9e7699f4 upstream. This reverts commit 40613da52b13fb21c5566f10b287e0ca8c12c4e9 and the subsequent fix to it: cc22522fd55e ("PCI: acpiphp: Use pci_assign_unassigned_bridge_resources() only for non-root bus") 40613da52b13 fixed a problem where hot-adding a device with large BARs failed if the bridge windows programmed by firmware were not large enough. cc22522fd55e ("PCI: acpiphp: Use pci_assign_unassigned_bridge_resources() only for non-root bus") fixed a problem with 40613da52b13: an ACPI hot-add of a device on a PCI root bus (common in the virt world) or firmware sending ACPI Bus Check to non-existent Root Ports (e.g., on Dell Inspiron 7352/0W6WV0) caused a NULL pointer dereference and suspend/resume hangs. Unfortunately the combination of 40613da52b13 and cc22522fd55e caused other problems: - Fiona reported that hot-add of SCSI disks in QEMU virtual machine fails sometimes. - Dongli reported a similar problem with hot-add of SCSI disks. - Jonathan reported a console freeze during boot on bare metal due to an error in radeon GPU initialization. Revert both patches to avoid adding these problems. This means we will again see the problems with hot-adding devices with large BARs and the NULL pointer dereferences and suspend/resume issues that 40613da52b13 and cc22522fd55e were intended to fix. Fixes: 40613da52b13 ("PCI: acpiphp: Reassign resources on bridge if necessary") Fixes: cc22522fd55e ("PCI: acpiphp: Use pci_assign_unassigned_bridge_resources() only for non-root bus") Reported-by: Fiona Ebner Closes: https://lore.kernel.org/r/9eb669c0-d8f2-431d-a700-6da13053ae54@proxmox.com Reported-by: Dongli Zhang Closes: https://lore.kernel.org/r/3c4a446a-b167-11b8-f36f-d3c1b49b42e9@oracle.com Reported-by: Jonathan Woithe Closes: https://lore.kernel.org/r/ZXpaNCLiDM+Kv38H@marvin.atrad.com.au Signed-off-by: Bjorn Helgaas Acked-by: Michael S. Tsirkin Acked-by: Igor Mammedov Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/acpiphp_glue.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 601129772b2d..5b1f271c6034 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -512,15 +512,12 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge) if (pass && dev->subordinate) { check_hotplug_bridge(slot, dev); pcibios_resource_survey_bus(dev->subordinate); - if (pci_is_root_bus(bus)) - __pci_bus_size_bridges(dev->subordinate, &add_list); + __pci_bus_size_bridges(dev->subordinate, + &add_list); } } } - if (pci_is_root_bus(bus)) - __pci_bus_assign_resources(bus, &add_list, NULL); - else - pci_assign_unassigned_bridge_resources(bus->self); + __pci_bus_assign_resources(bus, &add_list, NULL); } acpiphp_sanitize_bus(bus); From 4fb5358c574e1b49a55e090dbe26cd4ddaa6c639 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 1 Dec 2023 11:50:28 +0000 Subject: [PATCH 1095/1397] PCI: loongson: Limit MRRS to 256 commit ef61a0405742a9f7f6051bc6fd2f017d87d07911 upstream. This is a partial revert of 8b3517f88ff2 ("PCI: loongson: Prevent LS7A MRRS increases") for MIPS-based Loongson. Some MIPS Loongson systems don't support arbitrary Max_Read_Request_Size (MRRS) settings. 8b3517f88ff2 ("PCI: loongson: Prevent LS7A MRRS increases") worked around that by (1) assuming that firmware configured MRRS to the maximum supported value and (2) preventing the PCI core from increasing MRRS. Unfortunately, some firmware doesn't set that maximum MRRS correctly, which results in devices not being initialized correctly. One symptom, from the Debian report below, is this: ata4.00: exception Emask 0x0 SAct 0x20000000 SErr 0x0 action 0x6 frozen ata4.00: failed command: WRITE FPDMA QUEUED ata4.00: cmd 61/20:e8:00:f0:e1/00:00:00:00:00/40 tag 29 ncq dma 16384 out res 40/00:00:00:00:00/00:00:00:00:00/00 Emask 0x4 (timeout) ata4.00: status: { DRDY } ata4: hard resetting link Limit MRRS to 256 because MIPS Loongson with higher MRRS support is considered rare. This must be done at device enablement stage because the MRRS setting may get lost if PCI_COMMAND_MASTER on the parent bridge is cleared, and we are only sure parent bridge is enabled at this point. Fixes: 8b3517f88ff2 ("PCI: loongson: Prevent LS7A MRRS increases") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217680 Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1035587 Link: https://lore.kernel.org/r/20231201115028.84351-1-jiaxun.yang@flygoat.com Signed-off-by: Jiaxun Yang Signed-off-by: Bjorn Helgaas Acked-by: Huacai Chen Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pci-loongson.c | 46 ++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c index d45e7b8dc530..8b34ccff073a 100644 --- a/drivers/pci/controller/pci-loongson.c +++ b/drivers/pci/controller/pci-loongson.c @@ -80,13 +80,49 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, DEV_LS7A_LPC, system_bus_quirk); +/* + * Some Loongson PCIe ports have hardware limitations on their Maximum Read + * Request Size. They can't handle anything larger than this. Sane + * firmware will set proper MRRS at boot, so we only need no_inc_mrrs for + * bridges. However, some MIPS Loongson firmware doesn't set MRRS properly, + * so we have to enforce maximum safe MRRS, which is 256 bytes. + */ +#ifdef CONFIG_MIPS +static void loongson_set_min_mrrs_quirk(struct pci_dev *pdev) +{ + struct pci_bus *bus = pdev->bus; + struct pci_dev *bridge; + static const struct pci_device_id bridge_devids[] = { + { PCI_VDEVICE(LOONGSON, DEV_LS2K_PCIE_PORT0) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT0) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT1) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT2) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT3) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT4) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT5) }, + { PCI_VDEVICE(LOONGSON, DEV_LS7A_PCIE_PORT6) }, + { 0, }, + }; + + /* look for the matching bridge */ + while (!pci_is_root_bus(bus)) { + bridge = bus->self; + bus = bus->parent; + + if (pci_match_id(bridge_devids, bridge)) { + if (pcie_get_readrq(pdev) > 256) { + pci_info(pdev, "limiting MRRS to 256\n"); + pcie_set_readrq(pdev, 256); + } + break; + } + } +} +DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, loongson_set_min_mrrs_quirk); +#endif + static void loongson_mrrs_quirk(struct pci_dev *pdev) { - /* - * Some Loongson PCIe ports have h/w limitations of maximum read - * request size. They can't handle anything larger than this. So - * force this limit on any devices attached under these ports. - */ struct pci_host_bridge *bridge = pci_find_host_bridge(pdev->bus); bridge->no_inc_mrrs = 1; From 1e1f461ea574b1a9aba7ffcc15d8721c087c5cda Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 28 Nov 2023 09:15:07 +0100 Subject: [PATCH 1096/1397] PCI/ASPM: Add pci_enable_link_state_locked() commit 718ab8226636a1a3a7d281f5d6a7ad7c925efe5a upstream. Add pci_enable_link_state_locked() for enabling link states that can be used in contexts where a pci_bus_sem read lock is already held (e.g. from pci_walk_bus()). This helper will be used to fix a couple of potential deadlocks where the current helper is called with the lock already held, hence the CC stable tag. Fixes: f492edb40b54 ("PCI: vmd: Add quirk to configure PCIe ASPM and LTR") Link: https://lore.kernel.org/r/20231128081512.19387-2-johan+linaro@kernel.org Signed-off-by: Johan Hovold [bhelgaas: include helper name in subject, commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Cc: # 6.3 Cc: Michael Bottini Cc: David E. Box Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aspm.c | 53 +++++++++++++++++++++++++++++++---------- include/linux/pci.h | 3 +++ 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index fc18e42f0a6e..67b13f26ba7c 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1102,17 +1102,7 @@ int pci_disable_link_state(struct pci_dev *pdev, int state) } EXPORT_SYMBOL(pci_disable_link_state); -/** - * pci_enable_link_state - Clear and set the default device link state so that - * the link may be allowed to enter the specified states. Note that if the - * BIOS didn't grant ASPM control to the OS, this does nothing because we can't - * touch the LNKCTL register. Also note that this does not enable states - * disabled by pci_disable_link_state(). Return 0 or a negative errno. - * - * @pdev: PCI device - * @state: Mask of ASPM link states to enable - */ -int pci_enable_link_state(struct pci_dev *pdev, int state) +static int __pci_enable_link_state(struct pci_dev *pdev, int state, bool locked) { struct pcie_link_state *link = pcie_aspm_get_link(pdev); @@ -1129,7 +1119,8 @@ int pci_enable_link_state(struct pci_dev *pdev, int state) return -EPERM; } - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); link->aspm_default = 0; if (state & PCIE_LINK_STATE_L0S) @@ -1150,12 +1141,48 @@ int pci_enable_link_state(struct pci_dev *pdev, int state) link->clkpm_default = (state & PCIE_LINK_STATE_CLKPM) ? 1 : 0; pcie_set_clkpm(link, policy_to_clkpm_state(link)); mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); return 0; } + +/** + * pci_enable_link_state - Clear and set the default device link state so that + * the link may be allowed to enter the specified states. Note that if the + * BIOS didn't grant ASPM control to the OS, this does nothing because we can't + * touch the LNKCTL register. Also note that this does not enable states + * disabled by pci_disable_link_state(). Return 0 or a negative errno. + * + * @pdev: PCI device + * @state: Mask of ASPM link states to enable + */ +int pci_enable_link_state(struct pci_dev *pdev, int state) +{ + return __pci_enable_link_state(pdev, state, false); +} EXPORT_SYMBOL(pci_enable_link_state); +/** + * pci_enable_link_state_locked - Clear and set the default device link state + * so that the link may be allowed to enter the specified states. Note that if + * the BIOS didn't grant ASPM control to the OS, this does nothing because we + * can't touch the LNKCTL register. Also note that this does not enable states + * disabled by pci_disable_link_state(). Return 0 or a negative errno. + * + * @pdev: PCI device + * @state: Mask of ASPM link states to enable + * + * Context: Caller holds pci_bus_sem read lock. + */ +int pci_enable_link_state_locked(struct pci_dev *pdev, int state) +{ + lockdep_assert_held_read(&pci_bus_sem); + + return __pci_enable_link_state(pdev, state, true); +} +EXPORT_SYMBOL(pci_enable_link_state_locked); + static int pcie_aspm_set_policy(const char *val, const struct kernel_param *kp) { diff --git a/include/linux/pci.h b/include/linux/pci.h index b56417276042..1596b1205b8d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1803,6 +1803,7 @@ extern bool pcie_ports_native; int pci_disable_link_state(struct pci_dev *pdev, int state); int pci_disable_link_state_locked(struct pci_dev *pdev, int state); int pci_enable_link_state(struct pci_dev *pdev, int state); +int pci_enable_link_state_locked(struct pci_dev *pdev, int state); void pcie_no_aspm(void); bool pcie_aspm_support_enabled(void); bool pcie_aspm_enabled(struct pci_dev *pdev); @@ -1813,6 +1814,8 @@ static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { return 0; } static inline int pci_enable_link_state(struct pci_dev *pdev, int state) { return 0; } +static inline int pci_enable_link_state_locked(struct pci_dev *pdev, int state) +{ return 0; } static inline void pcie_no_aspm(void) { } static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } From f94c44342f0a5612be4710c0d3122198244ffa59 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 6 Dec 2023 08:23:49 +0900 Subject: [PATCH 1097/1397] ksmbd: fix wrong name of SMB2_CREATE_ALLOCATION_SIZE commit 13736654481198e519059d4a2e2e3b20fa9fdb3e upstream. MS confirm that "AISi" name of SMB2_CREATE_ALLOCATION_SIZE in MS-SMB2 specification is a typo. cifs/ksmbd have been using this wrong name from MS-SMB2. It should be "AlSi". Also It will cause problem when running smb2.create.open test in smbtorture against ksmbd. Cc: stable@vger.kernel.org Fixes: 12197a7fdda9 ("Clarify SMB2/SMB3 create context and add missing ones") Signed-off-by: Namjae Jeon Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/common/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 319fb9ffc6a0..ec20c83cc836 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1120,7 +1120,7 @@ struct smb2_change_notify_rsp { #define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */ #define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ" #define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC" -#define SMB2_CREATE_ALLOCATION_SIZE "AISi" +#define SMB2_CREATE_ALLOCATION_SIZE "AlSi" #define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc" #define SMB2_CREATE_TIMEWARP_REQUEST "TWrp" #define SMB2_CREATE_QUERY_ON_DISK_ID "QFid" From 98bd0b4ad5d46fe746c9f99e0f11da7cfec8d8bc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 28 Nov 2023 09:15:08 +0100 Subject: [PATCH 1098/1397] PCI: vmd: Fix potential deadlock when enabling ASPM commit 49de0dc87965079a8e2803ee4b39f9d946259423 upstream. The vmd_pm_enable_quirk() helper is called from pci_walk_bus() during probe to enable ASPM for controllers with VMD_FEAT_BIOS_PM_QUIRK set. Since pci_walk_bus() already holds a pci_bus_sem read lock, use pci_enable_link_state_locked() to enable link states in order to avoid a potential deadlock (e.g. in case someone takes a write lock before reacquiring the read lock). Fixes: f492edb40b54 ("PCI: vmd: Add quirk to configure PCIe ASPM and LTR") Link: https://lore.kernel.org/r/20231128081512.19387-3-johan+linaro@kernel.org Signed-off-by: Johan Hovold [bhelgaas: add "potential" in subject since the deadlock has only been reported by lockdep, include helper name in commit log] Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Cc: # 6.3 Cc: Michael Bottini Cc: David E. Box Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/vmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 1c1c1aa940a5..6ac0afae0ca1 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -751,7 +751,7 @@ static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata) if (!(features & VMD_FEAT_BIOS_PM_QUIRK)) return 0; - pci_enable_link_state(pdev, PCIE_LINK_STATE_ALL); + pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_LTR); if (!pos) From 8964524158ace2f0fcc055e663ab5308cae40175 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 5 Sep 2023 10:49:21 +0200 Subject: [PATCH 1099/1397] drm/mediatek: fix kernel oops if no crtc is found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4662817aed5a9d6c695658d0105d8ff4b84ac6cb ] drm_crtc_from_index(0) might return NULL if there are no CRTCs registered at all which will lead to a kernel oops in mtk_drm_crtc_dma_dev_get(). Add the missing return value check. Fixes: 0d9eee9118b7 ("drm/mediatek: Add drm ovl_adaptor sub driver for MT8195") Signed-off-by: Michael Walle Reviewed-by: Nícolas F. R. A. Prado Tested-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Tested-by: Eugen Hristev Reviewed-by: Eugen Hristev Link: https://patchwork.kernel.org/project/dri-devel/patch/20230905084922.3908121-1-mwalle@kernel.org/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 2d6a979afe8f..cdd506c80373 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -421,6 +421,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) struct mtk_drm_private *private = drm->dev_private; struct mtk_drm_private *priv_n; struct device *dma_dev = NULL; + struct drm_crtc *crtc; int ret, i, j; if (drm_firmware_drivers_only()) @@ -495,7 +496,9 @@ static int mtk_drm_kms_init(struct drm_device *drm) } /* Use OVL device for all DMA memory allocations */ - dma_dev = mtk_drm_crtc_dma_dev_get(drm_crtc_from_index(drm, 0)); + crtc = drm_crtc_from_index(drm, 0); + if (crtc) + dma_dev = mtk_drm_crtc_dma_dev_get(crtc); if (!dma_dev) { ret = -ENODEV; dev_err(drm->dev, "Need at least one OVL device\n"); From 7d6e9cb7b95165b40e7718b017c16b543d93411a Mon Sep 17 00:00:00 2001 From: "Jason-JH.Lin" Date: Wed, 20 Sep 2023 17:06:58 +0800 Subject: [PATCH 1100/1397] drm/mediatek: Add spinlock for setting vblank event in atomic_begin [ Upstream commit fe4c5f662097978b6c91c23a13c24ed92339a180 ] Add spinlock protection to avoid race condition on vblank event between mtk_drm_crtc_atomic_begin() and mtk_drm_finish_page_flip(). Fixes: 119f5173628a ("drm/mediatek: Add DRM Driver for Mediatek SoC MT8173.") Signed-off-by: Jason-JH.Lin Suggested-by: AngeloGioacchino Del Regno Reviewed-by: Alexandre Mergnat Reviewed-by: Fei Shao Tested-by: Fei Shao Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20230920090658.31181-1-jason-jh.lin@mediatek.com/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 0a511d7688a3..a033da279a94 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -747,6 +747,7 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc, crtc); struct mtk_crtc_state *mtk_crtc_state = to_mtk_crtc_state(crtc_state); struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); + unsigned long flags; if (mtk_crtc->event && mtk_crtc_state->base.event) DRM_ERROR("new event while there is still a pending event\n"); @@ -754,7 +755,11 @@ static void mtk_drm_crtc_atomic_begin(struct drm_crtc *crtc, if (mtk_crtc_state->base.event) { mtk_crtc_state->base.event->pipe = drm_crtc_index(crtc); WARN_ON(drm_crtc_vblank_get(crtc) != 0); + + spin_lock_irqsave(&crtc->dev->event_lock, flags); mtk_crtc->event = mtk_crtc_state->base.event; + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + mtk_crtc_state->base.event = NULL; } } From 0afcc6291024dc8fa32b966a0da27345b82bc6cd Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 1 Sep 2023 11:49:51 +0200 Subject: [PATCH 1101/1397] accel/ivpu: Print information about used workarounds [ Upstream commit eefa13a69053a09f20b2d1c00dda59be9c98cfe9 ] Use ivpu_dbg(MISC) to print information about workarounds. Reviewed-by: Karol Wachowski Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230901094957.168898-6-stanislaw.gruszka@linux.intel.com Stable-dep-of: 35c49cfc8b70 ("accel/ivpu/37xx: Fix interrupt_clear_with_0 WA initialization") Signed-off-by: Sasha Levin --- drivers/accel/ivpu/ivpu_drv.h | 5 +++++ drivers/accel/ivpu/ivpu_hw_37xx.c | 5 +++++ drivers/accel/ivpu/ivpu_hw_40xx.c | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 2adc349126bb..6853dfe1c7e5 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -76,6 +76,11 @@ #define IVPU_WA(wa_name) (vdev->wa.wa_name) +#define IVPU_PRINT_WA(wa_name) do { \ + if (IVPU_WA(wa_name)) \ + ivpu_dbg(vdev, MISC, "Using WA: " #wa_name "\n"); \ +} while (0) + struct ivpu_wa_table { bool punit_disabled; bool clear_runtime_mem; diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index b8010c07eec1..2409ff0dda61 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -104,6 +104,11 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev) if (ivpu_device_id(vdev) == PCI_DEVICE_ID_MTL && ivpu_revision(vdev) < 4) vdev->wa.interrupt_clear_with_0 = true; + + IVPU_PRINT_WA(punit_disabled); + IVPU_PRINT_WA(clear_runtime_mem); + IVPU_PRINT_WA(d3hot_after_power_off); + IVPU_PRINT_WA(interrupt_clear_with_0); } static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 7c3ff25232a2..03600a7a5aca 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -125,6 +125,10 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev) if (ivpu_hw_gen(vdev) == IVPU_HW_40XX) vdev->wa.disable_clock_relinquish = true; + + IVPU_PRINT_WA(punit_disabled); + IVPU_PRINT_WA(clear_runtime_mem); + IVPU_PRINT_WA(disable_clock_relinquish); } static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) From 83a42d791ba206978ec5381743dbcc49f3357e1d Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Mon, 4 Dec 2023 13:23:31 +0100 Subject: [PATCH 1102/1397] accel/ivpu/37xx: Fix interrupt_clear_with_0 WA initialization [ Upstream commit 35c49cfc8b702eda7a0d3f05497b16f81b69e289 ] Using PCI Device ID/Revision to initialize the interrupt_clear_with_0 workaround is problematic - there are many pre-production steppings with different behavior, even with the same PCI ID/Revision Instead of checking for PCI Device ID/Revision, check the VPU buttress interrupt status register behavior - if this register is not zero after writing 1s it means there register is RW instead of RW1C and we need to enable the interrupt_clear_with_0 workaround. Fixes: 7f34e01f77f8 ("accel/ivpu: Clear specific interrupt status bits on C0") Signed-off-by: Andrzej Kacprowski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://lore.kernel.org/all/20231204122331.40560-1-jacek.lawrynowicz@linux.intel.com Signed-off-by: Sasha Levin --- drivers/accel/ivpu/ivpu_hw_37xx.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 2409ff0dda61..ddf03498fd4c 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -53,10 +53,12 @@ #define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) -#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \ - (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ +#define BUTTRESS_IRQ_MASK ((REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, UFI_ERR))) +#define BUTTRESS_ALL_IRQ_MASK (BUTTRESS_IRQ_MASK | \ + (REG_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE))) + #define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK) #define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1) @@ -102,8 +104,12 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev) vdev->wa.clear_runtime_mem = false; vdev->wa.d3hot_after_power_off = true; - if (ivpu_device_id(vdev) == PCI_DEVICE_ID_MTL && ivpu_revision(vdev) < 4) + REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK); + if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) { + /* Writing 1s does not clear the interrupt status register */ vdev->wa.interrupt_clear_with_0 = true; + REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, 0x0); + } IVPU_PRINT_WA(punit_disabled); IVPU_PRINT_WA(clear_runtime_mem); From 54d08313a34faf7cdfa29cd6da320d1aa58b5235 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 1 Dec 2023 12:21:08 +0000 Subject: [PATCH 1103/1397] drm/i915/selftests: Fix engine reset count storage for multi-tile [ Upstream commit 7c7c863bf89c5f76d8c7fda177a81559b61dc15b ] Engine->id namespace is per-tile so struct igt_live_test->reset_engine[] needs to be two-dimensional so engine reset counts from all tiles can be stored with no aliasing. With aliasing, if we had a real multi-tile platform, the reset counts would be incorrect for same engine instance on different tiles. Signed-off-by: Tvrtko Ursulin Fixes: 0c29efa23f5c ("drm/i915/selftests: Consider multi-gt instead of to_gt()") Reported-by: Alan Previn Teres Alexis Cc: Tejas Upadhyay Cc: Andi Shyti Cc: Daniele Ceraolo Spurio Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20231201122109.729006-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 0647ece3819b018cb62a71c3bcb7c2c3243e78ac) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/selftests/igt_live_test.c | 9 +++++---- drivers/gpu/drm/i915/selftests/igt_live_test.h | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 4ddc6d902752..7d41874a49c5 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -37,8 +37,9 @@ int igt_live_test_begin(struct igt_live_test *t, } for_each_engine(engine, gt, id) - t->reset_engine[id] = - i915_reset_engine_count(&i915->gpu_error, engine); + t->reset_engine[i][id] = + i915_reset_engine_count(&i915->gpu_error, + engine); } t->reset_global = i915_reset_count(&i915->gpu_error); @@ -66,14 +67,14 @@ int igt_live_test_end(struct igt_live_test *t) for_each_gt(gt, i915, i) { for_each_engine(engine, gt, id) { - if (t->reset_engine[id] == + if (t->reset_engine[i][id] == i915_reset_engine_count(&i915->gpu_error, engine)) continue; gt_err(gt, "%s(%s): engine '%s' was reset %d times!\n", t->func, t->name, engine->name, i915_reset_engine_count(&i915->gpu_error, engine) - - t->reset_engine[id]); + t->reset_engine[i][id]); return -EIO; } } diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h index 36ed42736c52..83e3ad430922 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.h +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h @@ -7,6 +7,7 @@ #ifndef IGT_LIVE_TEST_H #define IGT_LIVE_TEST_H +#include "gt/intel_gt_defines.h" /* for I915_MAX_GT */ #include "gt/intel_engine.h" /* for I915_NUM_ENGINES */ struct drm_i915_private; @@ -17,7 +18,7 @@ struct igt_live_test { const char *name; unsigned int reset_global; - unsigned int reset_engine[I915_NUM_ENGINES]; + unsigned int reset_engine[I915_MAX_GT][I915_NUM_ENGINES]; }; /* From cd378c371ba080652a9c4d02f77d0737ff0d126e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 1 Dec 2023 12:21:09 +0000 Subject: [PATCH 1104/1397] drm/i915: Use internal class when counting engine resets [ Upstream commit 1f721a93a528268fa97875cff515d1fcb69f4f44 ] Commit 503579448db9 ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class") made the GSC0 engine not have a valid uabi class and so broke the engine reset counting, which in turn was made class based in cb823ed9915b ("drm/i915/gt: Use intel_gt as the primary object for handling resets"). Despite the title and commit text of the latter is not mentioning it (and has left the storage array incorrectly sized), tracking by class, despite it adding aliasing in hypthotetical multi-tile systems, is handy for virtual engines which for instance do not have a valid engine->id. Therefore we keep that but just change it to use the internal class which is always valid. We also add a helper to increment the count, which aligns with the existing getter. What was broken without this fix were out of bounds reads every time a reset would happen on the GSC0 engine, or during selftests when storing and cross-checking the counts in igt_live_test_begin and igt_live_test_end. Signed-off-by: Tvrtko Ursulin Fixes: 503579448db9 ("drm/i915/gsc: Mark internal GSC engine with reserved uabi class") [tursulin: fixed Fixes tag] Reported-by: Alan Previn Teres Alexis Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20231201122109.729006-2-tvrtko.ursulin@linux.intel.com (cherry picked from commit cf9cb028ac56696ff879af1154c4b2f0b12701fd) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 +++-- drivers/gpu/drm/i915/i915_gpu_error.h | 12 ++++++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index cc6bd21a3e51..5fa57a34cf4b 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1297,7 +1297,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) if (msg) drm_notice(&engine->i915->drm, "Resetting %s for %s\n", engine->name, msg); - atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); + i915_increase_reset_engine_count(&engine->i915->gpu_error, engine); ret = intel_gt_reset_engine(engine); if (ret) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index dc7b40e06e38..836e4d9d65ef 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -4774,7 +4774,8 @@ static void capture_error_state(struct intel_guc *guc, if (match) { intel_engine_set_hung_context(e, ce); engine_mask |= e->mask; - atomic_inc(&i915->gpu_error.reset_engine_count[e->uabi_class]); + i915_increase_reset_engine_count(&i915->gpu_error, + e); } } @@ -4786,7 +4787,7 @@ static void capture_error_state(struct intel_guc *guc, } else { intel_engine_set_hung_context(ce->engine, ce); engine_mask = ce->engine->mask; - atomic_inc(&i915->gpu_error.reset_engine_count[ce->engine->uabi_class]); + i915_increase_reset_engine_count(&i915->gpu_error, ce->engine); } with_intel_runtime_pm(&i915->runtime_pm, wakeref) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 9f5971f5e980..48f6c00402c4 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -16,6 +16,7 @@ #include "display/intel_display_device.h" #include "gt/intel_engine.h" +#include "gt/intel_engine_types.h" #include "gt/intel_gt_types.h" #include "gt/uc/intel_uc_fw.h" @@ -232,7 +233,7 @@ struct i915_gpu_error { atomic_t reset_count; /** Number of times an engine has been reset */ - atomic_t reset_engine_count[I915_NUM_ENGINES]; + atomic_t reset_engine_count[MAX_ENGINE_CLASS]; }; struct drm_i915_error_state_buf { @@ -255,7 +256,14 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, const struct intel_engine_cs *engine) { - return atomic_read(&error->reset_engine_count[engine->uabi_class]); + return atomic_read(&error->reset_engine_count[engine->class]); +} + +static inline void +i915_increase_reset_engine_count(struct i915_gpu_error *error, + const struct intel_engine_cs *engine) +{ + atomic_inc(&error->reset_engine_count[engine->class]); } #define CORE_DUMP_FLAG_NONE 0x0 From ca3ebcf2c448f04ae144ddb19ad68b4cab6fcad2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 6 Dec 2023 11:35:58 +0100 Subject: [PATCH 1105/1397] selftests/mm: cow: print ksft header before printing anything else [ Upstream commit a6fcd57cf2df409d35e9225b8dbad6f937b28df0 ] Doing a ksft_print_msg() before the ksft_print_header() seems to confuse the ksft framework in a strange way: running the test on the cmdline results in the expected output. But piping the output somewhere else, results in some odd output, whereby we repeatedly get the same info printed: # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled TAP version 13 1..190 # [INFO] Anonymous memory tests in private mappings # [RUN] Basic COW after fork() ... with base page # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled TAP version 13 1..190 # [INFO] Anonymous memory tests in private mappings # [RUN] Basic COW after fork() ... with base page ok 1 No leak from parent into child # [RUN] Basic COW after fork() ... with swapped out base page # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled Doing the ksft_print_header() first seems to resolve that and gives us the output we expect: TAP version 13 # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled 1..190 # [INFO] Anonymous memory tests in private mappings # [RUN] Basic COW after fork() ... with base page ok 1 No leak from parent into child # [RUN] Basic COW after fork() ... with swapped out base page ok 2 No leak from parent into child # [RUN] Basic COW after fork() ... with THP ok 3 No leak from parent into child # [RUN] Basic COW after fork() ... with swapped-out THP ok 4 No leak from parent into child # [RUN] Basic COW after fork() ... with PTE-mapped THP ok 5 No leak from parent into child Link: https://lkml.kernel.org/r/20231206103558.38040-1-david@redhat.com Fixes: f4b5fd6946e2 ("selftests/vm: anon_cow: THP tests") Signed-off-by: David Hildenbrand Reported-by: Nico Pache Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- tools/testing/selftests/mm/cow.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 7324ce5363c0..6f2f83990441 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -1680,6 +1680,8 @@ int main(int argc, char **argv) { int err; + ksft_print_header(); + pagesize = getpagesize(); thpsize = read_pmd_pagesize(); if (thpsize) @@ -1689,7 +1691,6 @@ int main(int argc, char **argv) ARRAY_SIZE(hugetlbsizes)); detect_huge_zeropage(); - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); From 54d8c1d3261dfb8be84ffc17cf94fa231c9d88d4 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Sat, 11 Nov 2023 00:37:47 -0800 Subject: [PATCH 1106/1397] x86/hyperv: Fix the detection of E820_TYPE_PRAM in a Gen2 VM [ Upstream commit 7e8037b099c0bbe8f2109dc452dbcab8d400fc53 ] A Gen2 VM doesn't support legacy PCI/PCIe, so both raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() -> pcibios_init() doesn't call pcibios_resource_survey() -> e820__reserve_resources_late(); as a result, any emulated persistent memory of E820_TYPE_PRAM (12) via the kernel parameter memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be detected by register_e820_pmem(). Fix this by directly calling e820__reserve_resources_late() in hv_pci_init(), which is called from arch_initcall(pci_arch_init). It's ok to move a Gen2 VM's e820__reserve_resources_late() from subsys_initcall(pci_subsys_init) to arch_initcall(pci_arch_init) because the code in-between doesn't depend on the E820 resources. e820__reserve_resources_late() depends on e820__reserve_resources(), which has been called earlier from setup_arch(). For a Gen-2 VM, the new hv_pci_init() also adds any memory of E820_TYPE_PMEM (7) into iomem_resource, and acpi_nfit_register_region() -> acpi_nfit_insert_resource() -> region_intersects() returns REGION_INTERSECTS, so the memory of E820_TYPE_PMEM won't get added twice. Changed the local variable "int gen2vm" to "bool gen2vm". Signed-off-by: Saurabh Sengar Signed-off-by: Dexuan Cui Signed-off-by: Wei Liu Message-ID: <1699691867-9827-1-git-send-email-ssengar@linux.microsoft.com> Signed-off-by: Sasha Levin --- arch/x86/hyperv/hv_init.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 21556ad87f4b..8f3a4d16bb79 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -286,15 +287,31 @@ static int hv_cpu_die(unsigned int cpu) static int __init hv_pci_init(void) { - int gen2vm = efi_enabled(EFI_BOOT); + bool gen2vm = efi_enabled(EFI_BOOT); /* - * For Generation-2 VM, we exit from pci_arch_init() by returning 0. - * The purpose is to suppress the harmless warning: + * A Generation-2 VM doesn't support legacy PCI/PCIe, so both + * raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() -> + * pcibios_init() doesn't call pcibios_resource_survey() -> + * e820__reserve_resources_late(); as a result, any emulated persistent + * memory of E820_TYPE_PRAM (12) via the kernel parameter + * memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be + * detected by register_e820_pmem(). Fix this by directly calling + * e820__reserve_resources_late() here: e820__reserve_resources_late() + * depends on e820__reserve_resources(), which has been called earlier + * from setup_arch(). Note: e820__reserve_resources_late() also adds + * any memory of E820_TYPE_PMEM (7) into iomem_resource, and + * acpi_nfit_register_region() -> acpi_nfit_insert_resource() -> + * region_intersects() returns REGION_INTERSECTS, so the memory of + * E820_TYPE_PMEM won't get added twice. + * + * We return 0 here so that pci_arch_init() won't print the warning: * "PCI: Fatal: No config space access function found" */ - if (gen2vm) + if (gen2vm) { + e820__reserve_resources_late(); return 0; + } /* For Generation-1 VM, we'll proceed in pci_arch_init(). */ return 1; From 2ebf775f0541ae0d474836fa0cf3220e502f8e3e Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 15 Nov 2023 11:08:57 +0100 Subject: [PATCH 1107/1397] usb: aqc111: check packet for fixup for true limit [ Upstream commit ccab434e674ca95d483788b1895a70c21b7f016a ] If a device sends a packet that is inbetween 0 and sizeof(u64) the value passed to skb_trim() as length will wrap around ending up as some very large value. The driver will then proceed to parse the header located at that position, which will either oops or process some random value. The fix is to check against sizeof(u64) rather than 0, which the driver currently does. The issue exists since the introduction of the driver. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/aqc111.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index a017e9de2119..7b8afa589a53 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1079,17 +1079,17 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb) u16 pkt_count = 0; u64 desc_hdr = 0; u16 vlan_tag = 0; - u32 skb_len = 0; + u32 skb_len; if (!skb) goto err; - if (skb->len == 0) + skb_len = skb->len; + if (skb_len < sizeof(desc_hdr)) goto err; - skb_len = skb->len; /* RX Descriptor Header */ - skb_trim(skb, skb->len - sizeof(desc_hdr)); + skb_trim(skb, skb_len - sizeof(desc_hdr)); desc_hdr = le64_to_cpup((u64 *)skb_tail_pointer(skb)); /* Check these packets */ From c40c0b89bf1d243de9b3d5f233d8d19ac51f9eae Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 15 Nov 2023 11:53:31 +0100 Subject: [PATCH 1108/1397] stmmac: dwmac-loongson: Add architecture dependency [ Upstream commit 7fbd5fc2b35a8f559a6b380dfa9bcd964a758186 ] Only present the DWMAC_LOONGSON option on architectures where it can actually be used. This follows the same logic as the DWMAC_INTEL option. Signed-off-by: Jean Delvare Cc: Keguang Zhang Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 06c6871f8788..25f2d42de406 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -269,7 +269,7 @@ config DWMAC_INTEL config DWMAC_LOONGSON tristate "Loongson PCI DWMAC support" default MACH_LOONGSON64 - depends on STMMAC_ETH && PCI + depends on (MACH_LOONGSON64 || COMPILE_TEST) && STMMAC_ETH && PCI depends on COMMON_CLK help This selects the LOONGSON PCI bus support for the stmmac driver, From 462f1111d9459baa1ff98c3bc966b64d480b9ea0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 15 Nov 2023 17:15:40 +0000 Subject: [PATCH 1109/1397] rxrpc: Fix some minor issues with bundle tracing [ Upstream commit 0c3bd086d12d185650d095a906662593ec607bd0 ] Fix some superficial issues with the tracing of rxrpc_bundle structs, including: (1) Set the debug_id when the bundle is allocated rather than when it is set up so that the "NEW" trace line displays the correct bundle ID. (2) Show the refcount when emitting the "FREE" traceline. Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rxrpc/conn_client.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 981ca5b98bcb..1d95f8bc769f 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -73,6 +73,7 @@ static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local) static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call, gfp_t gfp) { + static atomic_t rxrpc_bundle_id; struct rxrpc_bundle *bundle; bundle = kzalloc(sizeof(*bundle), gfp); @@ -85,6 +86,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call, bundle->upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags); bundle->service_id = call->dest_srx.srx_service; bundle->security_level = call->security_level; + bundle->debug_id = atomic_inc_return(&rxrpc_bundle_id); refcount_set(&bundle->ref, 1); atomic_set(&bundle->active, 1); INIT_LIST_HEAD(&bundle->waiting_calls); @@ -105,7 +107,8 @@ struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle, static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) { - trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free); + trace_rxrpc_bundle(bundle->debug_id, refcount_read(&bundle->ref), + rxrpc_bundle_free); rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle); key_put(bundle->key); kfree(bundle); @@ -239,7 +242,6 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) */ int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp) { - static atomic_t rxrpc_bundle_id; struct rxrpc_bundle *bundle, *candidate; struct rxrpc_local *local = call->local; struct rb_node *p, **pp, *parent; @@ -306,7 +308,6 @@ int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp) } _debug("new bundle"); - candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id); rb_link_node(&candidate->local_node, parent, pp); rb_insert_color(&candidate->local_node, &local->client_bundles); call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); From 5aba47ce61b7c277246e367f51fdcc930af6a699 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 17 Nov 2023 10:35:22 +0800 Subject: [PATCH 1110/1397] blk-throttle: fix lockdep warning of "cgroup_mutex or RCU read lock required!" [ Upstream commit 27b13e209ddca5979847a1b57890e0372c1edcee ] Inside blkg_for_each_descendant_pre(), both css_for_each_descendant_pre() and blkg_lookup() requires RCU read lock, and either cgroup_assert_mutex_or_rcu_locked() or rcu_read_lock_held() is called. Fix the warning by adding rcu read lock. Reported-by: Changhui Zhong Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20231117023527.3188627-2-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-throttle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 13e4377a8b28..16f5766620a4 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1320,6 +1320,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global) tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE), tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE)); + rcu_read_lock(); /* * Update has_rules[] flags for the updated tg's subtree. A tg is * considered to have rules if either the tg itself or any of its @@ -1347,6 +1348,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global) this_tg->latency_target = max(this_tg->latency_target, parent_tg->latency_target); } + rcu_read_unlock(); /* * We're already holding queue_lock and know @tg is valid. Let's From 0730b1e32a0b032a3a4fddebfec54d7102056414 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 17 Nov 2023 10:35:24 +0800 Subject: [PATCH 1111/1397] blk-cgroup: bypass blkcg_deactivate_policy after destroying [ Upstream commit e63a57303599b17290cd8bc48e6f20b24289a8bc ] blkcg_deactivate_policy() can be called after blkg_destroy_all() returns, and it isn't necessary since blkg_destroy_all has covered policy deactivation. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20231117023527.3188627-4-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-cgroup.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4a42ea2972ad..4b48c2c44098 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -577,6 +577,7 @@ static void blkg_destroy_all(struct gendisk *disk) struct request_queue *q = disk->queue; struct blkcg_gq *blkg, *n; int count = BLKG_DESTROY_BATCH_SIZE; + int i; restart: spin_lock_irq(&q->queue_lock); @@ -602,6 +603,18 @@ static void blkg_destroy_all(struct gendisk *disk) } } + /* + * Mark policy deactivated since policy offline has been done, and + * the free is scheduled, so future blkcg_deactivate_policy() can + * be bypassed + */ + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (pol) + __clear_bit(pol->plid, q->blkcg_pols); + } + q->root_blkg = NULL; spin_unlock_irq(&q->queue_lock); } From 09bdafb89a56a267c070fdb96d385934c14d2735 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 20 Nov 2023 13:24:54 +0800 Subject: [PATCH 1112/1397] bcache: avoid oversize memory allocation by small stripe_size [ Upstream commit baf8fb7e0e5ec54ea0839f0c534f2cdcd79bea9c ] Arraies bcache->stripe_sectors_dirty and bcache->full_dirty_stripes are used for dirty data writeback, their sizes are decided by backing device capacity and stripe size. Larger backing device capacity or smaller stripe size make these two arraies occupies more dynamic memory space. Currently bcache->stripe_size is directly inherited from queue->limits.io_opt of underlying storage device. For normal hard drives, its limits.io_opt is 0, and bcache sets the corresponding stripe_size to 1TB (1<<31 sectors), it works fine 10+ years. But for devices do declare value for queue->limits.io_opt, small stripe_size (comparing to 1TB) becomes an issue for oversize memory allocations of bcache->stripe_sectors_dirty and bcache->full_dirty_stripes, while the capacity of hard drives gets much larger in recent decade. For example a raid5 array assembled by three 20TB hardrives, the raid device capacity is 40TB with typical 512KB limits.io_opt. After the math calculation in bcache code, these two arraies will occupy 400MB dynamic memory. Even worse Andrea Tomassetti reports that a 4KB limits.io_opt is declared on a new 2TB hard drive, then these two arraies request 2GB and 512MB dynamic memory from kzalloc(). The result is that bcache device always fails to initialize on his system. To avoid the oversize memory allocation, bcache->stripe_size should not directly inherited by queue->limits.io_opt from the underlying device. This patch defines BCH_MIN_STRIPE_SZ (4MB) as minimal bcache stripe size and set bcache device's stripe size against the declared limits.io_opt value from the underlying storage device, - If the declared limits.io_opt > BCH_MIN_STRIPE_SZ, bcache device will set its stripe size directly by this limits.io_opt value. - If the declared limits.io_opt < BCH_MIN_STRIPE_SZ, bcache device will set its stripe size by a value multiplying limits.io_opt and euqal or large than BCH_MIN_STRIPE_SZ. Then the minimal stripe size of a bcache device will always be >= 4MB. For a 40TB raid5 device with 512KB limits.io_opt, memory occupied by bcache->stripe_sectors_dirty and bcache->full_dirty_stripes will be 50MB in total. For a 2TB hard drive with 4KB limits.io_opt, memory occupied by these two arraies will be 2.5MB in total. Such mount of memory allocated for bcache->stripe_sectors_dirty and bcache->full_dirty_stripes is reasonable for most of storage devices. Reported-by: Andrea Tomassetti Signed-off-by: Coly Li Reviewed-by: Eric Wheeler Link: https://lore.kernel.org/r/20231120052503.6122-2-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/super.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 5a79bb3c272f..83eb7f27db3d 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -265,6 +265,7 @@ struct bcache_device { #define BCACHE_DEV_WB_RUNNING 3 #define BCACHE_DEV_RATE_DW_RUNNING 4 int nr_stripes; +#define BCH_MIN_STRIPE_SZ ((4 << 20) >> SECTOR_SHIFT) unsigned int stripe_size; atomic_t *stripe_sectors_dirty; unsigned long *full_dirty_stripes; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 0ae2b3676293..93791e46b1e8 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -905,6 +905,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, if (!d->stripe_size) d->stripe_size = 1 << 31; + else if (d->stripe_size < BCH_MIN_STRIPE_SZ) + d->stripe_size = roundup(BCH_MIN_STRIPE_SZ, d->stripe_size); n = DIV_ROUND_UP_ULL(sectors, d->stripe_size); if (!n || n > max_stripes) { From 665341724499addb5400202294eef513b0f5f5de Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 20 Nov 2023 13:24:56 +0800 Subject: [PATCH 1113/1397] bcache: remove redundant assignment to variable cur_idx [ Upstream commit be93825f0e6428c2d3f03a6e4d447dc48d33d7ff ] Variable cur_idx is being initialized with a value that is never read, it is being re-assigned later in a while-loop. Remove the redundant assignment. Cleans up clang scan build warning: drivers/md/bcache/writeback.c:916:2: warning: Value stored to 'cur_idx' is never read [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Coly Li Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-4-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index d4432b3a6f96..3accfdaee6b1 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -913,7 +913,7 @@ static int bch_dirty_init_thread(void *arg) int cur_idx, prev_idx, skip_nr; k = p = NULL; - cur_idx = prev_idx = 0; + prev_idx = 0; bch_btree_iter_init(&c->root->keys, &iter, NULL); k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); From 286918928ed7094d53e6fd08513e4a3f1a09f1d0 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 20 Nov 2023 13:25:02 +0800 Subject: [PATCH 1114/1397] bcache: add code comments for bch_btree_node_get() and __bch_btree_node_alloc() [ Upstream commit 31f5b956a197d4ec25c8a07cb3a2ab69d0c0b82f ] This patch adds code comments to bch_btree_node_get() and __bch_btree_node_alloc() that NULL pointer will not be returned and it is unnecessary to check NULL pointer by the callers of these routines. Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-10-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/btree.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 3084c57248f6..b709c2fde782 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -995,6 +995,9 @@ static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op, * * The btree node will have either a read or a write lock held, depending on * level and op->lock. + * + * Note: Only error code or btree pointer will be returned, it is unncessary + * for callers to check NULL pointer. */ struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op, struct bkey *k, int level, bool write, @@ -1106,6 +1109,10 @@ static void btree_node_free(struct btree *b) mutex_unlock(&b->c->bucket_lock); } +/* + * Only error code or btree pointer will be returned, it is unncessary for + * callers to check NULL pointer. + */ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, int level, bool wait, struct btree *parent) From 4a4bba9f0470d0cb6afd8c8e880b04567585f542 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 20 Nov 2023 13:25:03 +0800 Subject: [PATCH 1115/1397] bcache: avoid NULL checking to c->root in run_cache_set() [ Upstream commit 3eba5e0b2422aec3c9e79822029599961fdcab97 ] In run_cache_set() after c->root returned from bch_btree_node_get(), it is checked by IS_ERR_OR_NULL(). Indeed it is unncessary to check NULL because bch_btree_node_get() will not return NULL pointer to caller. This patch replaces IS_ERR_OR_NULL() by IS_ERR() for the above reason. Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20231120052503.6122-11-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 93791e46b1e8..1e677af38521 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2017,7 +2017,7 @@ static int run_cache_set(struct cache_set *c) c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL); - if (IS_ERR_OR_NULL(c->root)) + if (IS_ERR(c->root)) goto err; list_del_init(&c->root->list); From 4087936a97b899efa0f51d5b71f31f52cd34f302 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 17 Nov 2023 00:23:14 +0800 Subject: [PATCH 1116/1397] nbd: fold nbd config initialization into nbd_alloc_config() [ Upstream commit 1b59860540a4018e8071dc18d4893ec389506b7d ] There are no functional changes, make the code cleaner and prepare to fix null-ptr-dereference while accessing 'nbd->config'. Signed-off-by: Li Nan Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20231116162316.1740402-2-linan666@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 855fdf5c3b4e..02f844832d91 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1530,17 +1530,20 @@ static int nbd_ioctl(struct block_device *bdev, blk_mode_t mode, return error; } -static struct nbd_config *nbd_alloc_config(void) +static int nbd_alloc_and_init_config(struct nbd_device *nbd) { struct nbd_config *config; + if (WARN_ON(nbd->config)) + return -EINVAL; + if (!try_module_get(THIS_MODULE)) - return ERR_PTR(-ENODEV); + return -ENODEV; config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); if (!config) { module_put(THIS_MODULE); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } atomic_set(&config->recv_threads, 0); @@ -1548,7 +1551,10 @@ static struct nbd_config *nbd_alloc_config(void) init_waitqueue_head(&config->conn_wait); config->blksize_bits = NBD_DEF_BLKSIZE_BITS; atomic_set(&config->live_connections, 0); - return config; + nbd->config = config; + refcount_set(&nbd->config_refs, 1); + + return 0; } static int nbd_open(struct gendisk *disk, blk_mode_t mode) @@ -1567,21 +1573,17 @@ static int nbd_open(struct gendisk *disk, blk_mode_t mode) goto out; } if (!refcount_inc_not_zero(&nbd->config_refs)) { - struct nbd_config *config; - mutex_lock(&nbd->config_lock); if (refcount_inc_not_zero(&nbd->config_refs)) { mutex_unlock(&nbd->config_lock); goto out; } - config = nbd_alloc_config(); - if (IS_ERR(config)) { - ret = PTR_ERR(config); + ret = nbd_alloc_and_init_config(nbd); + if (ret) { mutex_unlock(&nbd->config_lock); goto out; } - nbd->config = config; - refcount_set(&nbd->config_refs, 1); + refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); if (max_part) @@ -1990,22 +1992,17 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) pr_err("nbd%d already in use\n", index); return -EBUSY; } - if (WARN_ON(nbd->config)) { - mutex_unlock(&nbd->config_lock); - nbd_put(nbd); - return -EINVAL; - } - config = nbd_alloc_config(); - if (IS_ERR(config)) { + + ret = nbd_alloc_and_init_config(nbd); + if (ret) { mutex_unlock(&nbd->config_lock); nbd_put(nbd); pr_err("couldn't allocate config\n"); - return PTR_ERR(config); + return ret; } - nbd->config = config; - refcount_set(&nbd->config_refs, 1); - set_bit(NBD_RT_BOUND, &config->runtime_flags); + config = nbd->config; + set_bit(NBD_RT_BOUND, &config->runtime_flags); ret = nbd_genl_size_set(info, nbd); if (ret) goto out; From 8c1517344116cc47bdde5fb9b140189e7938a74c Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 17 Nov 2023 00:23:15 +0800 Subject: [PATCH 1117/1397] nbd: factor out a helper to get nbd_config without holding 'config_lock' [ Upstream commit 3123ac77923341774ca3ad1196ad20bb0732bf70 ] There are no functional changes, just to make code cleaner and prepare to fix null-ptr-dereference while accessing 'nbd->config'. Signed-off-by: Li Nan Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20231116162316.1740402-3-linan666@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 02f844832d91..daaf8805e876 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -395,6 +395,14 @@ static u32 req_to_nbd_cmd_type(struct request *req) } } +static struct nbd_config *nbd_get_config_unlocked(struct nbd_device *nbd) +{ + if (refcount_inc_not_zero(&nbd->config_refs)) + return nbd->config; + + return NULL; +} + static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); @@ -409,13 +417,13 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req) return BLK_EH_DONE; } - if (!refcount_inc_not_zero(&nbd->config_refs)) { + config = nbd_get_config_unlocked(nbd); + if (!config) { cmd->status = BLK_STS_TIMEOUT; __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); mutex_unlock(&cmd->lock); goto done; } - config = nbd->config; if (config->num_connections > 1 || (config->num_connections == 1 && nbd->tag_set.timeout)) { @@ -977,12 +985,12 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) struct nbd_sock *nsock; int ret; - if (!refcount_inc_not_zero(&nbd->config_refs)) { + config = nbd_get_config_unlocked(nbd); + if (!config) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Socks array is empty\n"); return -EINVAL; } - config = nbd->config; if (index >= config->num_connections) { dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -1560,6 +1568,7 @@ static int nbd_alloc_and_init_config(struct nbd_device *nbd) static int nbd_open(struct gendisk *disk, blk_mode_t mode) { struct nbd_device *nbd; + struct nbd_config *config; int ret = 0; mutex_lock(&nbd_index_mutex); @@ -1572,7 +1581,9 @@ static int nbd_open(struct gendisk *disk, blk_mode_t mode) ret = -ENXIO; goto out; } - if (!refcount_inc_not_zero(&nbd->config_refs)) { + + config = nbd_get_config_unlocked(nbd); + if (!config) { mutex_lock(&nbd->config_lock); if (refcount_inc_not_zero(&nbd->config_refs)) { mutex_unlock(&nbd->config_lock); @@ -1588,7 +1599,7 @@ static int nbd_open(struct gendisk *disk, blk_mode_t mode) mutex_unlock(&nbd->config_lock); if (max_part) set_bit(GD_NEED_PART_SCAN, &disk->state); - } else if (nbd_disconnected(nbd->config)) { + } else if (nbd_disconnected(config)) { if (max_part) set_bit(GD_NEED_PART_SCAN, &disk->state); } @@ -2205,7 +2216,8 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) } mutex_unlock(&nbd_index_mutex); - if (!refcount_inc_not_zero(&nbd->config_refs)) { + config = nbd_get_config_unlocked(nbd); + if (!config) { dev_err(nbd_to_dev(nbd), "not configured, cannot reconfigure\n"); nbd_put(nbd); @@ -2213,7 +2225,6 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) } mutex_lock(&nbd->config_lock); - config = nbd->config; if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) || !nbd->pid) { dev_err(nbd_to_dev(nbd), From c34fa2d16e98ec26251939d9958731ef74d5c493 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 17 Nov 2023 00:23:16 +0800 Subject: [PATCH 1118/1397] nbd: fix null-ptr-dereference while accessing 'nbd->config' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c2da049f419417808466c529999170f5c3ef7d3d ] Memory reordering may occur in nbd_genl_connect(), causing config_refs to be set to 1 while nbd->config is still empty. Opening nbd at this time will cause null-ptr-dereference. T1 T2 nbd_open nbd_get_config_unlocked nbd_genl_connect nbd_alloc_and_init_config //memory reordered refcount_set(&nbd->config_refs, 1) // 2 nbd->config ->null point nbd->config = config // 1 Fix it by adding smp barrier to guarantee the execution sequence. Signed-off-by: Li Nan Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20231116162316.1740402-4-linan666@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index daaf8805e876..3f03cb3dc33c 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -397,8 +397,16 @@ static u32 req_to_nbd_cmd_type(struct request *req) static struct nbd_config *nbd_get_config_unlocked(struct nbd_device *nbd) { - if (refcount_inc_not_zero(&nbd->config_refs)) + if (refcount_inc_not_zero(&nbd->config_refs)) { + /* + * Add smp_mb__after_atomic to ensure that reading nbd->config_refs + * and reading nbd->config is ordered. The pair is the barrier in + * nbd_alloc_and_init_config(), avoid nbd->config_refs is set + * before nbd->config. + */ + smp_mb__after_atomic(); return nbd->config; + } return NULL; } @@ -1559,7 +1567,15 @@ static int nbd_alloc_and_init_config(struct nbd_device *nbd) init_waitqueue_head(&config->conn_wait); config->blksize_bits = NBD_DEF_BLKSIZE_BITS; atomic_set(&config->live_connections, 0); + nbd->config = config; + /* + * Order refcount_set(&nbd->config_refs, 1) and nbd->config assignment, + * its pair is the barrier in nbd_get_config_unlocked(). + * So nbd_get_config_unlocked() won't see nbd->config as null after + * refcount_inc_not_zero() succeed. + */ + smp_mb__before_atomic(); refcount_set(&nbd->config_refs, 1); return 0; From 89fc9028e86e9968294ea4cb66d94fa7e5438871 Mon Sep 17 00:00:00 2001 From: Mark O'Donovan Date: Wed, 11 Oct 2023 08:45:12 +0000 Subject: [PATCH 1119/1397] nvme-auth: set explanation code for failure2 msgs [ Upstream commit 38ce1570e2c46e7e9af983aa337edd7e43723aa2 ] Some error cases were not setting an auth-failure-reason-code-explanation. This means an AUTH_Failure2 message will be sent with an explanation value of 0 which is a reserved value. Signed-off-by: Mark O'Donovan Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/auth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 064592a5d546..811541ce206b 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -840,6 +840,8 @@ static void nvme_queue_auth_work(struct work_struct *work) } fail2: + if (chap->status == 0) + chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED; dev_dbg(ctrl->device, "%s: qid %d send failure2, status %x\n", __func__, chap->qid, chap->status); tl = nvme_auth_set_dhchap_failure2_data(ctrl, chap); From 9514925a9abc4e26ce58ec93f70bfb565b53bd72 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 14 Nov 2023 14:27:01 +0100 Subject: [PATCH 1120/1397] nvme: catch errors from nvme_configure_metadata() [ Upstream commit cd9aed606088d36a7ffff3e808db4e76b1854285 ] nvme_configure_metadata() is issuing I/O, so we might incur an I/O error which will cause the connection to be reset. But in that case any further probing will race with reset and cause UAF errors. So return a status from nvme_configure_metadata() and abort probing if there was an I/O error. Signed-off-by: Hannes Reinecke Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c09048984a27..d5c8b0a08d49 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1813,16 +1813,18 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) return ret; } -static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) { struct nvme_ctrl *ctrl = ns->ctrl; + int ret; - if (nvme_init_ms(ns, id)) - return; + ret = nvme_init_ms(ns, id); + if (ret) + return ret; ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - return; + return 0; if (ctrl->ops->flags & NVME_F_FABRICS) { /* @@ -1831,7 +1833,7 @@ static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * remap the separate metadata buffer from the block layer. */ if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) - return; + return 0; ns->features |= NVME_NS_EXT_LBAS; @@ -1858,6 +1860,7 @@ static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) else ns->features |= NVME_NS_METADATA_SUPPORTED; } + return 0; } static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, @@ -2038,7 +2041,11 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ns->lba_shift = id->lbaf[lbaf].ds; nvme_set_queue_limits(ns->ctrl, ns->queue); - nvme_configure_metadata(ns, id); + ret = nvme_configure_metadata(ns, id); + if (ret < 0) { + blk_mq_unfreeze_queue(ns->disk->queue); + goto out; + } nvme_set_chunk_sectors(ns, id); nvme_update_disk_info(ns->disk, ns, id); From 0ade0b82faf7b8dec11075fe8ea38a7dba898042 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:06:53 +0200 Subject: [PATCH 1121/1397] selftests/bpf: fix bpf_loop_bench for new callback verification scheme MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f40bfd1679446b22d321e64a1fa98b7d07d2be08 ] This is a preparatory change. A follow-up patch "bpf: verify callbacks as if they are called unknown number of times" changes logic for callbacks handling. While previously callbacks were verified as a single function call, new scheme takes into account that callbacks could be executed unknown number of times. This has dire implications for bpf_loop_bench: SEC("fentry/" SYS_PREFIX "sys_getpgid") int benchmark(void *ctx) { for (int i = 0; i < 1000; i++) { bpf_loop(nr_loops, empty_callback, NULL, 0); __sync_add_and_fetch(&hits, nr_loops); } return 0; } W/o callbacks change verifier sees it as a 1000 calls to empty_callback(). However, with callbacks change things become exponential: - i=0: state exploring empty_callback is scheduled with i=0 (a); - i=1: state exploring empty_callback is scheduled with i=1; ... - i=999: state exploring empty_callback is scheduled with i=999; - state (a) is popped from stack; - i=1: state exploring empty_callback is scheduled with i=1; ... Avoid this issue by rewriting outer loop as bpf_loop(). Unfortunately, this adds a function call to a loop at runtime, which negatively affects performance: throughput latency before: 149.919 ± 0.168 M ops/s, 6.670 ns/op after : 137.040 ± 0.187 M ops/s, 7.297 ns/op Acked-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/progs/bpf_loop_bench.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c index 4ce76eb064c4..d461746fd3c1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c @@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data) return 0; } +static int outer_loop(__u32 index, void *data) +{ + bpf_loop(nr_loops, empty_callback, NULL, 0); + __sync_add_and_fetch(&hits, nr_loops); + return 0; +} + SEC("fentry/" SYS_PREFIX "sys_getpgid") int benchmark(void *ctx) { - for (int i = 0; i < 1000; i++) { - bpf_loop(nr_loops, empty_callback, NULL, 0); - - __sync_add_and_fetch(&hits, nr_loops); - } + bpf_loop(1000, outer_loop, NULL, 0); return 0; } From 03372601f5f1f143e759b611d93c0f2c2cb0b0b3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 1122/1397] LoongArch: Add dependency between vmlinuz.efi and vmlinux.efi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d3ec75bc635cb0cb8185b63293d33a3d1b942d22 ] A common issue in Makefile is a race in parallel building. You need to be careful to prevent multiple threads from writing to the same file simultaneously. Commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images") addressed such a bad scenario. A similar symptom occurs with the following command: $ make -j$(nproc) ARCH=loongarch vmlinux.efi vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/loongarch/boot/vmlinux.efi OBJCOPY arch/loongarch/boot/vmlinux.efi PAD arch/loongarch/boot/vmlinux.bin GZIP arch/loongarch/boot/vmlinuz OBJCOPY arch/loongarch/boot/vmlinuz.o LD arch/loongarch/boot/vmlinuz.efi.elf OBJCOPY arch/loongarch/boot/vmlinuz.efi The log "OBJCOPY arch/loongarch/boot/vmlinux.efi" is displayed twice. It indicates that two threads simultaneously enter arch/loongarch/boot/ and write to arch/loongarch/boot/vmlinux.efi. It occasionally leads to a build failure: $ make -j$(nproc) ARCH=loongarch vmlinux.efi vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/loongarch/boot/vmlinux.efi PAD arch/loongarch/boot/vmlinux.bin truncate: Invalid number: ‘arch/loongarch/boot/vmlinux.bin’ make[2]: *** [drivers/firmware/efi/libstub/Makefile.zboot:13: arch/loongarch/boot/vmlinux.bin] Error 1 make[2]: *** Deleting file 'arch/loongarch/boot/vmlinux.bin' make[1]: *** [arch/loongarch/Makefile:146: vmlinuz.efi] Error 2 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:234: __sub-make] Error 2 vmlinuz.efi depends on vmlinux.efi, but such a dependency is not specified in arch/loongarch/Makefile. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index fb0fada43197..96747bfec1a1 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -142,6 +142,8 @@ vdso_install: all: $(notdir $(KBUILD_IMAGE)) +vmlinuz.efi: vmlinux.efi + vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@ From ab3f300524697919f64ae920e904d0836b4057b0 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 1123/1397] LoongArch: Record pc instead of offset in la_abs relocation [ Upstream commit aa0cbc1b506b090c3a775b547c693ada108cc0d7 ] To clarify, the previous version functioned flawlessly. However, it's worth noting that the LLVM's LoongArch backend currently lacks support for cross-section label calculations. With this patch, we enable the use of clang to compile relocatable kernels. Tested-by: Nathan Chancellor Signed-off-by: WANG Rui Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/include/asm/asmmacro.h | 3 +-- arch/loongarch/include/asm/setup.h | 2 +- arch/loongarch/kernel/relocate.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index c9544f358c33..655db7d7a427 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -609,8 +609,7 @@ lu32i.d \reg, 0 lu52i.d \reg, \reg, 0 .pushsection ".la_abs", "aw", %progbits - 768: - .dword 768b-766b + .dword 766b .dword \sym .popsection #endif diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index a0bc159ce8bd..ee52fb1e9963 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -25,7 +25,7 @@ extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len #ifdef CONFIG_RELOCATABLE struct rela_la_abs { - long offset; + long pc; long symvalue; }; diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 6c3eff9af9fb..288b739ca88d 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -52,7 +52,7 @@ static inline void __init relocate_absolute(long random_offset) for (p = begin; (void *)p < end; p++) { long v = p->symvalue; uint32_t lu12iw, ori, lu32id, lu52id; - union loongarch_instruction *insn = (void *)p - p->offset; + union loongarch_instruction *insn = (void *)p->pc; lu12iw = (v >> 12) & 0xfffff; ori = v & 0xfff; From 71d8348cca92fa423538df047771653bf14f7810 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 1124/1397] LoongArch: Silence the boot warning about 'nokaslr' [ Upstream commit 902d75cdf0cf0a3fb58550089ee519abf12566f5 ] The kernel parameter 'nokaslr' is handled before start_kernel(), so we don't need early_param() to mark it technically. But it can cause a boot warning as follows: Unknown kernel command line parameters "nokaslr", will be passed to user space. When we use 'init=/bin/bash', 'nokaslr' which passed to user space will even cause a kernel panic. So we use early_param() to mark 'nokaslr', simply print a notice and silence the boot warning (also fix a potential panic). This logic is similar to RISC-V. Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/kernel/relocate.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 288b739ca88d..1acfa704c8d0 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -102,6 +102,14 @@ static inline __init unsigned long get_random_boot(void) return hash; } +static int __init nokaslr(char *p) +{ + pr_info("KASLR is disabled.\n"); + + return 0; /* Print a notice and silence the boot warning */ +} +early_param("nokaslr", nokaslr); + static inline __init bool kaslr_disabled(void) { char *str; From c28fec461df3869355c95b6baed3c36aa870f1fe Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 1125/1397] LoongArch: Mark {dmw,tlb}_virt_to_page() exports as non-GPL [ Upstream commit 19d86a496233731882aea7ec24505ce6641b1c0c ] Mark {dmw,tlb}_virt_to_page() exports as non-GPL, in order to let out-of-tree modules (e.g. OpenZFS) be built without errors. Otherwise we get: ERROR: modpost: GPL-incompatible module zfs.ko uses GPL-only symbol 'dmw_virt_to_page' ERROR: modpost: GPL-incompatible module zfs.ko uses GPL-only symbol 'tlb_virt_to_page' Reported-by: Haowu Ge Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/mm/pgtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index 71d0539e2d0b..2aae72e63871 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -13,13 +13,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr) { return pfn_to_page(virt_to_pfn(kaddr)); } -EXPORT_SYMBOL_GPL(dmw_virt_to_page); +EXPORT_SYMBOL(dmw_virt_to_page); struct page *tlb_virt_to_page(unsigned long kaddr) { return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr))); } -EXPORT_SYMBOL_GPL(tlb_virt_to_page); +EXPORT_SYMBOL(tlb_virt_to_page); pgd_t *pgd_alloc(struct mm_struct *mm) { From f61771aec67660c24b50202b58861cc8ecc11382 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: [PATCH 1126/1397] LoongArch: Implement constant timer shutdown interface [ Upstream commit d43f37b73468c172bc89ac4824a1511b411f0778 ] When a cpu is hot-unplugged, it is put in idle state and the function arch_cpu_idle_dead() is called. The timer interrupt for this processor should be disabled, otherwise there will be pending timer interrupt for the unplugged cpu, so that vcpu is prevented from giving up scheduling when system is running in vm mode. This patch implements the timer shutdown interface so that the constant timer will be properly disabled when a CPU is hot-unplugged. Reviewed-by: WANG Xuerui Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/kernel/time.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 3064af94db9c..e7015f7b70e3 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -58,14 +58,16 @@ static int constant_set_state_oneshot(struct clock_event_device *evt) return 0; } -static int constant_set_state_oneshot_stopped(struct clock_event_device *evt) +static int constant_set_state_periodic(struct clock_event_device *evt) { + unsigned long period; unsigned long timer_config; raw_spin_lock(&state_lock); - timer_config = csr_read64(LOONGARCH_CSR_TCFG); - timer_config &= ~CSR_TCFG_EN; + period = const_clock_freq / HZ; + timer_config = period & CSR_TCFG_VAL; + timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN); csr_write64(timer_config, LOONGARCH_CSR_TCFG); raw_spin_unlock(&state_lock); @@ -73,16 +75,14 @@ static int constant_set_state_oneshot_stopped(struct clock_event_device *evt) return 0; } -static int constant_set_state_periodic(struct clock_event_device *evt) +static int constant_set_state_shutdown(struct clock_event_device *evt) { - unsigned long period; unsigned long timer_config; raw_spin_lock(&state_lock); - period = const_clock_freq / HZ; - timer_config = period & CSR_TCFG_VAL; - timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN); + timer_config = csr_read64(LOONGARCH_CSR_TCFG); + timer_config &= ~CSR_TCFG_EN; csr_write64(timer_config, LOONGARCH_CSR_TCFG); raw_spin_unlock(&state_lock); @@ -90,11 +90,6 @@ static int constant_set_state_periodic(struct clock_event_device *evt) return 0; } -static int constant_set_state_shutdown(struct clock_event_device *evt) -{ - return 0; -} - static int constant_timer_next_event(unsigned long delta, struct clock_event_device *evt) { unsigned long timer_config; @@ -161,7 +156,7 @@ int constant_clockevent_init(void) cd->rating = 320; cd->cpumask = cpumask_of(cpu); cd->set_state_oneshot = constant_set_state_oneshot; - cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped; + cd->set_state_oneshot_stopped = constant_set_state_shutdown; cd->set_state_periodic = constant_set_state_periodic; cd->set_state_shutdown = constant_set_state_shutdown; cd->set_next_event = constant_timer_next_event; From 086f91f3ce3b8d13cb794b955002b74444a384fa Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 20 Nov 2023 17:07:56 +0200 Subject: [PATCH 1127/1397] platform/x86: intel_telemetry: Fix kernel doc descriptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a6584711e64d9d12ab79a450ec3628fd35e4f476 ] LKP found issues with a kernel doc in the driver: core.c:116: warning: Function parameter or member 'ioss_evtconfig' not described in 'telemetry_update_events' core.c:188: warning: Function parameter or member 'ioss_evtconfig' not described in 'telemetry_get_eventconfig' It looks like it were copy'n'paste typos when these descriptions had been introduced. Fix the typos. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310070743.WALmRGSY-lkp@intel.com/ Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20231120150756.1661425-1-andriy.shevchenko@linux.intel.com Reviewed-by: Rajneesh Bhardwaj Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/intel/telemetry/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/telemetry/core.c b/drivers/platform/x86/intel/telemetry/core.c index fdf55b5d6948..e4be40f73eeb 100644 --- a/drivers/platform/x86/intel/telemetry/core.c +++ b/drivers/platform/x86/intel/telemetry/core.c @@ -102,7 +102,7 @@ static const struct telemetry_core_ops telm_defpltops = { /** * telemetry_update_events() - Update telemetry Configuration * @pss_evtconfig: PSS related config. No change if num_evts = 0. - * @pss_evtconfig: IOSS related config. No change if num_evts = 0. + * @ioss_evtconfig: IOSS related config. No change if num_evts = 0. * * This API updates the IOSS & PSS Telemetry configuration. Old config * is overwritten. Call telemetry_reset_events when logging is over @@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(telemetry_reset_events); /** * telemetry_get_eventconfig() - Returns the pss and ioss events enabled * @pss_evtconfig: Pointer to PSS related configuration. - * @pss_evtconfig: Pointer to IOSS related configuration. + * @ioss_evtconfig: Pointer to IOSS related configuration. * @pss_len: Number of u32 elements allocated for pss_evtconfig array * @ioss_len: Number of u32 elements allocated for ioss_evtconfig array * From 2afe67cfe8f121b1b9dd244a0edc01b89cd65813 Mon Sep 17 00:00:00 2001 From: Hamish Martin Date: Wed, 25 Oct 2023 16:55:10 +1300 Subject: [PATCH 1128/1397] HID: mcp2221: Set driver data before I2C adapter add [ Upstream commit f2d4a5834638bbc967371b9168c0b481519f7c5e ] The process of adding an I2C adapter can invoke I2C accesses on that new adapter (see i2c_detect()). Ensure we have set the adapter's driver data to avoid null pointer dereferences in the xfer functions during the adapter add. This has been noted in the past and the same fix proposed but not completed. See: https://lore.kernel.org/lkml/ef597e73-ed71-168e-52af-0d19b03734ac@vigem.de/ Signed-off-by: Hamish Martin Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-mcp2221.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index 72883e0ce757..b95f31cf0fa2 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -1157,12 +1157,12 @@ static int mcp2221_probe(struct hid_device *hdev, snprintf(mcp->adapter.name, sizeof(mcp->adapter.name), "MCP2221 usb-i2c bridge"); + i2c_set_adapdata(&mcp->adapter, mcp); ret = devm_i2c_add_adapter(&hdev->dev, &mcp->adapter); if (ret) { hid_err(hdev, "can't add usb-i2c adapter: %d\n", ret); return ret; } - i2c_set_adapdata(&mcp->adapter, mcp); #if IS_REACHABLE(CONFIG_GPIOLIB) /* Setup GPIO chip */ From d4b50ac06ea6e47bdd2b04e97d1090c673c80645 Mon Sep 17 00:00:00 2001 From: Hamish Martin Date: Wed, 25 Oct 2023 16:55:11 +1300 Subject: [PATCH 1129/1397] HID: mcp2221: Allow IO to start during probe [ Upstream commit 73ce9f1f2741a38f5d27393e627702ae2c46e6f2 ] During the probe we add an I2C adapter and as soon as we add that adapter it may be used for a transfer (e.g via the code in i2cdetect()). Those transfers are not able to complete and time out. This is because the HID raw_event callback (mcp2221_raw_event) will not be invoked until the HID device's 'driver_input_lock' is marked up at the completion of the probe in hid_device_probe(). This starves the driver of the responses it is waiting for. In order to allow the I2C transfers to complete while we are still in the probe, start the IO once we have completed init of the HID device. This issue seems to have been seen before and a patch was submitted but it seems it was never accepted. See: https://lore.kernel.org/all/20221103222714.21566-3-Enrik.Berkhan@inka.de/ Signed-off-by: Hamish Martin Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-mcp2221.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index b95f31cf0fa2..aef0785c91cc 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -1142,6 +1142,8 @@ static int mcp2221_probe(struct hid_device *hdev, if (ret) return ret; + hid_device_io_start(hdev); + /* Set I2C bus clock diviser */ if (i2c_clk_freq > 400) i2c_clk_freq = 400; From 6b3507b8ea55aae5377aaccd6d9cb9f6e0204136 Mon Sep 17 00:00:00 2001 From: Yihong Cao Date: Mon, 30 Oct 2023 01:05:38 +0800 Subject: [PATCH 1130/1397] HID: apple: add Jamesdonkey and A3R to non-apple keyboards list [ Upstream commit 113f736655e4f20633e107d731dd5bd097d5938c ] Jamesdonkey A3R keyboard is identified as "Jamesdonkey A3R" in wired mode, "A3R-U" in wireless mode and "A3R" in bluetooth mode. Adding them to non-apple keyboards fixes function key. Signed-off-by: Yihong Cao Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-apple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 3ca45975c686..d9e9829b2200 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -345,6 +345,8 @@ static const struct apple_non_apple_keyboard non_apple_keyboards[] = { { "AONE" }, { "GANSS" }, { "Hailuck" }, + { "Jamesdonkey" }, + { "A3R" }, }; static bool apple_is_non_apple_keyboard(struct hid_device *hdev) From c38f7b0f554f0403eb90516332a9b70a64cadc75 Mon Sep 17 00:00:00 2001 From: Brett Raye Date: Thu, 2 Nov 2023 18:10:38 -0700 Subject: [PATCH 1131/1397] HID: glorious: fix Glorious Model I HID report [ Upstream commit a5e913c25b6b2b6ae02acef6d9400645ac03dfdf ] The Glorious Model I mouse has a buggy HID report descriptor for its keyboard endpoint (used for programmable buttons). For report ID 2, there is a mismatch between Logical Minimum and Usage Minimum in the array that reports keycodes. The offending portion of the descriptor: (from hid-decode) 0x95, 0x05, // Report Count (5) 30 0x75, 0x08, // Report Size (8) 32 0x15, 0x00, // Logical Minimum (0) 34 0x25, 0x65, // Logical Maximum (101) 36 0x05, 0x07, // Usage Page (Keyboard) 38 0x19, 0x01, // Usage Minimum (1) 40 0x29, 0x65, // Usage Maximum (101) 42 0x81, 0x00, // Input (Data,Arr,Abs) 44 This bug shifts all programmed keycodes up by 1. Importantly, this causes "empty" array indexes of 0x00 to be interpreted as 0x01, ErrorRollOver. The presence of ErrorRollOver causes the system to ignore all keypresses from the endpoint and breaks the ability to use the programmable buttons. Setting byte 41 to 0x00 fixes this, and causes keycodes to be interpreted correctly. Also, USB_VENDOR_ID_GLORIOUS is changed to USB_VENDOR_ID_SINOWEALTH, and a new ID for Laview Technology is added. Glorious seems to be white-labeling controller boards or mice from these vendors. There isn't a single canonical vendor ID for Glorious products. Signed-off-by: Brett Raye Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-glorious.c | 16 ++++++++++++++-- drivers/hid/hid-ids.h | 11 +++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/hid/hid-glorious.c b/drivers/hid/hid-glorious.c index 558eb08c19ef..281b3a7187ce 100644 --- a/drivers/hid/hid-glorious.c +++ b/drivers/hid/hid-glorious.c @@ -21,6 +21,10 @@ MODULE_DESCRIPTION("HID driver for Glorious PC Gaming Race mice"); * Glorious Model O and O- specify the const flag in the consumer input * report descriptor, which leads to inputs being ignored. Fix this * by patching the descriptor. + * + * Glorious Model I incorrectly specifes the Usage Minimum for its + * keyboard HID report, causing keycodes to be misinterpreted. + * Fix this by setting Usage Minimum to 0 in that report. */ static __u8 *glorious_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) @@ -32,6 +36,10 @@ static __u8 *glorious_report_fixup(struct hid_device *hdev, __u8 *rdesc, rdesc[85] = rdesc[113] = rdesc[141] = \ HID_MAIN_ITEM_VARIABLE | HID_MAIN_ITEM_RELATIVE; } + if (*rsize == 156 && rdesc[41] == 1) { + hid_info(hdev, "patching Glorious Model I keyboard report descriptor\n"); + rdesc[41] = 0; + } return rdesc; } @@ -44,6 +52,8 @@ static void glorious_update_name(struct hid_device *hdev) model = "Model O"; break; case USB_DEVICE_ID_GLORIOUS_MODEL_D: model = "Model D"; break; + case USB_DEVICE_ID_GLORIOUS_MODEL_I: + model = "Model I"; break; } snprintf(hdev->name, sizeof(hdev->name), "%s %s", "Glorious", model); @@ -66,10 +76,12 @@ static int glorious_probe(struct hid_device *hdev, } static const struct hid_device_id glorious_devices[] = { - { HID_USB_DEVICE(USB_VENDOR_ID_GLORIOUS, + { HID_USB_DEVICE(USB_VENDOR_ID_SINOWEALTH, USB_DEVICE_ID_GLORIOUS_MODEL_O) }, - { HID_USB_DEVICE(USB_VENDOR_ID_GLORIOUS, + { HID_USB_DEVICE(USB_VENDOR_ID_SINOWEALTH, USB_DEVICE_ID_GLORIOUS_MODEL_D) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LAVIEW, + USB_DEVICE_ID_GLORIOUS_MODEL_I) }, { } }; MODULE_DEVICE_TABLE(hid, glorious_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 97ab317570dd..72046039d1be 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -511,10 +511,6 @@ #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_010A 0x010a #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100 -#define USB_VENDOR_ID_GLORIOUS 0x258a -#define USB_DEVICE_ID_GLORIOUS_MODEL_D 0x0033 -#define USB_DEVICE_ID_GLORIOUS_MODEL_O 0x0036 - #define I2C_VENDOR_ID_GOODIX 0x27c6 #define I2C_DEVICE_ID_GOODIX_01F0 0x01f0 @@ -746,6 +742,9 @@ #define USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD 0x0006 #define USB_DEVICE_ID_LABTEC_ODDOR_HANDBRAKE 0x8888 +#define USB_VENDOR_ID_LAVIEW 0x22D4 +#define USB_DEVICE_ID_GLORIOUS_MODEL_I 0x1503 + #define USB_VENDOR_ID_LCPOWER 0x1241 #define USB_DEVICE_ID_LCPOWER_LC1000 0xf767 @@ -1160,6 +1159,10 @@ #define USB_VENDOR_ID_SIGMATEL 0x066F #define USB_DEVICE_ID_SIGMATEL_STMP3780 0x3780 +#define USB_VENDOR_ID_SINOWEALTH 0x258a +#define USB_DEVICE_ID_GLORIOUS_MODEL_D 0x0033 +#define USB_DEVICE_ID_GLORIOUS_MODEL_O 0x0036 + #define USB_VENDOR_ID_SIS_TOUCH 0x0457 #define USB_DEVICE_ID_SIS9200_TOUCH 0x9200 #define USB_DEVICE_ID_SIS817_TOUCH 0x0817 From af48c4099bd86ae0b600eabfc3911f0923526525 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 14 Nov 2023 15:54:30 +0100 Subject: [PATCH 1132/1397] HID: add ALWAYS_POLL quirk for Apple kb [ Upstream commit c55092187d9ad7b2f8f5a8645286fa03997d442f ] These devices disconnect if suspended without remote wakeup. They can operate with the standard driver. Signed-off-by: Oliver Neukum Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index d9a4f8f7bbb0..e0bbf0c6345d 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -33,6 +33,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_AKAI, USB_DEVICE_ID_AKAI_MPKMINI2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_ALPS, USB_DEVICE_ID_IBM_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_AMI, USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVMC), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVM), HID_QUIRK_NOGET }, From b3ebe19c197b4dd8731de0ec7fd31fe9c12c774d Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 11 Sep 2023 10:33:08 +0800 Subject: [PATCH 1133/1397] nbd: pass nbd_sock to nbd_read_reply() instead of index [ Upstream commit 98c598afc22d4e43c2ad91860b65996d0c099a5d ] If a socket is processing ioctl 'NBD_SET_SOCK', config->socks might be krealloc in nbd_add_socket(), and a garbage request is received now, a UAF may occurs. T1 nbd_ioctl __nbd_ioctl nbd_add_socket blk_mq_freeze_queue T2 recv_work nbd_read_reply sock_xmit krealloc config->socks def config->socks Pass nbd_sock to nbd_read_reply(). And introduce a new function sock_xmit_recv(), which differs from sock_xmit only in the way it get socket. ================================================================== BUG: KASAN: use-after-free in sock_xmit+0x525/0x550 Read of size 8 at addr ffff8880188ec428 by task kworker/u12:1/18779 Workqueue: knbd4-recv recv_work Call Trace: __dump_stack dump_stack+0xbe/0xfd print_address_description.constprop.0+0x19/0x170 __kasan_report.cold+0x6c/0x84 kasan_report+0x3a/0x50 sock_xmit+0x525/0x550 nbd_read_reply+0xfe/0x2c0 recv_work+0x1c2/0x750 process_one_work+0x6b6/0xf10 worker_thread+0xdd/0xd80 kthread+0x30a/0x410 ret_from_fork+0x22/0x30 Allocated by task 18784: kasan_save_stack+0x1b/0x40 kasan_set_track set_alloc_info __kasan_kmalloc __kasan_kmalloc.constprop.0+0xf0/0x130 slab_post_alloc_hook slab_alloc_node slab_alloc __kmalloc_track_caller+0x157/0x550 __do_krealloc krealloc+0x37/0xb0 nbd_add_socket +0x2d3/0x880 __nbd_ioctl nbd_ioctl+0x584/0x8e0 __blkdev_driver_ioctl blkdev_ioctl+0x2a0/0x6e0 block_ioctl+0xee/0x130 vfs_ioctl __do_sys_ioctl __se_sys_ioctl+0x138/0x190 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x61/0xc6 Freed by task 18784: kasan_save_stack+0x1b/0x40 kasan_set_track+0x1c/0x30 kasan_set_free_info+0x20/0x40 __kasan_slab_free.part.0+0x13f/0x1b0 slab_free_hook slab_free_freelist_hook slab_free kfree+0xcb/0x6c0 krealloc+0x56/0xb0 nbd_add_socket+0x2d3/0x880 __nbd_ioctl nbd_ioctl+0x584/0x8e0 __blkdev_driver_ioctl blkdev_ioctl+0x2a0/0x6e0 block_ioctl+0xee/0x130 vfs_ioctl __do_sys_ioctl __se_sys_ioctl+0x138/0x190 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x61/0xc6 Signed-off-by: Li Nan Reviewed-by: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20230911023308.3467802-1-linan666@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3f03cb3dc33c..b6414e1e645b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -67,6 +67,7 @@ struct nbd_sock { struct recv_thread_args { struct work_struct work; struct nbd_device *nbd; + struct nbd_sock *nsock; int index; }; @@ -505,15 +506,9 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req) return BLK_EH_DONE; } -/* - * Send or receive packet. Return a positive value on success and - * negtive value on failue, and never return 0. - */ -static int sock_xmit(struct nbd_device *nbd, int index, int send, - struct iov_iter *iter, int msg_flags, int *sent) +static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, + struct iov_iter *iter, int msg_flags, int *sent) { - struct nbd_config *config = nbd->config; - struct socket *sock = config->socks[index]->sock; int result; struct msghdr msg; unsigned int noreclaim_flag; @@ -556,6 +551,19 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, return result; } +/* + * Send or receive packet. Return a positive value on success and + * negtive value on failure, and never return 0. + */ +static int sock_xmit(struct nbd_device *nbd, int index, int send, + struct iov_iter *iter, int msg_flags, int *sent) +{ + struct nbd_config *config = nbd->config; + struct socket *sock = config->socks[index]->sock; + + return __sock_xmit(nbd, sock, send, iter, msg_flags, sent); +} + /* * Different settings for sk->sk_sndtimeo can result in different return values * if there is a signal pending when we enter sendmsg, because reasons? @@ -712,7 +720,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) return 0; } -static int nbd_read_reply(struct nbd_device *nbd, int index, +static int nbd_read_reply(struct nbd_device *nbd, struct socket *sock, struct nbd_reply *reply) { struct kvec iov = {.iov_base = reply, .iov_len = sizeof(*reply)}; @@ -721,7 +729,7 @@ static int nbd_read_reply(struct nbd_device *nbd, int index, reply->magic = 0; iov_iter_kvec(&to, ITER_DEST, &iov, 1, sizeof(*reply)); - result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); + result = __sock_xmit(nbd, sock, 0, &to, MSG_WAITALL, NULL); if (result < 0) { if (!nbd_disconnected(nbd->config)) dev_err(disk_to_dev(nbd->disk), @@ -845,14 +853,14 @@ static void recv_work(struct work_struct *work) struct nbd_device *nbd = args->nbd; struct nbd_config *config = nbd->config; struct request_queue *q = nbd->disk->queue; - struct nbd_sock *nsock; + struct nbd_sock *nsock = args->nsock; struct nbd_cmd *cmd; struct request *rq; while (1) { struct nbd_reply reply; - if (nbd_read_reply(nbd, args->index, &reply)) + if (nbd_read_reply(nbd, nsock->sock, &reply)) break; /* @@ -887,7 +895,6 @@ static void recv_work(struct work_struct *work) percpu_ref_put(&q->q_usage_counter); } - nsock = config->socks[args->index]; mutex_lock(&nsock->tx_lock); nbd_mark_nsock_dead(nbd, nsock, 1); mutex_unlock(&nsock->tx_lock); @@ -1231,6 +1238,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) INIT_WORK(&args->work, recv_work); args->index = i; args->nbd = nbd; + args->nsock = nsock; nsock->cookie++; mutex_unlock(&nsock->tx_lock); sockfd_put(old); @@ -1413,6 +1421,7 @@ static int nbd_start_device(struct nbd_device *nbd) refcount_inc(&nbd->config_refs); INIT_WORK(&args->work, recv_work); args->nbd = nbd; + args->nsock = config->socks[i]; args->index = i; queue_work(nbd->recv_workq, &args->work); } From 9fc2827c02425c1a74233acaa258e725043d7d59 Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Fri, 17 Nov 2023 14:15:56 +1300 Subject: [PATCH 1134/1397] HID: hid-asus: reset the backlight brightness level on resume [ Upstream commit 546edbd26cff7ae990e480a59150e801a06f77b1 ] Some devices managed by this driver automatically set brightness to 0 before entering a suspended state and reset it back to a default brightness level after the resume: this has the effect of having the kernel report wrong brightness status after a sleep, and on some devices (like the Asus RC71L) that brightness is the intensity of LEDs directly facing the user. Fix the above issue by setting back brightness to the level it had before entering a sleep state. Signed-off-by: Denis Benato Signed-off-by: Luke D. Jones Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-asus.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index fd61dba88233..194a86cf30db 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1000,6 +1000,24 @@ static int asus_start_multitouch(struct hid_device *hdev) return 0; } +static int __maybe_unused asus_resume(struct hid_device *hdev) { + struct asus_drvdata *drvdata = hid_get_drvdata(hdev); + int ret = 0; + + if (drvdata->kbd_backlight) { + const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0xba, 0xc5, 0xc4, + drvdata->kbd_backlight->cdev.brightness }; + ret = asus_kbd_set_report(hdev, buf, sizeof(buf)); + if (ret < 0) { + hid_err(hdev, "Asus failed to set keyboard backlight: %d\n", ret); + goto asus_resume_err; + } + } + +asus_resume_err: + return ret; +} + static int __maybe_unused asus_reset_resume(struct hid_device *hdev) { struct asus_drvdata *drvdata = hid_get_drvdata(hdev); @@ -1294,6 +1312,7 @@ static struct hid_driver asus_driver = { .input_configured = asus_input_configured, #ifdef CONFIG_PM .reset_resume = asus_reset_resume, + .resume = asus_resume, #endif .event = asus_event, .raw_event = asus_raw_event From c9d25e4639c1d95d384e08d48d7b89cf690db9d1 Mon Sep 17 00:00:00 2001 From: Aoba K Date: Tue, 21 Nov 2023 20:23:11 +0800 Subject: [PATCH 1135/1397] HID: multitouch: Add quirk for HONOR GLO-GXXX touchpad [ Upstream commit 9ffccb691adb854e7b7f3ee57fbbda12ff70533f ] Honor MagicBook 13 2023 has a touchpad which do not switch to the multitouch mode until the input mode feature is written by the host. The touchpad do report the input mode at touchpad(3), while itself working under mouse mode. As a workaround, it is possible to call MT_QUIRE_FORCE_GET_FEATURE to force set feature in mt_set_input_mode for such device. The touchpad reports as BLTP7853, which cannot retrive any useful manufacture information on the internel by this string at present. As the serial number of the laptop is GLO-G52, while DMI info reports the laptop serial number as GLO-GXXX, this workaround should applied to all models which has the GLO-GXXX. Signed-off-by: Aoba K Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-multitouch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 8db4ae05febc..5ec1f174127a 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2048,6 +2048,11 @@ static const struct hid_device_id mt_devices[] = { MT_USB_DEVICE(USB_VENDOR_ID_HANVON_ALT, USB_DEVICE_ID_HANVON_ALT_MULTITOUCH) }, + /* HONOR GLO-GXXX panel */ + { .driver_data = MT_CLS_VTL, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + 0x347d, 0x7853) }, + /* Ilitek dual touch panel */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_ILITEK, From 1f75542ce7c4e958386fb4932185842c3c8e1c4d Mon Sep 17 00:00:00 2001 From: Nguyen Dinh Phi Date: Tue, 21 Nov 2023 15:53:57 +0800 Subject: [PATCH 1136/1397] nfc: virtual_ncidev: Add variable to check if ndev is running [ Upstream commit 84d2db91f14a32dc856a5972e3f0907089093c7a ] syzbot reported an memory leak that happens when an skb is add to send_buff after virtual nci closed. This patch adds a variable to track if the ndev is running before handling new skb in send function. Signed-off-by: Nguyen Dinh Phi Reported-by: syzbot+6eb09d75211863f15e3e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/00000000000075472b06007df4fb@google.com Reviewed-by: Bongsu Jeon Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/virtual_ncidev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c index b027be0b0b6f..590b038e449e 100644 --- a/drivers/nfc/virtual_ncidev.c +++ b/drivers/nfc/virtual_ncidev.c @@ -26,10 +26,14 @@ struct virtual_nci_dev { struct mutex mtx; struct sk_buff *send_buff; struct wait_queue_head wq; + bool running; }; static int virtual_nci_open(struct nci_dev *ndev) { + struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); + + vdev->running = true; return 0; } @@ -40,6 +44,7 @@ static int virtual_nci_close(struct nci_dev *ndev) mutex_lock(&vdev->mtx); kfree_skb(vdev->send_buff); vdev->send_buff = NULL; + vdev->running = false; mutex_unlock(&vdev->mtx); return 0; @@ -50,7 +55,7 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); mutex_lock(&vdev->mtx); - if (vdev->send_buff) { + if (vdev->send_buff || !vdev->running) { mutex_unlock(&vdev->mtx); kfree_skb(skb); return -1; From 97774998f8e18dd1dfd728701ddbf82e73fa037f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Nov 2023 13:00:00 +0100 Subject: [PATCH 1137/1397] scripts/checkstack.pl: match all stack sizes for s390 [ Upstream commit aab1f809d7540def24498e81347740a7239a74d5 ] For some unknown reason the regular expression for checkstack only matches three digit numbers starting with the number "3", or any higher number. Which means that it skips any stack sizes smaller than 304 bytes. This makes the checkstack script a bit less useful than it could be. Change the script to match any number. To be filtered out stack sizes can be configured with the min_stack variable, which omits any stack frame sizes smaller than 100 bytes by default. Tested-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- scripts/checkstack.pl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index ac74f8629cea..f27d552aec43 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -97,8 +97,7 @@ # 11160: a7 fb ff 60 aghi %r15,-160 # or # 100092: e3 f0 ff c8 ff 71 lay %r15,-56(%r15) - $re = qr/.*(?:lay|ag?hi).*\%r15,-(([0-9]{2}|[3-9])[0-9]{2}) - (?:\(\%r15\))?$/ox; + $re = qr/.*(?:lay|ag?hi).*\%r15,-([0-9]+)(?:\(\%r15\))?$/o; } elsif ($arch eq 'sparc' || $arch eq 'sparc64') { # f0019d10: 9d e3 bf 90 save %sp, -112, %sp $re = qr/.*save.*%sp, -(([0-9]{2}|[3-9])[0-9]{2}), %sp/o; From a739ceb7474561e522250b6209f73c12f15c0b77 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 9 Nov 2023 22:22:13 -0800 Subject: [PATCH 1138/1397] asm-generic: qspinlock: fix queued_spin_value_unlocked() implementation [ Upstream commit 125b0bb95dd6bec81b806b997a4ccb026eeecf8f ] We really don't want to do atomic_read() or anything like that, since we already have the value, not the lock. The whole point of this is that we've loaded the lock from memory, and we want to check whether the value we loaded was a locked one or not. The main use of this is the lockref code, which loads both the lock and the reference count in one atomic operation, and then works on that combined value. With the atomic_read(), the compiler would pointlessly spill the value to the stack, in order to then be able to read it back "atomically". This is the qspinlock version of commit c6f4a9002252 ("asm-generic: ticket-lock: Optimize arch_spin_value_unlocked()") which fixed this same bug for ticket locks. Cc: Guo Ren Cc: Ingo Molnar Cc: Waiman Long Link: https://lore.kernel.org/all/CAHk-=whNRv0v6kQiV5QO6DJhjH4KEL36vWQ6Re8Csrnh4zbRkQ@mail.gmail.com/ Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/asm-generic/qspinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 995513fa2690..0655aa5b57b2 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -70,7 +70,7 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock) */ static __always_inline int queued_spin_value_unlocked(struct qspinlock lock) { - return !atomic_read(&lock.val); + return !lock.val.counter; } /** From 6abb8c223ce12078a0f2c129656a13338dfe960b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 21 Nov 2023 18:10:06 -0500 Subject: [PATCH 1139/1397] eventfs: Do not allow NULL parent to eventfs_start_creating() [ Upstream commit fc4561226feaad5fcdcb55646c348d77b8ee69c5 ] The eventfs directory is dynamically created via the meta data supplied by the existing trace events. All files and directories in eventfs has a parent. Do not allow NULL to be passed into eventfs_start_creating() as the parent because that should never happen. Warn if it does. Link: https://lkml.kernel.org/r/20231121231112.693841807@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Reviewed-by: Josef Bacik Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- fs/tracefs/inode.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 891653ba9cf3..0292c6a2bed9 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -509,20 +509,15 @@ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent) struct dentry *dentry; int error; + /* Must always have a parent. */ + if (WARN_ON_ONCE(!parent)) + return ERR_PTR(-EINVAL); + error = simple_pin_fs(&trace_fs_type, &tracefs_mount, &tracefs_mount_count); if (error) return ERR_PTR(error); - /* - * If the parent is not specified, we create it in the root. - * We need the root dentry to do this, which is in the super - * block. A pointer to that is in the struct vfsmount that we - * have around. - */ - if (!parent) - parent = tracefs_mount->mnt_root; - if (unlikely(IS_DEADDIR(parent->d_inode))) dentry = ERR_PTR(-ENOENT); else From e25ee0c2459ae021b19d81febea329baad641eba Mon Sep 17 00:00:00 2001 From: Lech Perczak Date: Sat, 18 Nov 2023 00:19:18 +0100 Subject: [PATCH 1140/1397] net: usb: qmi_wwan: claim interface 4 for ZTE MF290 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 99360d9620f09fb8bc15548d855011bbb198c680 ] Interface 4 is used by for QMI interface in stock firmware of MF28D, the router which uses MF290 modem. Rebind it to qmi_wwan after freeing it up from option driver. The proper configuration is: Interface mapping is: 0: QCDM, 1: (unknown), 2: AT (PCUI), 2: AT (Modem), 4: QMI T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=19d2 ProdID=0189 Rev= 0.00 S: Manufacturer=ZTE, Incorporated S: Product=ZTE LTE Technologies MSM C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms Cc: Bjørn Mork Signed-off-by: Lech Perczak Link: https://lore.kernel.org/r/20231117231918.100278-3-lech.perczak@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 344af3c5c836..e2e181378f41 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1289,6 +1289,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x0168, 4)}, {QMI_FIXED_INTF(0x19d2, 0x0176, 3)}, {QMI_FIXED_INTF(0x19d2, 0x0178, 3)}, + {QMI_FIXED_INTF(0x19d2, 0x0189, 4)}, /* ZTE MF290 */ {QMI_FIXED_INTF(0x19d2, 0x0191, 4)}, /* ZTE EuFi890 */ {QMI_FIXED_INTF(0x19d2, 0x0199, 1)}, /* ZTE MF820S */ {QMI_FIXED_INTF(0x19d2, 0x0200, 1)}, From d5c959a1dba6b21ba305ca173ffee89891640b03 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 21 Nov 2023 20:12:52 -0300 Subject: [PATCH 1141/1397] smb: client: implement ->query_reparse_point() for SMB1 [ Upstream commit ed3e0a149b58ea8cfd10cc4f7cefb39877ff07ac ] Reparse points are not limited to symlinks, so implement ->query_reparse_point() in order to handle different file types. Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifspdu.h | 2 +- fs/smb/client/cifsproto.h | 9 ++ fs/smb/client/cifssmb.c | 193 +++++++++++++++----------------------- fs/smb/client/smb1ops.c | 49 ++-------- fs/smb/client/smb2ops.c | 35 ++++--- 5 files changed, 113 insertions(+), 175 deletions(-) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index a75220db5c1e..2a90134331a4 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -1356,7 +1356,7 @@ typedef struct smb_com_transaction_ioctl_rsp { __le32 DataDisplacement; __u8 SetupCount; /* 1 */ __le16 ReturnedDataLen; - __u16 ByteCount; + __le16 ByteCount; } __attribute__((packed)) TRANSACT_IOCTL_RSP; #define CIFS_ACL_OWNER 1 diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 8e53abcfc5ec..dc2c43dd6a91 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -457,6 +457,12 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **syminfo, const struct nls_table *nls_codepage, int remap); +extern int cifs_query_reparse_point(const unsigned int xid, + struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + const char *full_path, + u32 *tag, struct kvec *rsp, + int *rsp_buftype); extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, char **symlinkinfo, const struct nls_table *nls_codepage); @@ -656,6 +662,9 @@ void cifs_put_tcp_super(struct super_block *sb); int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix); char *extract_hostname(const char *unc); char *extract_sharename(const char *unc); +int parse_reparse_point(struct reparse_data_buffer *buf, + u32 plen, struct cifs_sb_info *cifs_sb, + bool unicode, char **target_path); #ifdef CONFIG_CIFS_DFS_UPCALL static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 25503f1a4fd2..bad91ba6c3a9 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -2690,136 +2690,97 @@ CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, return rc; } -/* - * Recent Windows versions now create symlinks more frequently - * and they use the "reparse point" mechanism below. We can of course - * do symlinks nicely to Samba and other servers which support the - * CIFS Unix Extensions and we can also do SFU symlinks and "client only" - * "MF" symlinks optionally, but for recent Windows we really need to - * reenable the code below and fix the cifs_symlink callers to handle this. - * In the interim this code has been moved to its own config option so - * it is not compiled in by default until callers fixed up and more tested. - */ -int -CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, - __u16 fid, char **symlinkinfo, - const struct nls_table *nls_codepage) +int cifs_query_reparse_point(const unsigned int xid, + struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + const char *full_path, + u32 *tag, struct kvec *rsp, + int *rsp_buftype) { - int rc = 0; - int bytes_returned; - struct smb_com_transaction_ioctl_req *pSMB; - struct smb_com_transaction_ioctl_rsp *pSMBr; - bool is_unicode; - unsigned int sub_len; - char *sub_start; - struct reparse_symlink_data *reparse_buf; - struct reparse_posix_data *posix_buf; + struct cifs_open_parms oparms; + TRANSACT_IOCTL_REQ *io_req = NULL; + TRANSACT_IOCTL_RSP *io_rsp = NULL; + struct cifs_fid fid; __u32 data_offset, data_count; - char *end_of_smb; + __u8 *start, *end; + int io_rsp_len; + int oplock = 0; + int rc; - cifs_dbg(FYI, "In Windows reparse style QueryLink for fid %u\n", fid); - rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, - (void **) &pSMBr); + cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path); + + if (cap_unix(tcon->ses)) + return -EOPNOTSUPP; + + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, + .desired_access = FILE_READ_ATTRIBUTES, + .create_options = cifs_create_options(cifs_sb, + OPEN_REPARSE_POINT), + .disposition = FILE_OPEN, + .path = full_path, + .fid = &fid, + }; + + rc = CIFS_open(xid, &oparms, &oplock, NULL); if (rc) return rc; - pSMB->TotalParameterCount = 0 ; - pSMB->TotalDataCount = 0; - pSMB->MaxParameterCount = cpu_to_le32(2); - /* BB find exact data count max from sess structure BB */ - pSMB->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00); - pSMB->MaxSetupCount = 4; - pSMB->Reserved = 0; - pSMB->ParameterOffset = 0; - pSMB->DataCount = 0; - pSMB->DataOffset = 0; - pSMB->SetupCount = 4; - pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL); - pSMB->ParameterCount = pSMB->TotalParameterCount; - pSMB->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT); - pSMB->IsFsctl = 1; /* FSCTL */ - pSMB->IsRootFlag = 0; - pSMB->Fid = fid; /* file handle always le */ - pSMB->ByteCount = 0; + rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, + (void **)&io_req, (void **)&io_rsp); + if (rc) + goto error; - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned, 0); - if (rc) { - cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc); - goto qreparse_out; - } + io_req->TotalParameterCount = 0; + io_req->TotalDataCount = 0; + io_req->MaxParameterCount = cpu_to_le32(2); + /* BB find exact data count max from sess structure BB */ + io_req->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00); + io_req->MaxSetupCount = 4; + io_req->Reserved = 0; + io_req->ParameterOffset = 0; + io_req->DataCount = 0; + io_req->DataOffset = 0; + io_req->SetupCount = 4; + io_req->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL); + io_req->ParameterCount = io_req->TotalParameterCount; + io_req->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT); + io_req->IsFsctl = 1; + io_req->IsRootFlag = 0; + io_req->Fid = fid.netfid; + io_req->ByteCount = 0; + + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *)io_req, + (struct smb_hdr *)io_rsp, &io_rsp_len, 0); + if (rc) + goto error; - data_offset = le32_to_cpu(pSMBr->DataOffset); - data_count = le32_to_cpu(pSMBr->DataCount); - if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) { - /* BB also check enough total bytes returned */ - rc = -EIO; /* bad smb */ - goto qreparse_out; - } - if (!data_count || (data_count > 2048)) { + data_offset = le32_to_cpu(io_rsp->DataOffset); + data_count = le32_to_cpu(io_rsp->DataCount); + if (get_bcc(&io_rsp->hdr) < 2 || data_offset > 512 || + !data_count || data_count > 2048) { rc = -EIO; - cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n"); - goto qreparse_out; - } - end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount; - reparse_buf = (struct reparse_symlink_data *) - ((char *)&pSMBr->hdr.Protocol + data_offset); - if ((char *)reparse_buf >= end_of_smb) { - rc = -EIO; - goto qreparse_out; - } - if (reparse_buf->ReparseTag == cpu_to_le32(IO_REPARSE_TAG_NFS)) { - cifs_dbg(FYI, "NFS style reparse tag\n"); - posix_buf = (struct reparse_posix_data *)reparse_buf; - - if (posix_buf->InodeType != cpu_to_le64(NFS_SPECFILE_LNK)) { - cifs_dbg(FYI, "unsupported file type 0x%llx\n", - le64_to_cpu(posix_buf->InodeType)); - rc = -EOPNOTSUPP; - goto qreparse_out; - } - is_unicode = true; - sub_len = le16_to_cpu(reparse_buf->ReparseDataLength); - if (posix_buf->PathBuffer + sub_len > end_of_smb) { - cifs_dbg(FYI, "reparse buf beyond SMB\n"); - rc = -EIO; - goto qreparse_out; - } - *symlinkinfo = cifs_strndup_from_utf16(posix_buf->PathBuffer, - sub_len, is_unicode, nls_codepage); - goto qreparse_out; - } else if (reparse_buf->ReparseTag != - cpu_to_le32(IO_REPARSE_TAG_SYMLINK)) { - rc = -EOPNOTSUPP; - goto qreparse_out; + goto error; } - /* Reparse tag is NTFS symlink */ - sub_start = le16_to_cpu(reparse_buf->SubstituteNameOffset) + - reparse_buf->PathBuffer; - sub_len = le16_to_cpu(reparse_buf->SubstituteNameLength); - if (sub_start + sub_len > end_of_smb) { - cifs_dbg(FYI, "reparse buf beyond SMB\n"); + end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount; + start = (__u8 *)&io_rsp->hdr.Protocol + data_offset; + if (start >= end) { rc = -EIO; - goto qreparse_out; + goto error; } - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) - is_unicode = true; - else - is_unicode = false; - - /* BB FIXME investigate remapping reserved chars here */ - *symlinkinfo = cifs_strndup_from_utf16(sub_start, sub_len, is_unicode, - nls_codepage); - if (!*symlinkinfo) - rc = -ENOMEM; -qreparse_out: - cifs_buf_release(pSMB); - /* - * Note: On -EAGAIN error only caller can retry on handle based calls - * since file handle passed in no longer valid. - */ + *tag = le32_to_cpu(((struct reparse_data_buffer *)start)->ReparseTag); + rsp->iov_base = io_rsp; + rsp->iov_len = io_rsp_len; + *rsp_buftype = CIFS_LARGE_BUFFER; + CIFSSMBClose(xid, tcon, fid.netfid); + return 0; + +error: + cifs_buf_release(io_req); + CIFSSMBClose(xid, tcon, fid.netfid); return rc; } diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 9bf8735cdd1e..6b4d8effa79d 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -979,18 +979,13 @@ static int cifs_query_symlink(const unsigned int xid, char **target_path, struct kvec *rsp_iov) { + struct reparse_data_buffer *buf; + TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base; + bool unicode = !!(io->hdr.Flags2 & SMBFLG2_UNICODE); + u32 plen = le16_to_cpu(io->ByteCount); int rc; - int oplock = 0; - bool is_reparse_point = !!rsp_iov; - struct cifs_fid fid; - struct cifs_open_parms oparms; - cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); - - if (is_reparse_point) { - cifs_dbg(VFS, "reparse points not handled for SMB1 symlinks\n"); - return -EOPNOTSUPP; - } + cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path); /* Check for unix extensions */ if (cap_unix(tcon->ses)) { @@ -1001,37 +996,12 @@ static int cifs_query_symlink(const unsigned int xid, rc = cifs_unix_dfs_readlink(xid, tcon, full_path, target_path, cifs_sb->local_nls); - - goto out; + return rc; } - oparms = (struct cifs_open_parms) { - .tcon = tcon, - .cifs_sb = cifs_sb, - .desired_access = FILE_READ_ATTRIBUTES, - .create_options = cifs_create_options(cifs_sb, - OPEN_REPARSE_POINT), - .disposition = FILE_OPEN, - .path = full_path, - .fid = &fid, - }; - - rc = CIFS_open(xid, &oparms, &oplock, NULL); - if (rc) - goto out; - - rc = CIFSSMBQuerySymLink(xid, tcon, fid.netfid, target_path, - cifs_sb->local_nls); - if (rc) - goto out_close; - - convert_delimiter(*target_path, '/'); -out_close: - CIFSSMBClose(xid, tcon, fid.netfid); -out: - if (!rc) - cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); - return rc; + buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + + le32_to_cpu(io->DataOffset)); + return parse_reparse_point(buf, plen, cifs_sb, unicode, target_path); } static bool @@ -1214,6 +1184,7 @@ struct smb_version_operations smb1_operations = { .is_path_accessible = cifs_is_path_accessible, .can_echo = cifs_can_echo, .query_path_info = cifs_query_path_info, + .query_reparse_point = cifs_query_reparse_point, .query_file_info = cifs_query_file_info, .get_srv_inum = cifs_get_srv_inum, .set_path_size = CIFSSMBSetEOF, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 386b62d5c133..f1966ab9941c 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2894,27 +2894,26 @@ parse_reparse_posix(struct reparse_posix_data *symlink_buf, return 0; } -static int -parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf, - u32 plen, char **target_path, - struct cifs_sb_info *cifs_sb) +static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym, + u32 plen, bool unicode, char **target_path, + struct cifs_sb_info *cifs_sb) { unsigned int sub_len; unsigned int sub_offset; /* We handle Symbolic Link reparse tag here. See: MS-FSCC 2.1.2.4 */ - sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset); - sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength); + sub_offset = le16_to_cpu(sym->SubstituteNameOffset); + sub_len = le16_to_cpu(sym->SubstituteNameLength); if (sub_offset + 20 > plen || sub_offset + sub_len + 20 > plen) { cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); return -EIO; } - *target_path = cifs_strndup_from_utf16( - symlink_buf->PathBuffer + sub_offset, - sub_len, true, cifs_sb->local_nls); + *target_path = cifs_strndup_from_utf16(sym->PathBuffer + sub_offset, + sub_len, unicode, + cifs_sb->local_nls); if (!(*target_path)) return -ENOMEM; @@ -2924,19 +2923,17 @@ parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf, return 0; } -static int -parse_reparse_point(struct reparse_data_buffer *buf, - u32 plen, char **target_path, - struct cifs_sb_info *cifs_sb) +int parse_reparse_point(struct reparse_data_buffer *buf, + u32 plen, struct cifs_sb_info *cifs_sb, + bool unicode, char **target_path) { - if (plen < sizeof(struct reparse_data_buffer)) { + if (plen < sizeof(*buf)) { cifs_dbg(VFS, "reparse buffer is too small. Must be at least 8 bytes but was %d\n", plen); return -EIO; } - if (plen < le16_to_cpu(buf->ReparseDataLength) + - sizeof(struct reparse_data_buffer)) { + if (plen < le16_to_cpu(buf->ReparseDataLength) + sizeof(*buf)) { cifs_dbg(VFS, "srv returned invalid reparse buf length: %d\n", plen); return -EIO; @@ -2951,7 +2948,7 @@ parse_reparse_point(struct reparse_data_buffer *buf, case IO_REPARSE_TAG_SYMLINK: return parse_reparse_symlink( (struct reparse_symlink_data_buffer *)buf, - plen, target_path, cifs_sb); + plen, unicode, target_path, cifs_sb); default: cifs_dbg(VFS, "srv returned unknown symlink buffer tag:0x%08x\n", le32_to_cpu(buf->ReparseTag)); @@ -2970,11 +2967,11 @@ static int smb2_query_symlink(const unsigned int xid, struct smb2_ioctl_rsp *io = rsp_iov->iov_base; u32 plen = le32_to_cpu(io->OutputCount); - cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + cifs_tcon_dbg(FYI, "%s: path: %s\n", __func__, full_path); buf = (struct reparse_data_buffer *)((u8 *)io + le32_to_cpu(io->OutputOffset)); - return parse_reparse_point(buf, plen, target_path, cifs_sb); + return parse_reparse_point(buf, plen, cifs_sb, true, target_path); } static int smb2_query_reparse_point(const unsigned int xid, From 4d07e5df13877921cbbebd1e305b50f512c241ee Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 21 Nov 2023 20:12:53 -0300 Subject: [PATCH 1142/1397] smb: client: introduce ->parse_reparse_point() [ Upstream commit 539aad7f14dab7f947e5ab81901c0b20513a50db ] Parse reparse point into cifs_open_info_data structure and feed it through cifs_open_info_to_fattr(). Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifsglob.h | 6 ++++-- fs/smb/client/inode.c | 23 +++++++++++++--------- fs/smb/client/smb1ops.c | 41 ++++++++++++++++++++++------------------ fs/smb/client/smb2ops.c | 28 ++++++++++++++------------- 4 files changed, 56 insertions(+), 42 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index b8d1c19f6771..7180b5713bea 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -395,8 +395,7 @@ struct smb_version_operations { struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - char **target_path, - struct kvec *rsp_iov); + char **target_path); /* open a file for non-posix mounts */ int (*open)(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf); @@ -551,6 +550,9 @@ struct smb_version_operations { bool (*is_status_io_timeout)(char *buf); /* Check for STATUS_NETWORK_NAME_DELETED */ bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); + int (*parse_reparse_point)(struct cifs_sb_info *cifs_sb, + struct kvec *rsp_iov, + struct cifs_open_info_data *data); }; struct smb_version_values { diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index d6aa5e474d5e..7a2a013bb05e 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -457,8 +457,7 @@ static int cifs_get_unix_fattr(const unsigned char *full_path, return -EOPNOTSUPP; rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path, - &fattr->cf_symlink_target, - NULL); + &fattr->cf_symlink_target); cifs_dbg(FYI, "%s: query_symlink: %d\n", __func__, rc); } return rc; @@ -1035,22 +1034,28 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, if (!rc) iov = &rsp_iov; } + + rc = -EOPNOTSUPP; switch ((data->reparse_tag = tag)) { case 0: /* SMB1 symlink */ - iov = NULL; - fallthrough; - case IO_REPARSE_TAG_NFS: - case IO_REPARSE_TAG_SYMLINK: - if (!data->symlink_target && server->ops->query_symlink) { + if (server->ops->query_symlink) { rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path, - &data->symlink_target, - iov); + &data->symlink_target); } break; case IO_REPARSE_TAG_MOUNT_POINT: cifs_create_junction_fattr(fattr, sb); + rc = 0; goto out; + default: + if (data->symlink_target) { + rc = 0; + } else if (server->ops->parse_reparse_point) { + rc = server->ops->parse_reparse_point(cifs_sb, + iov, data); + } + break; } cifs_open_info_to_fattr(fattr, data, sb); diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 6b4d8effa79d..0dd599004e04 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -976,32 +976,36 @@ static int cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - char **target_path, - struct kvec *rsp_iov) + char **target_path) { - struct reparse_data_buffer *buf; - TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base; - bool unicode = !!(io->hdr.Flags2 & SMBFLG2_UNICODE); - u32 plen = le16_to_cpu(io->ByteCount); int rc; cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path); - /* Check for unix extensions */ - if (cap_unix(tcon->ses)) { - rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path, - cifs_sb->local_nls, - cifs_remap(cifs_sb)); - if (rc == -EREMOTE) - rc = cifs_unix_dfs_readlink(xid, tcon, full_path, - target_path, - cifs_sb->local_nls); - return rc; - } + if (!cap_unix(tcon->ses)) + return -EOPNOTSUPP; + + rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path, + cifs_sb->local_nls, cifs_remap(cifs_sb)); + if (rc == -EREMOTE) + rc = cifs_unix_dfs_readlink(xid, tcon, full_path, + target_path, cifs_sb->local_nls); + return rc; +} + +static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb, + struct kvec *rsp_iov, + struct cifs_open_info_data *data) +{ + struct reparse_data_buffer *buf; + TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base; + bool unicode = !!(io->hdr.Flags2 & SMBFLG2_UNICODE); + u32 plen = le16_to_cpu(io->ByteCount); buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + le32_to_cpu(io->DataOffset)); - return parse_reparse_point(buf, plen, cifs_sb, unicode, target_path); + return parse_reparse_point(buf, plen, cifs_sb, unicode, + &data->symlink_target); } static bool @@ -1200,6 +1204,7 @@ struct smb_version_operations smb1_operations = { .rename = CIFSSMBRename, .create_hardlink = CIFSCreateHardLink, .query_symlink = cifs_query_symlink, + .parse_reparse_point = cifs_parse_reparse_point, .open = cifs_open_file, .set_fid = cifs_set_fid, .close = cifs_close_file, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index f1966ab9941c..0390d7a801d1 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2949,6 +2949,12 @@ int parse_reparse_point(struct reparse_data_buffer *buf, return parse_reparse_symlink( (struct reparse_symlink_data_buffer *)buf, plen, unicode, target_path, cifs_sb); + case IO_REPARSE_TAG_LX_SYMLINK: + case IO_REPARSE_TAG_AF_UNIX: + case IO_REPARSE_TAG_LX_FIFO: + case IO_REPARSE_TAG_LX_CHR: + case IO_REPARSE_TAG_LX_BLK: + return 0; default: cifs_dbg(VFS, "srv returned unknown symlink buffer tag:0x%08x\n", le32_to_cpu(buf->ReparseTag)); @@ -2956,22 +2962,18 @@ int parse_reparse_point(struct reparse_data_buffer *buf, } } -static int smb2_query_symlink(const unsigned int xid, - struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, - const char *full_path, - char **target_path, - struct kvec *rsp_iov) +static int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, + struct kvec *rsp_iov, + struct cifs_open_info_data *data) { struct reparse_data_buffer *buf; struct smb2_ioctl_rsp *io = rsp_iov->iov_base; u32 plen = le32_to_cpu(io->OutputCount); - cifs_tcon_dbg(FYI, "%s: path: %s\n", __func__, full_path); - buf = (struct reparse_data_buffer *)((u8 *)io + le32_to_cpu(io->OutputOffset)); - return parse_reparse_point(buf, plen, cifs_sb, true, target_path); + return parse_reparse_point(buf, plen, cifs_sb, + true, &data->symlink_target); } static int smb2_query_reparse_point(const unsigned int xid, @@ -5215,7 +5217,7 @@ struct smb_version_operations smb20_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, @@ -5317,7 +5319,7 @@ struct smb_version_operations smb21_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, @@ -5422,7 +5424,7 @@ struct smb_version_operations smb30_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, @@ -5536,7 +5538,7 @@ struct smb_version_operations smb311_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .query_symlink = smb2_query_symlink, + .parse_reparse_point = smb2_parse_reparse_point, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .open = smb2_open_file, From df32e887d32b80875c410e2a1091be166549e051 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 21 Nov 2023 20:12:54 -0300 Subject: [PATCH 1143/1397] smb: client: set correct file type from NFS reparse points [ Upstream commit 45e724022e2704b5a5193fd96f378822b0448e07 ] Handle all file types in NFS reparse points as specified in MS-FSCC 2.1.2.6 Network File System (NFS) Reparse Data Buffer. The client is now able to set all file types based on the parsed NFS reparse point, which used to support only symlinks. This works for SMB1+. Before patch: $ mount.cifs //srv/share /mnt -o ... $ ls -l /mnt ls: cannot access 'block': Operation not supported ls: cannot access 'char': Operation not supported ls: cannot access 'fifo': Operation not supported ls: cannot access 'sock': Operation not supported total 1 l????????? ? ? ? ? ? block l????????? ? ? ? ? ? char -rwxr-xr-x 1 root root 5 Nov 18 23:22 f0 l????????? ? ? ? ? ? fifo l--------- 1 root root 0 Nov 18 23:23 link -> f0 l????????? ? ? ? ? ? sock After patch: $ mount.cifs //srv/share /mnt -o ... $ ls -l /mnt total 1 brwxr-xr-x 1 root root 123, 123 Nov 18 00:34 block crwxr-xr-x 1 root root 1234, 1234 Nov 18 00:33 char -rwxr-xr-x 1 root root 5 Nov 18 23:22 f0 prwxr-xr-x 1 root root 0 Nov 18 23:23 fifo lrwxr-xr-x 1 root root 0 Nov 18 23:23 link -> f0 srwxr-xr-x 1 root root 0 Nov 19 2023 sock Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifsglob.h | 8 ++- fs/smb/client/cifspdu.h | 2 +- fs/smb/client/cifsproto.h | 4 +- fs/smb/client/inode.c | 51 +++++++++++++++++-- fs/smb/client/readdir.c | 6 ++- fs/smb/client/smb1ops.c | 3 +- fs/smb/client/smb2inode.c | 2 +- fs/smb/client/smb2ops.c | 101 ++++++++++++++++++++------------------ 8 files changed, 116 insertions(+), 61 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 7180b5713bea..bd7fc20c49de 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -191,7 +191,13 @@ struct cifs_open_info_data { bool reparse_point; bool symlink; }; - __u32 reparse_tag; + struct { + __u32 tag; + union { + struct reparse_data_buffer *buf; + struct reparse_posix_data *posix; + }; + } reparse; char *symlink_target; union { struct smb2_file_all_info fi; diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 2a90134331a4..83ccc51a54d0 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -1509,7 +1509,7 @@ struct reparse_posix_data { __le16 ReparseDataLength; __u16 Reserved; __le64 InodeType; /* LNK, FIFO, CHR etc. */ - char PathBuffer[]; + __u8 DataBuffer[]; } __attribute__((packed)); struct cifs_quota_data { diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index dc2c43dd6a91..c858feaf4f92 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -209,7 +209,7 @@ int cifs_get_inode_info(struct inode **inode, const char *full_path, const struct cifs_fid *fid); bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, - u32 tag); + struct cifs_open_info_data *data); extern int smb311_posix_get_inode_info(struct inode **pinode, const char *search_path, struct super_block *sb, unsigned int xid); extern int cifs_get_inode_info_unix(struct inode **pinode, @@ -664,7 +664,7 @@ char *extract_hostname(const char *unc); char *extract_sharename(const char *unc); int parse_reparse_point(struct reparse_data_buffer *buf, u32 plen, struct cifs_sb_info *cifs_sb, - bool unicode, char **target_path); + bool unicode, struct cifs_open_info_data *data); #ifdef CONFIG_CIFS_DFS_UPCALL static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 7a2a013bb05e..6a856945f2b4 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -719,10 +719,51 @@ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink); } +static inline dev_t nfs_mkdev(struct reparse_posix_data *buf) +{ + u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer); + + return MKDEV(v >> 32, v & 0xffffffff); +} + bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, - u32 tag) + struct cifs_open_info_data *data) { + struct reparse_posix_data *buf = data->reparse.posix; + u32 tag = data->reparse.tag; + + if (tag == IO_REPARSE_TAG_NFS && buf) { + switch (le64_to_cpu(buf->InodeType)) { + case NFS_SPECFILE_CHR: + fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_CHR; + fattr->cf_rdev = nfs_mkdev(buf); + break; + case NFS_SPECFILE_BLK: + fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_BLK; + fattr->cf_rdev = nfs_mkdev(buf); + break; + case NFS_SPECFILE_FIFO: + fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_FIFO; + break; + case NFS_SPECFILE_SOCK: + fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_SOCK; + break; + case NFS_SPECFILE_LNK: + fattr->cf_mode = S_IFLNK | cifs_sb->ctx->file_mode; + fattr->cf_dtype = DT_LNK; + break; + default: + WARN_ON_ONCE(1); + return false; + } + return true; + } + switch (tag) { case IO_REPARSE_TAG_LX_SYMLINK: fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode; @@ -788,7 +829,7 @@ static void cifs_open_info_to_fattr(struct cifs_fattr *fattr, fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); if (cifs_open_data_reparse(data) && - cifs_reparse_point_to_fattr(cifs_sb, fattr, data->reparse_tag)) + cifs_reparse_point_to_fattr(cifs_sb, fattr, data)) goto out_reparse; if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { @@ -855,7 +896,7 @@ cifs_get_file_info(struct file *filp) data.adjust_tz = false; if (data.symlink_target) { data.symlink = true; - data.reparse_tag = IO_REPARSE_TAG_SYMLINK; + data.reparse.tag = IO_REPARSE_TAG_SYMLINK; } cifs_open_info_to_fattr(&fattr, &data, inode->i_sb); break; @@ -1024,7 +1065,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct kvec rsp_iov, *iov = NULL; int rsp_buftype = CIFS_NO_BUFFER; - u32 tag = data->reparse_tag; + u32 tag = data->reparse.tag; int rc = 0; if (!tag && server->ops->query_reparse_point) { @@ -1036,7 +1077,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, } rc = -EOPNOTSUPP; - switch ((data->reparse_tag = tag)) { + switch ((data->reparse.tag = tag)) { case 0: /* SMB1 symlink */ if (server->ops->query_symlink) { rc = server->ops->query_symlink(xid, tcon, diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 47fc22de8d20..d30ea2005eb3 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -153,6 +153,10 @@ static bool reparse_file_needs_reval(const struct cifs_fattr *fattr) static void cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) { + struct cifs_open_info_data data = { + .reparse = { .tag = fattr->cf_cifstag, }, + }; + fattr->cf_uid = cifs_sb->ctx->linux_uid; fattr->cf_gid = cifs_sb->ctx->linux_gid; @@ -165,7 +169,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) * reasonably map some of them to directories vs. files vs. symlinks */ if ((fattr->cf_cifsattrs & ATTR_REPARSE) && - cifs_reparse_point_to_fattr(cifs_sb, fattr, fattr->cf_cifstag)) + cifs_reparse_point_to_fattr(cifs_sb, fattr, &data)) goto out_reparse; if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 0dd599004e04..64e25233e85d 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -1004,8 +1004,7 @@ static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb, buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + le32_to_cpu(io->DataOffset)); - return parse_reparse_point(buf, plen, cifs_sb, unicode, - &data->symlink_target); + return parse_reparse_point(buf, plen, cifs_sb, unicode, data); } static bool diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 0b89f7008ac0..c94940af5d4b 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -555,7 +555,7 @@ static int parse_create_response(struct cifs_open_info_data *data, break; } data->reparse_point = reparse_point; - data->reparse_tag = tag; + data->reparse.tag = tag; return rc; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 0390d7a801d1..0ca6dcf20261 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2866,89 +2866,95 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, return rc; } -static int -parse_reparse_posix(struct reparse_posix_data *symlink_buf, - u32 plen, char **target_path, - struct cifs_sb_info *cifs_sb) +/* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */ +static int parse_reparse_posix(struct reparse_posix_data *buf, + struct cifs_sb_info *cifs_sb, + struct cifs_open_info_data *data) { unsigned int len; - - /* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */ - len = le16_to_cpu(symlink_buf->ReparseDataLength); - - if (le64_to_cpu(symlink_buf->InodeType) != NFS_SPECFILE_LNK) { - cifs_dbg(VFS, "%lld not a supported symlink type\n", - le64_to_cpu(symlink_buf->InodeType)); + u64 type; + + switch ((type = le64_to_cpu(buf->InodeType))) { + case NFS_SPECFILE_LNK: + len = le16_to_cpu(buf->ReparseDataLength); + data->symlink_target = cifs_strndup_from_utf16(buf->DataBuffer, + len, true, + cifs_sb->local_nls); + if (!data->symlink_target) + return -ENOMEM; + convert_delimiter(data->symlink_target, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", + __func__, data->symlink_target); + break; + case NFS_SPECFILE_CHR: + case NFS_SPECFILE_BLK: + case NFS_SPECFILE_FIFO: + case NFS_SPECFILE_SOCK: + break; + default: + cifs_dbg(VFS, "%s: unhandled inode type: 0x%llx\n", + __func__, type); return -EOPNOTSUPP; } - - *target_path = cifs_strndup_from_utf16( - symlink_buf->PathBuffer, - len, true, cifs_sb->local_nls); - if (!(*target_path)) - return -ENOMEM; - - convert_delimiter(*target_path, '/'); - cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); - return 0; } static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym, - u32 plen, bool unicode, char **target_path, - struct cifs_sb_info *cifs_sb) + u32 plen, bool unicode, + struct cifs_sb_info *cifs_sb, + struct cifs_open_info_data *data) { - unsigned int sub_len; - unsigned int sub_offset; + unsigned int len; + unsigned int offs; /* We handle Symbolic Link reparse tag here. See: MS-FSCC 2.1.2.4 */ - sub_offset = le16_to_cpu(sym->SubstituteNameOffset); - sub_len = le16_to_cpu(sym->SubstituteNameLength); - if (sub_offset + 20 > plen || - sub_offset + sub_len + 20 > plen) { + offs = le16_to_cpu(sym->SubstituteNameOffset); + len = le16_to_cpu(sym->SubstituteNameLength); + if (offs + 20 > plen || offs + len + 20 > plen) { cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); return -EIO; } - *target_path = cifs_strndup_from_utf16(sym->PathBuffer + sub_offset, - sub_len, unicode, - cifs_sb->local_nls); - if (!(*target_path)) + data->symlink_target = cifs_strndup_from_utf16(sym->PathBuffer + offs, + len, unicode, + cifs_sb->local_nls); + if (!data->symlink_target) return -ENOMEM; - convert_delimiter(*target_path, '/'); - cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + convert_delimiter(data->symlink_target, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, data->symlink_target); return 0; } int parse_reparse_point(struct reparse_data_buffer *buf, u32 plen, struct cifs_sb_info *cifs_sb, - bool unicode, char **target_path) + bool unicode, struct cifs_open_info_data *data) { if (plen < sizeof(*buf)) { - cifs_dbg(VFS, "reparse buffer is too small. Must be at least 8 bytes but was %d\n", - plen); + cifs_dbg(VFS, "%s: reparse buffer is too small. Must be at least 8 bytes but was %d\n", + __func__, plen); return -EIO; } if (plen < le16_to_cpu(buf->ReparseDataLength) + sizeof(*buf)) { - cifs_dbg(VFS, "srv returned invalid reparse buf length: %d\n", - plen); + cifs_dbg(VFS, "%s: invalid reparse buf length: %d\n", + __func__, plen); return -EIO; } + data->reparse.buf = buf; + /* See MS-FSCC 2.1.2 */ switch (le32_to_cpu(buf->ReparseTag)) { case IO_REPARSE_TAG_NFS: - return parse_reparse_posix( - (struct reparse_posix_data *)buf, - plen, target_path, cifs_sb); + return parse_reparse_posix((struct reparse_posix_data *)buf, + cifs_sb, data); case IO_REPARSE_TAG_SYMLINK: return parse_reparse_symlink( (struct reparse_symlink_data_buffer *)buf, - plen, unicode, target_path, cifs_sb); + plen, unicode, cifs_sb, data); case IO_REPARSE_TAG_LX_SYMLINK: case IO_REPARSE_TAG_AF_UNIX: case IO_REPARSE_TAG_LX_FIFO: @@ -2956,8 +2962,8 @@ int parse_reparse_point(struct reparse_data_buffer *buf, case IO_REPARSE_TAG_LX_BLK: return 0; default: - cifs_dbg(VFS, "srv returned unknown symlink buffer tag:0x%08x\n", - le32_to_cpu(buf->ReparseTag)); + cifs_dbg(VFS, "%s: unhandled reparse tag: 0x%08x\n", + __func__, le32_to_cpu(buf->ReparseTag)); return -EOPNOTSUPP; } } @@ -2972,8 +2978,7 @@ static int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, buf = (struct reparse_data_buffer *)((u8 *)io + le32_to_cpu(io->OutputOffset)); - return parse_reparse_point(buf, plen, cifs_sb, - true, &data->symlink_target); + return parse_reparse_point(buf, plen, cifs_sb, true, data); } static int smb2_query_reparse_point(const unsigned int xid, From 610610da58af14a2da813c550264616e99a1a756 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 19 Nov 2023 14:32:34 +0900 Subject: [PATCH 1144/1397] arm64: add dependency between vmlinuz.efi and Image [ Upstream commit c0a8574204054effad6ac83cc75c02576e2985fe ] A common issue in Makefile is a race in parallel building. You need to be careful to prevent multiple threads from writing to the same file simultaneously. Commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images") addressed such a bad scenario. A similar symptom occurs with the following command: $ make -j$(nproc) ARCH=arm64 Image vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/arm64/boot/Image OBJCOPY arch/arm64/boot/Image AS arch/arm64/boot/zboot-header.o PAD arch/arm64/boot/vmlinux.bin GZIP arch/arm64/boot/vmlinuz OBJCOPY arch/arm64/boot/vmlinuz.o LD arch/arm64/boot/vmlinuz.efi.elf OBJCOPY arch/arm64/boot/vmlinuz.efi The log "OBJCOPY arch/arm64/boot/Image" is displayed twice. It indicates that two threads simultaneously enter arch/arm64/boot/ and write to arch/arm64/boot/Image. It occasionally leads to a build failure: $ make -j$(nproc) ARCH=arm64 Image vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/arm64/boot/Image PAD arch/arm64/boot/vmlinux.bin truncate: Invalid number: 'arch/arm64/boot/vmlinux.bin' make[2]: *** [drivers/firmware/efi/libstub/Makefile.zboot:13: arch/arm64/boot/vmlinux.bin] Error 1 make[2]: *** Deleting file 'arch/arm64/boot/vmlinux.bin' make[1]: *** [arch/arm64/Makefile:163: vmlinuz.efi] Error 2 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:234: __sub-make] Error 2 vmlinuz.efi depends on Image, but such a dependency is not specified in arch/arm64/Makefile. Signed-off-by: Masahiro Yamada Acked-by: Ard Biesheuvel Reviewed-by: SImon Glass Link: https://lore.kernel.org/r/20231119053234.2367621-1-masahiroy@kernel.org Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 2d49aea0ff67..26b8c7630a21 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -158,7 +158,7 @@ endif all: $(notdir $(KBUILD_IMAGE)) - +vmlinuz.efi: Image Image vmlinuz.efi: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ From 5ce0fb87311d17907910e06e0a9b1306da4c62b2 Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Fri, 17 Nov 2023 14:15:55 +1300 Subject: [PATCH 1145/1397] HID: hid-asus: add const to read-only outgoing usb buffer [ Upstream commit 06ae5afce8cc1f7621cc5c7751e449ce20d68af7 ] In the function asus_kbd_set_report the parameter buf is read-only as it gets copied in a memory portion suitable for USB transfer, but the parameter is not marked as const: add the missing const and mark const immutable buffers passed to that function. Signed-off-by: Denis Benato Signed-off-by: Luke D. Jones Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-asus.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 194a86cf30db..78cdfb8b9a7a 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -381,7 +381,7 @@ static int asus_raw_event(struct hid_device *hdev, return 0; } -static int asus_kbd_set_report(struct hid_device *hdev, u8 *buf, size_t buf_size) +static int asus_kbd_set_report(struct hid_device *hdev, const u8 *buf, size_t buf_size) { unsigned char *dmabuf; int ret; @@ -404,7 +404,7 @@ static int asus_kbd_set_report(struct hid_device *hdev, u8 *buf, size_t buf_size static int asus_kbd_init(struct hid_device *hdev) { - u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x41, 0x53, 0x55, 0x53, 0x20, 0x54, + const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x41, 0x53, 0x55, 0x53, 0x20, 0x54, 0x65, 0x63, 0x68, 0x2e, 0x49, 0x6e, 0x63, 0x2e, 0x00 }; int ret; @@ -418,7 +418,7 @@ static int asus_kbd_init(struct hid_device *hdev) static int asus_kbd_get_functions(struct hid_device *hdev, unsigned char *kbd_func) { - u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 }; + const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 }; u8 *readbuf; int ret; @@ -449,7 +449,7 @@ static int asus_kbd_get_functions(struct hid_device *hdev, static int rog_nkey_led_init(struct hid_device *hdev) { - u8 buf_init_start[] = { FEATURE_KBD_LED_REPORT_ID1, 0xB9 }; + const u8 buf_init_start[] = { FEATURE_KBD_LED_REPORT_ID1, 0xB9 }; u8 buf_init2[] = { FEATURE_KBD_LED_REPORT_ID1, 0x41, 0x53, 0x55, 0x53, 0x20, 0x54, 0x65, 0x63, 0x68, 0x2e, 0x49, 0x6e, 0x63, 0x2e, 0x00 }; u8 buf_init3[] = { FEATURE_KBD_LED_REPORT_ID1, From 545d55a3e0c8f4e59fbc9d92c99ef5725c799014 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 15 Dec 2023 11:24:50 +0000 Subject: [PATCH 1146/1397] perf: Fix perf_event_validate_size() lockdep splat commit 7e2c1e4b34f07d9aa8937fab88359d4a0fce468e upstream. When lockdep is enabled, the for_each_sibling_event(sibling, event) macro checks that event->ctx->mutex is held. When creating a new group leader event, we call perf_event_validate_size() on a partially initialized event where event->ctx is NULL, and so when for_each_sibling_event() attempts to check event->ctx->mutex, we get a splat, as reported by Lucas De Marchi: WARNING: CPU: 8 PID: 1471 at kernel/events/core.c:1950 __do_sys_perf_event_open+0xf37/0x1080 This only happens for a new event which is its own group_leader, and in this case there cannot be any sibling events. Thus it's safe to skip the check for siblings, which avoids having to make invasive and ugly changes to for_each_sibling_event(). Avoid the splat by bailing out early when the new event is its own group_leader. Fixes: 382c27f4ed28f803 ("perf: Fix perf_event_validate_size()") Closes: https://lore.kernel.org/lkml/20231214000620.3081018-1-lucas.demarchi@intel.com/ Closes: https://lore.kernel.org/lkml/ZXpm6gQ%2Fd59jGsuW@xpf.sh.intel.com/ Reported-by: Lucas De Marchi Reported-by: Pengfei Xu Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231215112450.3972309-1-mark.rutland@arm.com Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 252387b6ac8d..58ecb1c24387 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1947,6 +1947,16 @@ static bool perf_event_validate_size(struct perf_event *event) group_leader->nr_siblings + 1) > 16*1024) return false; + /* + * When creating a new group leader, group_leader->ctx is initialized + * after the size has been validated, but we cannot safely use + * for_each_sibling_event() until group_leader->ctx is set. A new group + * leader cannot have any siblings yet, so we can safely skip checking + * the non-existent siblings. + */ + if (event == group_leader) + return true; + for_each_sibling_event(sibling, group_leader) { if (__perf_event_read_size(sibling->attr.read_format, group_leader->nr_siblings + 1) > 16*1024) From 654461744af8c33ee6943b7937272e5a4945e5c9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Dec 2023 10:01:44 -0500 Subject: [PATCH 1147/1397] btrfs: do not allow non subvolume root targets for snapshot commit a8892fd71933126ebae3d60aec5918d4dceaae76 upstream. Our btrfs subvolume snapshot utility enforces that is the root of the subvolume, however this isn't enforced in the kernel. Update the kernel to also enforce this limitation to avoid problems with other users of this ioctl that don't have the appropriate checks in place. Reported-by: Martin Michaelis CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Neal Gompa Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0b120716aeb9..e611b3a3ad38 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1290,6 +1290,15 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file, * are limited to own subvolumes only */ ret = -EPERM; + } else if (btrfs_ino(BTRFS_I(src_inode)) != BTRFS_FIRST_FREE_OBJECTID) { + /* + * Snapshots must be made with the src_inode referring + * to the subvolume inode, otherwise the permission + * checking above is useless because we may have + * permission on a lower directory but not the subvol + * itself. + */ + ret = -EINVAL; } else { ret = btrfs_mksnapshot(&file->f_path, idmap, name, namelen, From c1d2d084751dfd8a8dc71856f741a4e0170af115 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Dec 2023 19:11:14 -0800 Subject: [PATCH 1148/1397] cxl/hdm: Fix dpa translation locking commit 6f5c4eca48ffe18307b4e1d375817691c9005c87 upstream. The helper, cxl_dpa_resource_start(), snapshots the dpa-address of an endpoint-decoder after acquiring the cxl_dpa_rwsem. However, it is sufficient to assert that cxl_dpa_rwsem is held rather than acquire it in the helper. Otherwise, it triggers multiple lockdep reports: 1/ Tracing callbacks are in an atomic context that can not acquire sleeping locks: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1525 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1288, name: bash preempt_count: 2, expected: 0 RCU nest depth: 0, expected: 0 [..] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc38 05/24/2023 Call Trace: dump_stack_lvl+0x71/0x90 __might_resched+0x1b2/0x2c0 down_read+0x1a/0x190 cxl_dpa_resource_start+0x15/0x50 [cxl_core] cxl_trace_hpa+0x122/0x300 [cxl_core] trace_event_raw_event_cxl_poison+0x1c9/0x2d0 [cxl_core] 2/ The rwsem is already held in the inject poison path: WARNING: possible recursive locking detected 6.7.0-rc2+ #12 Tainted: G W OE N -------------------------------------------- bash/1288 is trying to acquire lock: ffffffffc05f73d0 (cxl_dpa_rwsem){++++}-{3:3}, at: cxl_dpa_resource_start+0x15/0x50 [cxl_core] but task is already holding lock: ffffffffc05f73d0 (cxl_dpa_rwsem){++++}-{3:3}, at: cxl_inject_poison+0x7d/0x1e0 [cxl_core] [..] Call Trace: dump_stack_lvl+0x71/0x90 __might_resched+0x1b2/0x2c0 down_read+0x1a/0x190 cxl_dpa_resource_start+0x15/0x50 [cxl_core] cxl_trace_hpa+0x122/0x300 [cxl_core] trace_event_raw_event_cxl_poison+0x1c9/0x2d0 [cxl_core] __traceiter_cxl_poison+0x5c/0x80 [cxl_core] cxl_inject_poison+0x1bc/0x1e0 [cxl_core] This appears to have been an issue since the initial implementation and uncovered by the new cxl-poison.sh test [1]. That test is now passing with these changes. Fixes: 28a3ae4ff66c ("cxl/trace: Add an HPA to cxl_poison trace events") Link: http://lore.kernel.org/r/e4f2716646918135ddbadf4146e92abb659de734.1700615159.git.alison.schofield@intel.com [1] Cc: Cc: Alison Schofield Cc: Jonathan Cameron Cc: Dave Jiang Cc: Ira Weiny Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/hdm.c | 3 +-- drivers/cxl/core/port.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 64e86b786db5..22829267ccd8 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -373,10 +373,9 @@ resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled) { resource_size_t base = -1; - down_read(&cxl_dpa_rwsem); + lockdep_assert_held(&cxl_dpa_rwsem); if (cxled->dpa_res) base = cxled->dpa_res->start; - up_read(&cxl_dpa_rwsem); return base; } diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 6a75a3cb601e..1e0558d18b96 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -219,9 +219,9 @@ static ssize_t dpa_resource_show(struct device *dev, struct device_attribute *at char *buf) { struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev); - u64 base = cxl_dpa_resource_start(cxled); - return sysfs_emit(buf, "%#llx\n", base); + guard(rwsem_read)(&cxl_dpa_rwsem); + return sysfs_emit(buf, "%#llx\n", (u64)cxl_dpa_resource_start(cxled)); } static DEVICE_ATTR_RO(dpa_resource); From 40abc387459a0fc1bfd52feab095b2384a79c262 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 24 Nov 2023 19:01:36 +0100 Subject: [PATCH 1149/1397] soundwire: stream: fix NULL pointer dereference for multi_link commit e199bf52ffda8f98f129728d57244a9cd9ad5623 upstream. If bus is marked as multi_link, but number of masters in the stream is not higher than bus->hw_sync_min_links (bus->multi_link && m_rt_count >= bus->hw_sync_min_links), bank switching should not happen. The first part of do_bank_switch() code properly takes these conditions into account, but second part (sdw_ml_sync_bank_switch()) relies purely on bus->multi_link property. This is not balanced and leads to NULL pointer dereference: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... Call trace: wait_for_completion_timeout+0x124/0x1f0 do_bank_switch+0x370/0x6f8 sdw_prepare_stream+0x2d0/0x438 qcom_snd_sdw_prepare+0xa0/0x118 sm8450_snd_prepare+0x128/0x148 snd_soc_link_prepare+0x5c/0xe8 __soc_pcm_prepare+0x28/0x1ec dpcm_be_dai_prepare+0x1e0/0x2c0 dpcm_fe_dai_prepare+0x108/0x28c snd_pcm_do_prepare+0x44/0x68 snd_pcm_action_single+0x54/0xc0 snd_pcm_action_nonatomic+0xe4/0xec snd_pcm_prepare+0xc4/0x114 snd_pcm_common_ioctl+0x1154/0x1cc0 snd_pcm_ioctl+0x54/0x74 Fixes: ce6e74d008ff ("soundwire: Add support for multi link bank switch") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20231124180136.390621-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/soundwire/stream.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index d77a8a0d42c8..68d54887992d 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -742,14 +742,15 @@ static int sdw_bank_switch(struct sdw_bus *bus, int m_rt_count) * sdw_ml_sync_bank_switch: Multilink register bank switch * * @bus: SDW bus instance + * @multi_link: whether this is a multi-link stream with hardware-based sync * * Caller function should free the buffers on error */ -static int sdw_ml_sync_bank_switch(struct sdw_bus *bus) +static int sdw_ml_sync_bank_switch(struct sdw_bus *bus, bool multi_link) { unsigned long time_left; - if (!bus->multi_link) + if (!multi_link) return 0; /* Wait for completion of transfer */ @@ -847,7 +848,7 @@ static int do_bank_switch(struct sdw_stream_runtime *stream) bus->bank_switch_timeout = DEFAULT_BANK_SWITCH_TIMEOUT; /* Check if bank switch was successful */ - ret = sdw_ml_sync_bank_switch(bus); + ret = sdw_ml_sync_bank_switch(bus, multi_link); if (ret < 0) { dev_err(bus->dev, "multi link bank switch failed: %d\n", ret); From 4f18d187fb2a99f181b1aab0322f3f8451255b6b Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 27 Nov 2023 14:33:13 +0800 Subject: [PATCH 1150/1397] ext4: prevent the normalized size from exceeding EXT_MAX_BLOCKS commit 2dcf5fde6dffb312a4bfb8ef940cea2d1f402e32 upstream. For files with logical blocks close to EXT_MAX_BLOCKS, the file size predicted in ext4_mb_normalize_request() may exceed EXT_MAX_BLOCKS. This can cause some blocks to be preallocated that will not be used. And after [Fixes], the following issue may be triggered: ========================================================= kernel BUG at fs/ext4/mballoc.c:4653! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP CPU: 1 PID: 2357 Comm: xfs_io 6.7.0-rc2-00195-g0f5cc96c367f Hardware name: linux,dummy-virt (DT) pc : ext4_mb_use_inode_pa+0x148/0x208 lr : ext4_mb_use_inode_pa+0x98/0x208 Call trace: ext4_mb_use_inode_pa+0x148/0x208 ext4_mb_new_inode_pa+0x240/0x4a8 ext4_mb_use_best_found+0x1d4/0x208 ext4_mb_try_best_found+0xc8/0x110 ext4_mb_regular_allocator+0x11c/0xf48 ext4_mb_new_blocks+0x790/0xaa8 ext4_ext_map_blocks+0x7cc/0xd20 ext4_map_blocks+0x170/0x600 ext4_iomap_begin+0x1c0/0x348 ========================================================= Here is a calculation when adjusting ac_b_ex in ext4_mb_new_inode_pa(): ex.fe_logical = orig_goal_end - EXT4_C2B(sbi, ex.fe_len); if (ac->ac_o_ex.fe_logical >= ex.fe_logical) goto adjust_bex; The problem is that when orig_goal_end is subtracted from ac_b_ex.fe_len it is still greater than EXT_MAX_BLOCKS, which causes ex.fe_logical to overflow to a very small value, which ultimately triggers a BUG_ON in ext4_mb_new_inode_pa() because pa->pa_free < len. The last logical block of an actual write request does not exceed EXT_MAX_BLOCKS, so in ext4_mb_normalize_request() also avoids normalizing the last logical block to exceed EXT_MAX_BLOCKS to avoid the above issue. The test case in [Link] can reproduce the above issue with 64k block size. Link: https://patchwork.kernel.org/project/fstests/list/?series=804003 Cc: # 6.4 Fixes: 93cdf49f6eca ("ext4: Fix best extent lstart adjustment logic in ext4_mb_new_inode_pa()") Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20231127063313.3734294-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a7b8558c0d09..d70f7a06bab4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4489,6 +4489,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, start = max(start, rounddown(ac->ac_o_ex.fe_logical, (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb))); + /* avoid unnecessary preallocation that may trigger assertions */ + if (start + size > EXT_MAX_BLOCKS) + size = EXT_MAX_BLOCKS - start; + /* don't cover already allocated blocks in selected range */ if (ar->pleft && start <= ar->lleft) { size -= ar->lleft + 1 - start; From d228e98dfacb205d4e9ae181f5a8f467e28fc777 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 8 Dec 2023 18:01:44 -0800 Subject: [PATCH 1151/1397] Revert "selftests: error out if kernel header files are not yet built" commit 43e8832fed08438e2a27afed9bac21acd0ceffe5 upstream. This reverts commit 9fc96c7c19df ("selftests: error out if kernel header files are not yet built"). It turns out that requiring the kernel headers to be built as a prerequisite to building selftests, does not work in many cases. For example, Peter Zijlstra writes: "My biggest beef with the whole thing is that I simply do not want to use 'make headers', it doesn't work for me. I have a ton of output directories and I don't care to build tools into the output dirs, in fact some of them flat out refuse to work that way (bpf comes to mind)." [1] Therefore, stop erroring out on the selftests build. Additional patches will be required in order to change over to not requiring the kernel headers. [1] https://lore.kernel.org/20231208221007.GO28727@noisy.programming.kicks-ass.net Link: https://lkml.kernel.org/r/20231209020144.244759-1-jhubbard@nvidia.com Fixes: 9fc96c7c19df ("selftests: error out if kernel header files are not yet built") Signed-off-by: John Hubbard Cc: Anders Roxell Cc: Muhammad Usama Anjum Cc: David Hildenbrand Cc: Peter Xu Cc: Jonathan Corbet Cc: Nathan Chancellor Cc: Shuah Khan Cc: Peter Zijlstra Cc: Marcos Paulo de Souza Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/Makefile | 21 +---------------- tools/testing/selftests/lib.mk | 40 +++----------------------------- 2 files changed, 4 insertions(+), 57 deletions(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 1a21d6beebc6..697f13bbbc32 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -152,12 +152,10 @@ ifneq ($(KBUILD_OUTPUT),) abs_objtree := $(realpath $(abs_objtree)) BUILD := $(abs_objtree)/kselftest KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include - KHDR_DIR := ${abs_objtree}/usr/include else BUILD := $(CURDIR) abs_srctree := $(shell cd $(top_srcdir) && pwd) KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include - KHDR_DIR := ${abs_srctree}/usr/include DEFAULT_INSTALL_HDR_PATH := 1 endif @@ -171,7 +169,7 @@ export KHDR_INCLUDES # all isn't the first target in the file. .DEFAULT_GOAL := all -all: kernel_header_files +all: @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ @@ -182,23 +180,6 @@ all: kernel_header_files ret=$$((ret * $$?)); \ done; exit $$ret; -kernel_header_files: - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ - if [ $$? -ne 0 ]; then \ - RED='\033[1;31m'; \ - NOCOLOR='\033[0m'; \ - echo; \ - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ - echo "Please run this and try again:"; \ - echo; \ - echo " cd $(top_srcdir)"; \ - echo " make headers"; \ - echo; \ - exit 1; \ - fi - -.PHONY: kernel_header_files - run_tests: all @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 118e0964bda9..aa646e0661f3 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -44,26 +44,10 @@ endif selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST)))) top_srcdir = $(selfdir)/../../.. -ifeq ("$(origin O)", "command line") - KBUILD_OUTPUT := $(O) +ifeq ($(KHDR_INCLUDES),) +KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include endif -ifneq ($(KBUILD_OUTPUT),) - # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot - # expand a shell special character '~'. We use a somewhat tedious way here. - abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd) - $(if $(abs_objtree),, \ - $(error failed to create output directory "$(KBUILD_OUTPUT)")) - # $(realpath ...) resolves symlinks - abs_objtree := $(realpath $(abs_objtree)) - KHDR_DIR := ${abs_objtree}/usr/include -else - abs_srctree := $(shell cd $(top_srcdir) && pwd) - KHDR_DIR := ${abs_srctree}/usr/include -endif - -KHDR_INCLUDES := -isystem $(KHDR_DIR) - # The following are built by lib.mk common compile rules. # TEST_CUSTOM_PROGS should be used by tests that require # custom build rule and prevent common build rule use. @@ -74,25 +58,7 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) -all: kernel_header_files $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) \ - $(TEST_GEN_FILES) - -kernel_header_files: - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ - if [ $$? -ne 0 ]; then \ - RED='\033[1;31m'; \ - NOCOLOR='\033[0m'; \ - echo; \ - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ - echo "Please run this and try again:"; \ - echo; \ - echo " cd $(top_srcdir)"; \ - echo " make headers"; \ - echo; \ - exit 1; \ - fi - -.PHONY: kernel_header_files +all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) define RUN_TESTS BASE_DIR="$(selfdir)"; \ From 2c8a21a124cab4391254680cda796036d061d9b9 Mon Sep 17 00:00:00 2001 From: James Houghton Date: Mon, 4 Dec 2023 17:26:46 +0000 Subject: [PATCH 1152/1397] arm64: mm: Always make sw-dirty PTEs hw-dirty in pte_modify commit 3c0696076aad60a2f04c019761921954579e1b0e upstream. It is currently possible for a userspace application to enter an infinite page fault loop when using HugeTLB pages implemented with contiguous PTEs when HAFDBS is not available. This happens because: 1. The kernel may sometimes write PTEs that are sw-dirty but hw-clean (PTE_DIRTY | PTE_RDONLY | PTE_WRITE). 2. If, during a write, the CPU uses a sw-dirty, hw-clean PTE in handling the memory access on a system without HAFDBS, we will get a page fault. 3. HugeTLB will check if it needs to update the dirty bits on the PTE. For contiguous PTEs, it will check to see if the pgprot bits need updating. In this case, HugeTLB wants to write a sequence of sw-dirty, hw-dirty PTEs, but it finds that all the PTEs it is about to overwrite are all pte_dirty() (pte_sw_dirty() => pte_dirty()), so it thinks no update is necessary. We can get the kernel to write a sw-dirty, hw-clean PTE with the following steps (showing the relevant VMA flags and pgprot bits): i. Create a valid, writable contiguous PTE. VMA vmflags: VM_SHARED | VM_READ | VM_WRITE VMA pgprot bits: PTE_RDONLY | PTE_WRITE PTE pgprot bits: PTE_DIRTY | PTE_WRITE ii. mprotect the VMA to PROT_NONE. VMA vmflags: VM_SHARED VMA pgprot bits: PTE_RDONLY PTE pgprot bits: PTE_DIRTY | PTE_RDONLY iii. mprotect the VMA back to PROT_READ | PROT_WRITE. VMA vmflags: VM_SHARED | VM_READ | VM_WRITE VMA pgprot bits: PTE_RDONLY | PTE_WRITE PTE pgprot bits: PTE_DIRTY | PTE_WRITE | PTE_RDONLY Make it impossible to create a writeable sw-dirty, hw-clean PTE with pte_modify(). Such a PTE should be impossible to create, and there may be places that assume that pte_dirty() implies pte_hw_dirty(). Signed-off-by: James Houghton Fixes: 031e6e6b4e12 ("arm64: hugetlb: Avoid unnecessary clearing in huge_ptep_set_access_flags") Cc: Acked-by: Will Deacon Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20231204172646.2541916-3-jthoughton@google.com Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7f7d9b1df4e5..07bdf5dd8ebe 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -826,6 +826,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); + /* + * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware + * dirtiness again. + */ + if (pte_sw_dirty(pte)) + pte = pte_mkdirty(pte); return pte; } From 28b36426b83e35dbfbf3d2104c9aac61c1d8f5df Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 6 Dec 2023 13:37:18 +0100 Subject: [PATCH 1153/1397] team: Fix use-after-free when an option instance allocation fails commit c12296bbecc488623b7d1932080e394d08f3226b upstream. In __team_options_register, team_options are allocated and appended to the team's option_list. If one option instance allocation fails, the "inst_rollback" cleanup path frees the previously allocated options but doesn't remove them from the team's option_list. This leaves dangling pointers that can be dereferenced later by other parts of the team driver that iterate over options. This patch fixes the cleanup path to remove the dangling pointers from the list. As far as I can tell, this uaf doesn't have much security implications since it would be fairly hard to exploit (an attacker would need to make the allocation of that specific small object fail) but it's still nice to fix. Cc: stable@vger.kernel.org Fixes: 80f7c6683fe0 ("team: add support for per-port options") Signed-off-by: Florent Revest Reviewed-by: Jiri Pirko Reviewed-by: Hangbin Liu Link: https://lore.kernel.org/r/20231206123719.1963153-1-revest@chromium.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 508d9a392ab1..f575f225d417 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -281,8 +281,10 @@ static int __team_options_register(struct team *team, return 0; inst_rollback: - for (i--; i >= 0; i--) + for (i--; i >= 0; i--) { __team_option_inst_del_option(team, dst_opts[i]); + list_del(&dst_opts[i]->list); + } i = option_count; alloc_rollback: From 3aae4ef4d799fb3d0381157640fdb251008cf0ae Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 7 Dec 2023 10:14:41 -0500 Subject: [PATCH 1154/1397] drm/amdgpu/sdma5.2: add begin/end_use ring callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ab4750332dbe535243def5dcebc24ca00c1f98ac upstream. Add begin/end_use ring callbacks to disallow GFXOFF when SDMA work is submitted and allow it again afterward. This should avoid corner cases where GFXOFF is erroneously entered when SDMA is still active. For now just allow/disallow GFXOFF in the begin and end helpers until we root cause the issue. This should not impact power as SDMA usage is pretty minimal and GFXOSS should not be active when SDMA is active anyway, this just makes it explicit. v2: move everything into sdma5.2 code. No reason for this to be generic at this point. v3: Add comments in new code Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2220 Reviewed-by: Mario Limonciello (v1) Tested-by: Mario Limonciello (v1) Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.15+ Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 2b3ebebc4299..0f930fd8a383 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1651,6 +1651,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Disallow GFXOFF while SDMA is active. + * We can probably just limit this to 5.2.3, + * but it shouldn't hurt for other parts since + * this GFXOFF will be disallowed anyway when SDMA is + * active, this just makes it explicit. + */ + amdgpu_gfx_off_ctrl(adev, false); +} + +static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Allow GFXOFF when SDMA is complete. + */ + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, @@ -1698,6 +1724,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .test_ib = sdma_v5_2_ring_test_ib, .insert_nop = sdma_v5_2_ring_insert_nop, .pad_ib = sdma_v5_2_ring_pad_ib, + .begin_use = sdma_v5_2_ring_begin_use, + .end_use = sdma_v5_2_ring_end_use, .emit_wreg = sdma_v5_2_ring_emit_wreg, .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, From 03e63e497a404f444f5af25e120739da3a4f03ff Mon Sep 17 00:00:00 2001 From: Stuart Lee Date: Fri, 10 Nov 2023 09:29:14 +0800 Subject: [PATCH 1155/1397] drm/mediatek: Fix access violation in mtk_drm_crtc_dma_dev_get commit b6961d187fcd138981b8707dac87b9fcdbfe75d1 upstream. Add error handling to check NULL input in mtk_drm_crtc_dma_dev_get function. While display path is not configured correctly, none of crtc is established. So the caller of mtk_drm_crtc_dma_dev_get may pass input parameter *crtc as NULL, Which may cause coredump when we try to get the container of NULL pointer. Fixes: cb1d6bcca542 ("drm/mediatek: Add dma dev get function") Signed-off-by: Stuart Lee Cc: stable@vger.kernel.org Reviewed-by: AngeloGioacchino DEl Regno Tested-by: Macpaul Lin Link: https://patchwork.kernel.org/project/dri-devel/patch/20231110012914.14884-2-stuart.lee@mediatek.com/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index a033da279a94..80e34b39a6b1 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -885,7 +885,14 @@ static int mtk_drm_crtc_init_comp_planes(struct drm_device *drm_dev, struct device *mtk_drm_crtc_dma_dev_get(struct drm_crtc *crtc) { - struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); + struct mtk_drm_crtc *mtk_crtc = NULL; + + if (!crtc) + return NULL; + + mtk_crtc = to_mtk_crtc(crtc); + if (!mtk_crtc) + return NULL; return mtk_crtc->dma_dev; } From 9127515bf9cd196d71ffe33bd7791bf272bbff0c Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 6 Nov 2023 14:48:32 +0100 Subject: [PATCH 1156/1397] dmaengine: stm32-dma: avoid bitfield overflow assertion commit 54bed6bafa0f38daf9697af50e3aff5ff1354fe1 upstream. stm32_dma_get_burst() returns a negative error for invalid input, which gets turned into a large u32 value in stm32_dma_prep_dma_memcpy() that in turn triggers an assertion because it does not fit into a two-bit field: drivers/dma/stm32-dma.c: In function 'stm32_dma_prep_dma_memcpy': include/linux/compiler_types.h:354:38: error: call to '__compiletime_assert_282' declared with attribute error: FIELD_PREP: value too large for the field _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ include/linux/compiler_types.h:335:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^~~~~~ include/linux/compiler_types.h:354:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^~~~~~~~~~~~~~~~~~~ include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert' #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) ^~~~~~~~~~~~~~~~~~ include/linux/bitfield.h:68:3: note: in expansion of macro 'BUILD_BUG_ON_MSG' BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ ^~~~~~~~~~~~~~~~ include/linux/bitfield.h:114:3: note: in expansion of macro '__BF_FIELD_CHECK' __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ ^~~~~~~~~~~~~~~~ drivers/dma/stm32-dma.c:1237:4: note: in expansion of macro 'FIELD_PREP' FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, dma_burst) | ^~~~~~~~~~ As an easy workaround, assume the error can happen, so try to handle this by failing stm32_dma_prep_dma_memcpy() before the assertion. It replicates what is done in stm32_dma_set_xfer_param() where stm32_dma_get_burst() is also used. Fixes: 1c32d6c37cc2 ("dmaengine: stm32-dma: use bitfield helpers") Fixes: a2b6103b7a8a ("dmaengine: stm32-dma: Improve memory burst management") Signed-off-by: Arnd Bergmann Signed-off-by: Amelie Delaunay Cc: stable@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311060135.Q9eMnpCL-lkp@intel.com/ Link: https://lore.kernel.org/r/20231106134832.1470305-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/stm32-dma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 0b30151fb45c..9840594a6aaa 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -1249,8 +1249,8 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy( enum dma_slave_buswidth max_width; struct stm32_dma_desc *desc; size_t xfer_count, offset; - u32 num_sgs, best_burst, dma_burst, threshold; - int i; + u32 num_sgs, best_burst, threshold; + int dma_burst, i; num_sgs = DIV_ROUND_UP(len, STM32_DMA_ALIGNED_MAX_DATA_ITEMS); desc = kzalloc(struct_size(desc, sg_req, num_sgs), GFP_NOWAIT); @@ -1268,6 +1268,10 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy( best_burst = stm32_dma_get_best_burst(len, STM32_DMA_MAX_BURST, threshold, max_width); dma_burst = stm32_dma_get_burst(chan, best_burst); + if (dma_burst < 0) { + kfree(desc); + return NULL; + } stm32_dma_clear_reg(&desc->sg_req[i].chan_reg); desc->sg_req[i].chan_reg.dma_scr = From ed50e07d6a8eae1c079b637926811d7500e8dd9e Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 27 Nov 2023 16:43:25 -0500 Subject: [PATCH 1157/1397] dmaengine: fsl-edma: fix DMA channel leak in eDMAv4 commit 4ee632c82d2dbb9e2dcc816890ef182a151cbd99 upstream. Allocate channel count consistently increases due to a missing source ID (srcid) cleanup in the fsl_edma_free_chan_resources() function at imx93 eDMAv4. Reset 'srcid' at fsl_edma_free_chan_resources(). Cc: stable@vger.kernel.org Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20231127214325.2477247-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/fsl-edma-common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c index 6a3abe5b1790..b53f46245c37 100644 --- a/drivers/dma/fsl-edma-common.c +++ b/drivers/dma/fsl-edma-common.c @@ -828,6 +828,7 @@ void fsl_edma_free_chan_resources(struct dma_chan *chan) dma_pool_destroy(fsl_chan->tcd_pool); fsl_chan->tcd_pool = NULL; fsl_chan->is_sw = false; + fsl_chan->srcid = 0; } void fsl_edma_cleanup_vchan(struct dma_device *dmadev) From b2ce691b452f2731bd7d30c5b05333bf2ae97a0d Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 7 Dec 2023 23:14:04 -0700 Subject: [PATCH 1158/1397] mm/mglru: fix underprotected page cache commit 081488051d28d32569ebb7c7a23572778b2e7d57 upstream. Unmapped folios accessed through file descriptors can be underprotected. Those folios are added to the oldest generation based on: 1. The fact that they are less costly to reclaim (no need to walk the rmap and flush the TLB) and have less impact on performance (don't cause major PFs and can be non-blocking if needed again). 2. The observation that they are likely to be single-use. E.g., for client use cases like Android, its apps parse configuration files and store the data in heap (anon); for server use cases like MySQL, it reads from InnoDB files and holds the cached data for tables in buffer pools (anon). However, the oldest generation can be very short lived, and if so, it doesn't provide the PID controller with enough time to respond to a surge of refaults. (Note that the PID controller uses weighted refaults and those from evicted generations only take a half of the whole weight.) In other words, for a short lived generation, the moving average smooths out the spike quickly. To fix the problem: 1. For folios that are already on LRU, if they can be beyond the tracking range of tiers, i.e., five accesses through file descriptors, move them to the second oldest generation to give them more time to age. (Note that tiers are used by the PID controller to statistically determine whether folios accessed multiple times through file descriptors are worth protecting.) 2. When adding unmapped folios to LRU, adjust the placement of them so that they are not too close to the tail. The effect of this is similar to the above. On Android, launching 55 apps sequentially: Before After Change workingset_refault_anon 25641024 25598972 0% workingset_refault_file 115016834 106178438 -8% Link: https://lkml.kernel.org/r/20231208061407.2125867-1-yuzhao@google.com Fixes: ac35a4902374 ("mm: multi-gen LRU: minimal implementation") Signed-off-by: Yu Zhao Reported-by: Charan Teja Kalla Tested-by: Kalesh Singh Cc: T.J. Mercier Cc: Kairui Song Cc: Hillf Danton Cc: Jaroslav Pulchart Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/mm_inline.h | 23 ++++++++++++++--------- mm/vmscan.c | 2 +- mm/workingset.c | 6 +++--- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 8148b30a9df1..96b1c157554c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -231,22 +231,27 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, if (folio_test_unevictable(folio) || !lrugen->enabled) return false; /* - * There are three common cases for this page: - * 1. If it's hot, e.g., freshly faulted in or previously hot and - * migrated, add it to the youngest generation. - * 2. If it's cold but can't be evicted immediately, i.e., an anon page - * not in swapcache or a dirty page pending writeback, add it to the - * second oldest generation. - * 3. Everything else (clean, cold) is added to the oldest generation. + * There are four common cases for this page: + * 1. If it's hot, i.e., freshly faulted in, add it to the youngest + * generation, and it's protected over the rest below. + * 2. If it can't be evicted immediately, i.e., a dirty page pending + * writeback, add it to the second youngest generation. + * 3. If it should be evicted first, e.g., cold and clean from + * folio_rotate_reclaimable(), add it to the oldest generation. + * 4. Everything else falls between 2 & 3 above and is added to the + * second oldest generation if it's considered inactive, or the + * oldest generation otherwise. See lru_gen_is_active(). */ if (folio_test_active(folio)) seq = lrugen->max_seq; else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || (folio_test_reclaim(folio) && (folio_test_dirty(folio) || folio_test_writeback(folio)))) - seq = lrugen->min_seq[type] + 1; - else + seq = lrugen->max_seq - 1; + else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq) seq = lrugen->min_seq[type]; + else + seq = lrugen->min_seq[type] + 1; gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; diff --git a/mm/vmscan.c b/mm/vmscan.c index 6f13394b112e..65ef14146c85 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4933,7 +4933,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c } /* protected */ - if (tier > tier_idx) { + if (tier > tier_idx || refs == BIT(LRU_REFS_WIDTH)) { int hist = lru_hist_from_seq(lrugen->min_seq[type]); gen = folio_inc_gen(lruvec, folio, false); diff --git a/mm/workingset.c b/mm/workingset.c index da58a26d0d4d..2559a1f2fc1c 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -313,10 +313,10 @@ static void lru_gen_refault(struct folio *folio, void *shadow) * 1. For pages accessed through page tables, hotter pages pushed out * hot pages which refaulted immediately. * 2. For pages accessed multiple times through file descriptors, - * numbers of accesses might have been out of the range. + * they would have been protected by sort_folio(). */ - if (lru_gen_in_fault() || refs == BIT(LRU_REFS_WIDTH)) { - folio_set_workingset(folio); + if (lru_gen_in_fault() || refs >= BIT(LRU_REFS_WIDTH) - 1) { + set_mask_bits(&folio->flags, 0, LRU_REFS_MASK | BIT(PG_workingset)); mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); } unlock: From c5f67b7e847490d41219f1dfe1b1610e88a997e4 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 7 Dec 2023 23:14:05 -0700 Subject: [PATCH 1159/1397] mm/mglru: try to stop at high watermarks commit 5095a2b23987d3c3c47dd16b3d4080e2733b8bb9 upstream. The initial MGLRU patchset didn't include the memcg LRU support, and it relied on should_abort_scan(), added by commit f76c83378851 ("mm: multi-gen LRU: optimize multiple memcgs"), to "backoff to avoid overshooting their aggregate reclaim target by too much". Later on when the memcg LRU was added, should_abort_scan() was deemed unnecessary, and the test results [1] showed no side effects after it was removed by commit a579086c99ed ("mm: multi-gen LRU: remove eviction fairness safeguard"). However, that test used memory.reclaim, which sets nr_to_reclaim to SWAP_CLUSTER_MAX. So it can overshoot only by SWAP_CLUSTER_MAX-1 pages, i.e., from nr_reclaimed=nr_to_reclaim-1 to nr_reclaimed=nr_to_reclaim+SWAP_CLUSTER_MAX-1. Compared with the batch size kswapd sets to nr_to_reclaim, SWAP_CLUSTER_MAX is tiny. Therefore that test isn't able to reproduce the worst case scenario, i.e., kswapd overshooting GBs on large systems and "consuming 100% CPU" (see the Closes tag). Bring back a simplified version of should_abort_scan() on top of the memcg LRU, so that kswapd stops when all eligible zones are above their respective high watermarks plus a small delta to lower the chance of KSWAPD_HIGH_WMARK_HIT_QUICKLY. Note that this only applies to order-0 reclaim, meaning compaction-induced reclaim can still run wild (which is a different problem). On Android, launching 55 apps sequentially: Before After Change pgpgin 838377172 802955040 -4% pgpgout 38037080 34336300 -10% [1] https://lore.kernel.org/20221222041905.2431096-1-yuzhao@google.com/ Link: https://lkml.kernel.org/r/20231208061407.2125867-2-yuzhao@google.com Fixes: a579086c99ed ("mm: multi-gen LRU: remove eviction fairness safeguard") Signed-off-by: Yu Zhao Reported-by: Charan Teja Kalla Reported-by: Jaroslav Pulchart Closes: https://lore.kernel.org/CAK8fFZ4DY+GtBA40Pm7Nn5xCHy+51w3sfxPqkqpqakSXYyX+Wg@mail.gmail.com/ Tested-by: Jaroslav Pulchart Tested-by: Kalesh Singh Cc: Hillf Danton Cc: Kairui Song Cc: T.J. Mercier Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 65ef14146c85..a4f599ea3e06 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5341,20 +5341,41 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0; } -static unsigned long get_nr_to_reclaim(struct scan_control *sc) +static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc) { + int i; + enum zone_watermarks mark; + /* don't abort memcg reclaim to ensure fairness */ if (!root_reclaim(sc)) - return -1; + return false; + + if (sc->nr_reclaimed >= max(sc->nr_to_reclaim, compact_gap(sc->order))) + return true; + + /* check the order to exclude compaction-induced reclaim */ + if (!current_is_kswapd() || sc->order) + return false; - return max(sc->nr_to_reclaim, compact_gap(sc->order)); + mark = sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING ? + WMARK_PROMO : WMARK_HIGH; + + for (i = 0; i <= sc->reclaim_idx; i++) { + struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i; + unsigned long size = wmark_pages(zone, mark) + MIN_LRU_BATCH; + + if (managed_zone(zone) && !zone_watermark_ok(zone, 0, size, sc->reclaim_idx, 0)) + return false; + } + + /* kswapd should abort if all eligible zones are safe */ + return true; } static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { long nr_to_scan; unsigned long scanned = 0; - unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); int swappiness = get_swappiness(lruvec, sc); /* clean file folios are more likely to exist */ @@ -5376,7 +5397,7 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) if (scanned >= nr_to_scan) break; - if (sc->nr_reclaimed >= nr_to_reclaim) + if (should_abort_scan(lruvec, sc)) break; cond_resched(); @@ -5437,7 +5458,6 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) struct lru_gen_folio *lrugen; struct mem_cgroup *memcg; const struct hlist_nulls_node *pos; - unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); bin = first_bin = get_random_u32_below(MEMCG_NR_BINS); restart: @@ -5470,7 +5490,7 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) rcu_read_lock(); - if (sc->nr_reclaimed >= nr_to_reclaim) + if (should_abort_scan(lruvec, sc)) break; } @@ -5481,7 +5501,7 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) mem_cgroup_put(memcg); - if (sc->nr_reclaimed >= nr_to_reclaim) + if (!is_a_nulls(pos)) return; /* restart if raced with lru_gen_rotate_memcg() */ From 6b131c2a2875682bc5ea0d4b15ba76775aa3ea80 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 7 Dec 2023 23:14:06 -0700 Subject: [PATCH 1160/1397] mm/mglru: respect min_ttl_ms with memcgs commit 8aa420617918d12d1f5d55030a503c9418e73c2c upstream. While investigating kswapd "consuming 100% CPU" [1] (also see "mm/mglru: try to stop at high watermarks"), it was discovered that the memcg LRU can breach the thrashing protection imposed by min_ttl_ms. Before the memcg LRU: kswapd() shrink_node_memcgs() mem_cgroup_iter() inc_max_seq() // always hit a different memcg lru_gen_age_node() mem_cgroup_iter() check the timestamp of the oldest generation After the memcg LRU: kswapd() shrink_many() restart: iterate the memcg LRU: inc_max_seq() // occasionally hit the same memcg if raced with lru_gen_rotate_memcg(): goto restart lru_gen_age_node() mem_cgroup_iter() check the timestamp of the oldest generation Specifically, when the restart happens in shrink_many(), it needs to stick with the (memcg LRU) generation it began with. In other words, it should neither re-read memcg_lru->seq nor age an lruvec of a different generation. Otherwise it can hit the same memcg multiple times without giving lru_gen_age_node() a chance to check the timestamp of that memcg's oldest generation (against min_ttl_ms). [1] https://lore.kernel.org/CAK8fFZ4DY+GtBA40Pm7Nn5xCHy+51w3sfxPqkqpqakSXYyX+Wg@mail.gmail.com/ Link: https://lkml.kernel.org/r/20231208061407.2125867-3-yuzhao@google.com Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists") Signed-off-by: Yu Zhao Tested-by: T.J. Mercier Cc: Charan Teja Kalla Cc: Hillf Danton Cc: Jaroslav Pulchart Cc: Kairui Song Cc: Kalesh Singh Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 30 +++++++++++++++++------------- mm/vmscan.c | 30 ++++++++++++++++-------------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4106fbc5b4b3..e57d93c51abe 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -505,33 +505,37 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); * the old generation, is incremented when all its bins become empty. * * There are four operations: - * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its + * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its * current generation (old or young) and updates its "seg" to "head"; - * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its + * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its * current generation (old or young) and updates its "seg" to "tail"; - * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old + * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old * generation, updates its "gen" to "old" and resets its "seg" to "default"; - * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the + * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the * young generation, updates its "gen" to "young" and resets its "seg" to * "default". * * The events that trigger the above operations are: * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; - * 2. The first attempt to reclaim an memcg below low, which triggers + * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; - * 3. The first attempt to reclaim an memcg below reclaimable size threshold, + * 3. The first attempt to reclaim a memcg below reclaimable size threshold, * which triggers MEMCG_LRU_TAIL; - * 4. The second attempt to reclaim an memcg below reclaimable size threshold, + * 4. The second attempt to reclaim a memcg below reclaimable size threshold, * which triggers MEMCG_LRU_YOUNG; - * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; + * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; - * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. + * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. * - * Note that memcg LRU only applies to global reclaim, and the round-robin - * incrementing of their max_seq counters ensures the eventual fairness to all - * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * Notes: + * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing + * of their max_seq counters ensures the eventual fairness to all eligible + * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). + * 2. There are only two valid generations: old (seq) and young (seq+1). + * MEMCG_NR_GENS is set to three so that when reading the generation counter + * locklessly, a stale value (seq-1) does not wraparound to young. */ -#define MEMCG_NR_GENS 2 +#define MEMCG_NR_GENS 3 #define MEMCG_NR_BINS 8 struct lru_gen_memcg { diff --git a/mm/vmscan.c b/mm/vmscan.c index a4f599ea3e06..cd15648ee786 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4790,6 +4790,9 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) else VM_WARN_ON_ONCE(true); + WRITE_ONCE(lruvec->lrugen.seg, seg); + WRITE_ONCE(lruvec->lrugen.gen, new); + hlist_nulls_del_rcu(&lruvec->lrugen.list); if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD) @@ -4800,9 +4803,6 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) pgdat->memcg_lru.nr_memcgs[old]--; pgdat->memcg_lru.nr_memcgs[new]++; - lruvec->lrugen.gen = new; - WRITE_ONCE(lruvec->lrugen.seg, seg); - if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq)) WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); @@ -4825,11 +4825,11 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg) gen = get_memcg_gen(pgdat->memcg_lru.seq); + lruvec->lrugen.gen = gen; + hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]); pgdat->memcg_lru.nr_memcgs[gen]++; - lruvec->lrugen.gen = gen; - spin_unlock_irq(&pgdat->memcg_lru.lock); } } @@ -5328,7 +5328,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool DEFINE_MAX_SEQ(lruvec); if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg)) - return 0; + return -1; if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan)) return nr_to_scan; @@ -5403,7 +5403,7 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) cond_resched(); } - /* whether try_to_inc_max_seq() was successful */ + /* whether this lruvec should be rotated */ return nr_to_scan < 0; } @@ -5457,13 +5457,13 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) struct lruvec *lruvec; struct lru_gen_folio *lrugen; struct mem_cgroup *memcg; - const struct hlist_nulls_node *pos; + struct hlist_nulls_node *pos; + gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); bin = first_bin = get_random_u32_below(MEMCG_NR_BINS); restart: op = 0; memcg = NULL; - gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); rcu_read_lock(); @@ -5474,6 +5474,10 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) } mem_cgroup_put(memcg); + memcg = NULL; + + if (gen != READ_ONCE(lrugen->gen)) + continue; lruvec = container_of(lrugen, struct lruvec, lrugen); memcg = lruvec_memcg(lruvec); @@ -5558,16 +5562,14 @@ static void set_initial_priority(struct pglist_data *pgdat, struct scan_control if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH) return; /* - * Determine the initial priority based on ((total / MEMCG_NR_GENS) >> - * priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, where the - * estimated reclaimed_to_scanned_ratio = inactive / total. + * Determine the initial priority based on + * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, + * where reclaimed_to_scanned_ratio = inactive / total. */ reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE); if (get_swappiness(lruvec, sc)) reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON); - reclaimable /= MEMCG_NR_GENS; - /* round down reclaimable and round up sc->nr_to_reclaim */ priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1); From a107d6a132cbb596afa648f588f0078713ad46e9 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 7 Dec 2023 23:14:07 -0700 Subject: [PATCH 1161/1397] mm/mglru: reclaim offlined memcgs harder commit 4376807bf2d5371c3e00080c972be568c3f8a7d1 upstream. In the effort to reduce zombie memcgs [1], it was discovered that the memcg LRU doesn't apply enough pressure on offlined memcgs. Specifically, instead of rotating them to the tail of the current generation (MEMCG_LRU_TAIL) for a second attempt, it moves them to the next generation (MEMCG_LRU_YOUNG) after the first attempt. Not applying enough pressure on offlined memcgs can cause them to build up, and this can be particularly harmful to memory-constrained systems. On Pixel 8 Pro, launching apps for 50 cycles: Before After Change Zombie memcgs 45 35 -22% [1] https://lore.kernel.org/CABdmKX2M6koq4Q0Cmp_-=wbP0Qa190HdEGGaHfxNS05gAkUtPA@mail.gmail.com/ Link: https://lkml.kernel.org/r/20231208061407.2125867-4-yuzhao@google.com Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists") Signed-off-by: Yu Zhao Reported-by: T.J. Mercier Tested-by: T.J. Mercier Cc: Charan Teja Kalla Cc: Hillf Danton Cc: Jaroslav Pulchart Cc: Kairui Song Cc: Kalesh Singh Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 8 ++++---- mm/vmscan.c | 24 ++++++++++++++++-------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e57d93c51abe..b6b688b229ea 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -519,10 +519,10 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; * 2. The first attempt to reclaim a memcg below low, which triggers * MEMCG_LRU_TAIL; - * 3. The first attempt to reclaim a memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_TAIL; - * 4. The second attempt to reclaim a memcg below reclaimable size threshold, - * which triggers MEMCG_LRU_YOUNG; + * 3. The first attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_TAIL; + * 4. The second attempt to reclaim a memcg offlined or below reclaimable size + * threshold, which triggers MEMCG_LRU_YOUNG; * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. diff --git a/mm/vmscan.c b/mm/vmscan.c index cd15648ee786..dcc264d3c92f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5291,7 +5291,12 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, } /* try to scrape all its memory if this memcg was deleted */ - *nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total; + if (!mem_cgroup_online(memcg)) { + *nr_to_scan = total; + return false; + } + + *nr_to_scan = total >> sc->priority; /* * The aging tries to be lazy to reduce the overhead, while the eviction @@ -5412,14 +5417,9 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) bool success; unsigned long scanned = sc->nr_scanned; unsigned long reclaimed = sc->nr_reclaimed; - int seg = lru_gen_memcg_seg(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); - /* see the comment on MEMCG_NR_GENS */ - if (!lruvec_is_sizable(lruvec, sc)) - return seg != MEMCG_LRU_TAIL ? MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG; - mem_cgroup_calculate_protection(NULL, memcg); if (mem_cgroup_below_min(NULL, memcg)) @@ -5427,7 +5427,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) if (mem_cgroup_below_low(NULL, memcg)) { /* see the comment on MEMCG_NR_GENS */ - if (seg != MEMCG_LRU_TAIL) + if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL) return MEMCG_LRU_TAIL; memcg_memory_event(memcg, MEMCG_LOW); @@ -5443,7 +5443,15 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) flush_reclaim_state(sc); - return success ? MEMCG_LRU_YOUNG : 0; + if (success && mem_cgroup_online(memcg)) + return MEMCG_LRU_YOUNG; + + if (!success && lruvec_is_sizable(lruvec, sc)) + return 0; + + /* one retry if offlined or too small */ + return lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL ? + MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG; } #ifdef CONFIG_MEMCG From 7a4ae7acd20805940307be98b304871a43288a01 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Tue, 18 Apr 2023 17:40:31 +0900 Subject: [PATCH 1162/1397] mm/shmem: fix race in shmem_undo_range w/THP commit 55ac8bbe358bdd2f3c044c12f249fd22d48fe015 upstream. Split folios during the second loop of shmem_undo_range. It's not sufficient to only split folios when dealing with partial pages, since it's possible for a THP to be faulted in after that point. Calling truncate_inode_folio in that situation can result in throwing away data outside of the range being targeted. [akpm@linux-foundation.org: tidy up comment layout] Link: https://lkml.kernel.org/r/20230418084031.3439795-1-stevensd@google.com Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios") Signed-off-by: David Stevens Cc: Matthew Wilcox (Oracle) Cc: Suleiman Souhlal Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 69595d341882..e826be732b9b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1098,7 +1098,24 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, } VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); - truncate_inode_folio(mapping, folio); + + if (!folio_test_large(folio)) { + truncate_inode_folio(mapping, folio); + } else if (truncate_inode_partial_folio(folio, lstart, lend)) { + /* + * If we split a page, reset the loop so + * that we pick up the new sub pages. + * Otherwise the THP was entirely + * dropped or the target range was + * zeroed, so just continue the loop as + * is. + */ + if (!folio_test_large(folio)) { + folio_unlock(folio); + index = start; + break; + } + } } folio_unlock(folio); } From 37b561d55936d0fcebd199d22180e595bb72b784 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Wed, 29 Nov 2023 22:04:09 +0000 Subject: [PATCH 1163/1397] kexec: drop dependency on ARCH_SUPPORTS_KEXEC from CRASH_DUMP commit c41bd2514184d75db087fe4c1221237fb7922875 upstream. In commit f8ff23429c62 ("kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP") we tried to fix a config regression, where CONFIG_CRASH_DUMP required CONFIG_KEXEC. However, it was not enough at least for arm64 platforms. While further testing the patch with our arm64 config I noticed that CONFIG_CRASH_DUMP is unavailable in menuconfig. This is because CONFIG_CRASH_DUMP still depends on the new CONFIG_ARCH_SUPPORTS_KEXEC introduced in commit 91506f7e5d21 ("arm64/kexec: refactor for kernel/Kconfig.kexec") and on arm64 CONFIG_ARCH_SUPPORTS_KEXEC requires CONFIG_PM_SLEEP_SMP=y, which in turn requires either CONFIG_SUSPEND=y or CONFIG_HIBERNATION=y neither of which are set in our config. Given that we already established that CONFIG_KEXEC (which is a switch for kexec system call itself) is not required for CONFIG_CRASH_DUMP drop CONFIG_ARCH_SUPPORTS_KEXEC dependency as well. The arm64 kernel builds just fine with CONFIG_CRASH_DUMP=y and with both CONFIG_KEXEC=n and CONFIG_KEXEC_FILE=n after f8ff23429c62 ("kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP") and this patch are applied given that the necessary shared bits are included via CONFIG_KEXEC_CORE dependency. [bhe@redhat.com: don't export some symbols when CONFIG_MMU=n] Link: https://lkml.kernel.org/r/ZW03ODUKGGhP1ZGU@MiWiFi-R3L-srv [bhe@redhat.com: riscv, kexec: fix dependency of two items] Link: https://lkml.kernel.org/r/ZW04G/SKnhbE5mnX@MiWiFi-R3L-srv Link: https://lkml.kernel.org/r/20231129220409.55006-1-ignat@cloudflare.com Fixes: 91506f7e5d21 ("arm64/kexec: refactor for kernel/Kconfig.kexec") Signed-off-by: Ignat Korchagin Signed-off-by: Baoquan He Acked-by: Baoquan He Cc: Alexander Gordeev Cc: # 6.6+: f8ff234: kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP Cc: # 6.6+ Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/riscv/Kconfig | 4 ++-- arch/riscv/kernel/crash_core.c | 4 +++- kernel/Kconfig.kexec | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 9c48fecc6719..6688cbbed0b4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -669,7 +669,7 @@ config RISCV_BOOT_SPINWAIT If unsure what to do here, say N. config ARCH_SUPPORTS_KEXEC - def_bool MMU + def_bool y config ARCH_SELECTS_KEXEC def_bool y @@ -677,7 +677,7 @@ config ARCH_SELECTS_KEXEC select HOTPLUG_CPU if SMP config ARCH_SUPPORTS_KEXEC_FILE - def_bool 64BIT && MMU + def_bool 64BIT config ARCH_SELECTS_KEXEC_FILE def_bool y diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c index 55f1d7856b54..8706736fd4e2 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/crash_core.c @@ -5,17 +5,19 @@ void arch_crash_save_vmcoreinfo(void) { - VMCOREINFO_NUMBER(VA_BITS); VMCOREINFO_NUMBER(phys_ram_base); vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); +#ifdef CONFIG_MMU + VMCOREINFO_NUMBER(VA_BITS); vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); #ifdef CONFIG_64BIT vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); +#endif #endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 143f4d8eab7c..f9619ac6b71d 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -94,7 +94,6 @@ config KEXEC_JUMP config CRASH_DUMP bool "kernel crash dumps" depends on ARCH_SUPPORTS_CRASH_DUMP - depends on ARCH_SUPPORTS_KEXEC select CRASH_CORE select KEXEC_CORE help From 14570dfa170ede01eacedc60f717df121d8b9bbd Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 1 Dec 2023 13:00:09 -0800 Subject: [PATCH 1164/1397] btrfs: free qgroup reserve when ORDERED_IOERR is set commit f63e1164b90b385cd832ff0fdfcfa76c3cc15436 upstream. An ordered extent completing is a critical moment in qgroup reserve handling, as the ownership of the reservation is handed off from the ordered extent to the delayed ref. In the happy path we release (unlock) but do not free (decrement counter) the reservation, and the delayed ref drives the free. However, on an error, we don't create a delayed ref, since there is no ref to add. Therefore, free on the error path. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ordered-data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 345c449d588c..0fa0bc13e8ac 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -603,7 +603,9 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, release = entry->disk_num_bytes; else release = entry->num_bytes; - btrfs_delalloc_release_metadata(btrfs_inode, release, false); + btrfs_delalloc_release_metadata(btrfs_inode, release, + test_bit(BTRFS_ORDERED_IOERR, + &entry->flags)); } percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes, From 1823491513e3f72beb85f7fba2478ef3562615d8 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 1 Dec 2023 13:00:10 -0800 Subject: [PATCH 1165/1397] btrfs: fix qgroup_free_reserved_data int overflow commit 9e65bfca24cf1d77e4a5c7a170db5867377b3fe7 upstream. The reserved data counter and input parameter is a u64, but we inadvertently accumulate it in an int. Overflowing that int results in freeing the wrong amount of data and breaking reserve accounting. Unfortunately, this overflow rot spreads from there, as the qgroup release/free functions rely on returning an int to take advantage of negative values for error codes. Therefore, the full fix is to return the "released" or "freed" amount by a u64 argument and to return 0 or negative error code via the return value. Most of the call sites simply ignore the return value, though some of them handle the error and count the returned bytes. Change all of them accordingly. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delalloc-space.c | 2 +- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 16 ++++++++-------- fs/btrfs/ordered-data.c | 7 ++++--- fs/btrfs/qgroup.c | 25 +++++++++++++++---------- fs/btrfs/qgroup.h | 4 ++-- 6 files changed, 31 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index 0d105ed1b8de..eef341bbcc60 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -199,7 +199,7 @@ void btrfs_free_reserved_data_space(struct btrfs_inode *inode, start = round_down(start, fs_info->sectorsize); btrfs_free_reserved_data_space_noquota(fs_info, len); - btrfs_qgroup_free_data(inode, reserved, start, len); + btrfs_qgroup_free_data(inode, reserved, start, len, NULL); } /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 23a145ca9457..c997b790568f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3187,7 +3187,7 @@ static long btrfs_fallocate(struct file *file, int mode, qgroup_reserved -= range->len; } else if (qgroup_reserved > 0) { btrfs_qgroup_free_data(BTRFS_I(inode), data_reserved, - range->start, range->len); + range->start, range->len, NULL); qgroup_reserved -= range->len; } list_del(&range->list); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c92c589b454d..7bf5c2c1a54d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -687,7 +687,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 size, * And at reserve time, it's always aligned to page size, so * just free one page here. */ - btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE); + btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE, NULL); btrfs_free_path(path); btrfs_end_transaction(trans); return ret; @@ -5129,7 +5129,7 @@ static void evict_inode_truncate_pages(struct inode *inode) */ if (state_flags & EXTENT_DELALLOC) btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start, - end - start + 1); + end - start + 1, NULL); clear_extent_bit(io_tree, start, end, EXTENT_CLEAR_ALL_BITS | EXTENT_DO_ACCOUNTING, @@ -8051,7 +8051,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset, * reserved data space. * Since the IO will never happen for this page. */ - btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur); + btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur, NULL); if (!inode_evicting) { clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_UPTODATE | @@ -9481,7 +9481,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( struct btrfs_path *path; u64 start = ins->objectid; u64 len = ins->offset; - int qgroup_released; + u64 qgroup_released = 0; int ret; memset(&stack_fi, 0, sizeof(stack_fi)); @@ -9494,9 +9494,9 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent( btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE); /* Encryption and other encoding is reserved and all 0 */ - qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len); - if (qgroup_released < 0) - return ERR_PTR(qgroup_released); + ret = btrfs_qgroup_release_data(inode, file_offset, len, &qgroup_released); + if (ret < 0) + return ERR_PTR(ret); if (trans) { ret = insert_reserved_file_extent(trans, inode, @@ -10391,7 +10391,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0); out_qgroup_free_data: if (ret < 0) - btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes); + btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes, NULL); out_free_data_space: /* * If btrfs_reserve_extent() succeeded, then we already decremented diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 0fa0bc13e8ac..2b8ff8b53af0 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -153,11 +153,12 @@ static struct btrfs_ordered_extent *alloc_ordered_extent( { struct btrfs_ordered_extent *entry; int ret; + u64 qgroup_rsv = 0; if (flags & ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) { /* For nocow write, we can release the qgroup rsv right now */ - ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes); + ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv); if (ret < 0) return ERR_PTR(ret); } else { @@ -165,7 +166,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent( * The ordered extent has reserved qgroup space, release now * and pass the reserved number for qgroup_record to free. */ - ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes); + ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes, &qgroup_rsv); if (ret < 0) return ERR_PTR(ret); } @@ -183,7 +184,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent( entry->inode = igrab(&inode->vfs_inode); entry->compress_type = compress_type; entry->truncated_len = (u64)-1; - entry->qgroup_rsv = ret; + entry->qgroup_rsv = qgroup_rsv; entry->flags = flags; refcount_set(&entry->refs, 1); init_waitqueue_head(&entry->wait); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index bdaebb9fc689..7c9249438154 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3855,13 +3855,14 @@ int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, /* Free ranges specified by @reserved, normally in error path */ static int qgroup_free_reserved_data(struct btrfs_inode *inode, - struct extent_changeset *reserved, u64 start, u64 len) + struct extent_changeset *reserved, + u64 start, u64 len, u64 *freed_ret) { struct btrfs_root *root = inode->root; struct ulist_node *unode; struct ulist_iterator uiter; struct extent_changeset changeset; - int freed = 0; + u64 freed = 0; int ret; extent_changeset_init(&changeset); @@ -3902,7 +3903,9 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode, } btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed, BTRFS_QGROUP_RSV_DATA); - ret = freed; + if (freed_ret) + *freed_ret = freed; + ret = 0; out: extent_changeset_release(&changeset); return ret; @@ -3910,7 +3913,7 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode, static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, u64 len, - int free) + u64 *released, int free) { struct extent_changeset changeset; int trace_op = QGROUP_RELEASE; @@ -3922,7 +3925,7 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, /* In release case, we shouldn't have @reserved */ WARN_ON(!free && reserved); if (free && reserved) - return qgroup_free_reserved_data(inode, reserved, start, len); + return qgroup_free_reserved_data(inode, reserved, start, len, released); extent_changeset_init(&changeset); ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1, EXTENT_QGROUP_RESERVED, &changeset); @@ -3937,7 +3940,8 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, btrfs_qgroup_free_refroot(inode->root->fs_info, inode->root->root_key.objectid, changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); - ret = changeset.bytes_changed; + if (released) + *released = changeset.bytes_changed; out: extent_changeset_release(&changeset); return ret; @@ -3956,9 +3960,10 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, * NOTE: This function may sleep for memory allocation. */ int btrfs_qgroup_free_data(struct btrfs_inode *inode, - struct extent_changeset *reserved, u64 start, u64 len) + struct extent_changeset *reserved, + u64 start, u64 len, u64 *freed) { - return __btrfs_qgroup_release_data(inode, reserved, start, len, 1); + return __btrfs_qgroup_release_data(inode, reserved, start, len, freed, 1); } /* @@ -3976,9 +3981,9 @@ int btrfs_qgroup_free_data(struct btrfs_inode *inode, * * NOTE: This function may sleep for memory allocation. */ -int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len) +int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released) { - return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); + return __btrfs_qgroup_release_data(inode, NULL, start, len, released, 0); } static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes, diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 7bffa10589d6..104c9bd3c337 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -363,10 +363,10 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, /* New io_tree based accurate qgroup reserve API */ int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, struct extent_changeset **reserved, u64 start, u64 len); -int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len); +int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released); int btrfs_qgroup_free_data(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, - u64 len); + u64 len, u64 *freed); int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, enum btrfs_qgroup_rsv_type type, bool enforce); int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, From d3cf024353e213989a48399ed9ed24775e6472f0 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 1 Dec 2023 13:00:12 -0800 Subject: [PATCH 1166/1397] btrfs: don't clear qgroup reserved bit in release_folio commit a86805504b88f636a6458520d85afdf0634e3c6b upstream. The EXTENT_QGROUP_RESERVED bit is used to "lock" regions of the file for duplicate reservations. That is two writes to that range in one transaction shouldn't create two reservations, as the reservation will only be freed once when the write finally goes down. Therefore, it is never OK to clear that bit without freeing the associated qgroup reserve. At this point, we don't want to be freeing the reserve, so mask off the bit. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1530df88370c..03c10e0ba0e2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2303,7 +2303,8 @@ static int try_release_extent_state(struct extent_io_tree *tree, ret = 0; } else { u32 clear_bits = ~(EXTENT_LOCKED | EXTENT_NODATASUM | - EXTENT_DELALLOC_NEW | EXTENT_CTLBITS); + EXTENT_DELALLOC_NEW | EXTENT_CTLBITS | + EXTENT_QGROUP_RESERVED); /* * At this point we can safely clear everything except the From d50670681d8a14980a54238bad00a07fee122b5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 8 Dec 2023 13:43:09 +0100 Subject: [PATCH 1167/1397] drm/amdgpu: fix tear down order in amdgpu_vm_pt_free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ceb9a321e7639700844aa3bf234a4e0884f13b77 upstream. When freeing PD/PT with shadows it can happen that the shadow destruction races with detaching the PD/PT from the VM causing a NULL pointer dereference in the invalidation code. Fix this by detaching the the PD/PT from the VM first and then freeing the shadow instead. Signed-off-by: Christian König Fixes: https://gitlab.freedesktop.org/drm/amd/-/issues/2867 Cc: Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 96d601e209b8..0d51222f6f8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -642,13 +642,14 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) if (!entry->bo) return; + + entry->bo->vm_bo = NULL; shadow = amdgpu_bo_shadowed(entry->bo); if (shadow) { ttm_bo_set_bulk_move(&shadow->tbo, NULL); amdgpu_bo_unref(&shadow); } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); - entry->bo->vm_bo = NULL; spin_lock(&entry->vm->status_lock); list_del(&entry->vm_status); From a511e851d49e13835f0da022d48e873c8cc15827 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 7 Dec 2023 11:38:21 +0200 Subject: [PATCH 1168/1397] drm/edid: also call add modes in EDID connector update fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 759f14e20891de72e676d9d738eb2c573aa15f52 upstream. When the separate add modes call was added back in commit c533b5167c7e ("drm/edid: add separate drm_edid_connector_add_modes()"), it failed to address drm_edid_override_connector_update(). Also call add modes there. Reported-by: bbaa Closes: https://lore.kernel.org/r/930E9B4C7D91FDFF+29b34d89-8658-4910-966a-c772f320ea03@bbaa.fun Fixes: c533b5167c7e ("drm/edid: add separate drm_edid_connector_add_modes()") Cc: # v6.3+ Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207093821.2654267-1-jani.nikula@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_edid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index b3e1b288fc0c..a491280ca48c 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2308,7 +2308,8 @@ int drm_edid_override_connector_update(struct drm_connector *connector) override = drm_edid_override_get(connector); if (override) { - num_modes = drm_edid_connector_update(connector, override); + if (drm_edid_connector_update(connector, override) == 0) + num_modes = drm_edid_connector_add_modes(connector); drm_edid_free(override); From c7f6e836e675683e704a40e68eded505bdc30428 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 6 Dec 2023 12:08:26 -0600 Subject: [PATCH 1169/1397] drm/amd/display: Restore guard against default backlight value < 1 nit commit b96ab339ee50470d13a1faa6ad94d2218a7cd49f upstream. Mark reports that brightness is not restored after Xorg dpms screen blank. This behavior was introduced by commit d9e865826c20 ("drm/amd/display: Simplify brightness initialization") which dropped the cached backlight value in display code, but also removed code for when the default value read back was less than 1 nit. Restore this code so that the backlight brightness is restored to the correct default value in this circumstance. Reported-by: Mark Herbert Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3031 Cc: stable@vger.kernel.org Cc: Camille Cho Cc: Krunoslav Kovac Cc: Hamza Mahfooz Fixes: d9e865826c20 ("drm/amd/display: Simplify brightness initialization") Acked-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../amd/display/dc/link/protocols/link_edp_panel_control.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index fe74d4252a51..13c3d7ff6139 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -280,8 +280,8 @@ bool set_default_brightness_aux(struct dc_link *link) if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; - // if > 5000, it might be wrong readback - if (default_backlight > 5000000) + // if < 1 nits or > 5000, it might be wrong readback + if (default_backlight < 1000 || default_backlight > 5000000) default_backlight = 150000; return edp_set_backlight_level_nits(link, true, From a8f922ad2f76a53383982132ee44d123b72533c5 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 19 Jun 2023 15:04:24 -0500 Subject: [PATCH 1170/1397] drm/amd/display: Disable PSR-SU on Parade 0803 TCON again commit e7ab758741672acb21c5d841a9f0309d30e48a06 upstream. When screen brightness is rapidly changed and PSR-SU is enabled the display hangs on panels with this TCON even on the latest DCN 3.1.4 microcode (0x8002a81 at this time). This was disabled previously as commit 072030b17830 ("drm/amd: Disable PSR-SU on Parade 0803 TCON") but reverted as commit 1e66a17ce546 ("Revert "drm/amd: Disable PSR-SU on Parade 0803 TCON"") in favor of testing for a new enough microcode (commit cd2e31a9ab93 ("drm/amd/display: Set minimum requirement for using PSR-SU on Phoenix")). As hangs are still happening specifically with this TCON, disable PSR-SU again for it until it can be root caused. Cc: stable@vger.kernel.org Cc: aaron.ma@canonical.com Cc: binli@gnome.org Cc: Marc Rossi Cc: Hamza Mahfooz Signed-off-by: Mario Limonciello Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2046131 Acked-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 73a2b37fbbd7..2b3d5183818a 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -839,6 +839,8 @@ bool is_psr_su_specific_panel(struct dc_link *link) ((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) || (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07))) isPSRSUSupported = false; + else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) + isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } From b6295a167fa55fff2c0ac7ed534137342a422eb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 4 Dec 2023 22:24:43 +0200 Subject: [PATCH 1171/1397] drm/i915: Fix ADL+ tiled plane stride when the POT stride is smaller than the original MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 324b70e997aab0a7deab8cb90711faccda4e98c8 upstream. plane_view_scanout_stride() currently assumes that we had to pad the mapping stride with dummy pages in order to align it. But that is not the case if the original fb stride exceeds the aligned stride used to populate the remapped view, which is calculated from the user specified framebuffer width rather than the user specified framebuffer stride. Ignore the original fb stride in this case and just stick to the POT aligned stride. Getting this wrong will cause the plane to fetch the wrong data, and can lead to fault errors if the page tables at the bogus location aren't even populated. TODO: figure out if this is OK for CCS, or if we should instead increase the width of the view to cover the entire user specified fb stride instead... Cc: Imre Deak Cc: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231204202443.31247-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak Reviewed-by: Juha-Pekka Heikkila (cherry picked from commit 01a39f1c4f1220a4e6a25729fae87ff5794cbc52) Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_fb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 446bbf7986b6..ca37f90eeafd 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1370,7 +1370,8 @@ plane_view_scanout_stride(const struct intel_framebuffer *fb, int color_plane, struct drm_i915_private *i915 = to_i915(fb->base.dev); unsigned int stride_tiles; - if (IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) + if ((IS_ALDERLAKE_P(i915) || DISPLAY_VER(i915) >= 14) && + src_stride_tiles < dst_stride_tiles) stride_tiles = src_stride_tiles; else stride_tiles = dst_stride_tiles; From a9d951b007904942e2519bc8ff7ba2d1b4121060 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 7 Dec 2023 21:34:34 +0200 Subject: [PATCH 1172/1397] drm/i915: Fix intel_atomic_setup_scalers() plane_state handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c3070f080f9ba18dea92eaa21730f7ab85b5c8f4 upstream. Since the plane_state variable is declared outside the scaler_users loop in intel_atomic_setup_scalers(), and it's never reset back to NULL inside the loop we may end up calling intel_atomic_setup_scaler() with a non-NULL plane state for the pipe scaling case. That is bad because intel_atomic_setup_scaler() determines whether we are doing plane scaling or pipe scaling based on plane_state!=NULL. The end result is that we may miscalculate the scaler mode for pipe scaling. The hardware becomes somewhat upset if we end up in this situation when scanning out a planar format on a SDR plane. We end up programming the pipe scaler into planar mode as well, and the result is a screenfull of garbage. Fix the situation by making sure we pass the correct plane_state==NULL when calculating the scaler mode for pipe scaling. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231207193441.20206-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit e81144106e21271c619f0c722a09e27ccb8c043d) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/skl_scaler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 1e7c97243fcf..8a934bada624 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -504,7 +504,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, { struct drm_plane *plane = NULL; struct intel_plane *intel_plane; - struct intel_plane_state *plane_state = NULL; struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; struct drm_atomic_state *drm_state = crtc_state->uapi.state; @@ -536,6 +535,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, /* walkthrough scaler_users bits and start assigning scalers */ for (i = 0; i < sizeof(scaler_state->scaler_users) * 8; i++) { + struct intel_plane_state *plane_state = NULL; int *scaler_id; const char *name; int idx, ret; From 4029b025bedaee1cd819f4f9a8b2244266fd320b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Dec 2023 20:03:08 +0200 Subject: [PATCH 1173/1397] drm/i915: Fix remapped stride with CCS on ADL+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0ccd963fe555451b1f84e6d14d2b3ef03dd5c947 upstream. On ADL+ the hardware automagically calculates the CCS AUX surface stride from the main surface stride, so when remapping we can't really play a lot of tricks with the main surface stride, or else the AUX surface stride would get miscalculated and no longer match the actual data layout in memory. Supposedly we could remap in 256 main surface tile units (AUX page(4096)/cachline(64)*4(4x1 main surface tiles per AUX cacheline)=256 main surface tiles), but the extra complexity is probably not worth the hassle. So let's just make sure our mapping stride is calculated from the full framebuffer stride (instead of the framebuffer width). This way the stride we program into PLANE_STRIDE will be the original framebuffer stride, and thus there will be no change to the AUX stride/layout. Cc: stable@vger.kernel.org Cc: Imre Deak Cc: Juha-Pekka Heikkila Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231205180308.7505-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 2c12eb36f849256f5eb00ffaee9bf99396fd3814) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_fb.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index ca37f90eeafd..689b7c16d300 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1498,8 +1498,20 @@ static u32 calc_plane_remap_info(const struct intel_framebuffer *fb, int color_p size += remap_info->size; } else { - unsigned int dst_stride = plane_view_dst_stride_tiles(fb, color_plane, - remap_info->width); + unsigned int dst_stride; + + /* + * The hardware automagically calculates the CCS AUX surface + * stride from the main surface stride so can't really remap a + * smaller subset (unless we'd remap in whole AUX page units). + */ + if (intel_fb_needs_pot_stride_remap(fb) && + intel_fb_is_ccs_modifier(fb->base.modifier)) + dst_stride = remap_info->src_stride; + else + dst_stride = remap_info->width; + + dst_stride = plane_view_dst_stride_tiles(fb, color_plane, dst_stride); assign_chk_ovf(i915, remap_info->dst_stride, dst_stride); color_plane_info->mapping_stride = dst_stride * From 534733397da26de0303057ce0b93a22bda150365 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 11 Dec 2023 10:26:40 -0300 Subject: [PATCH 1174/1397] smb: client: fix OOB in receive_encrypted_standard() commit eec04ea119691e65227a97ce53c0da6b9b74b0b7 upstream. Fix potential OOB in receive_encrypted_standard() if server returned a large shdr->NextCommand that would end up writing off the end of @next_buffer. Fixes: b24df3e30cbf ("cifs: update receive_encrypted_standard to handle compounded responses") Cc: stable@vger.kernel.org Reported-by: Robert Morris Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2ops.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 0ca6dcf20261..258a591f716a 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4941,6 +4941,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server, struct smb2_hdr *shdr; unsigned int pdu_length = server->pdu_size; unsigned int buf_size; + unsigned int next_cmd; struct mid_q_entry *mid_entry; int next_is_large; char *next_buffer = NULL; @@ -4969,14 +4970,15 @@ receive_encrypted_standard(struct TCP_Server_Info *server, next_is_large = server->large_buf; one_more: shdr = (struct smb2_hdr *)buf; - if (shdr->NextCommand) { + next_cmd = le32_to_cpu(shdr->NextCommand); + if (next_cmd) { + if (WARN_ON_ONCE(next_cmd > pdu_length)) + return -1; if (next_is_large) next_buffer = (char *)cifs_buf_get(); else next_buffer = (char *)cifs_small_buf_get(); - memcpy(next_buffer, - buf + le32_to_cpu(shdr->NextCommand), - pdu_length - le32_to_cpu(shdr->NextCommand)); + memcpy(next_buffer, buf + next_cmd, pdu_length - next_cmd); } mid_entry = smb2_find_mid(server, buf); @@ -5000,8 +5002,8 @@ receive_encrypted_standard(struct TCP_Server_Info *server, else ret = cifs_handle_standard(server, mid_entry); - if (ret == 0 && shdr->NextCommand) { - pdu_length -= le32_to_cpu(shdr->NextCommand); + if (ret == 0 && next_cmd) { + pdu_length -= next_cmd; server->large_buf = next_is_large; if (next_is_large) server->bigbuf = buf = next_buffer; From 17a0f64cc02d4972e21c733d9f21d1c512963afa Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 11 Dec 2023 10:26:41 -0300 Subject: [PATCH 1175/1397] smb: client: fix potential OOBs in smb2_parse_contexts() commit af1689a9b7701d9907dfc84d2a4b57c4bc907144 upstream. Validate offsets and lengths before dereferencing create contexts in smb2_parse_contexts(). This fixes following oops when accessing invalid create contexts from server: BUG: unable to handle page fault for address: ffff8881178d8cc3 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 4a01067 P4D 4a01067 PUD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 3 PID: 1736 Comm: mount.cifs Not tainted 6.7.0-rc4 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 RIP: 0010:smb2_parse_contexts+0xa0/0x3a0 [cifs] Code: f8 10 75 13 48 b8 93 ad 25 50 9c b4 11 e7 49 39 06 0f 84 d2 00 00 00 8b 45 00 85 c0 74 61 41 29 c5 48 01 c5 41 83 fd 0f 76 55 <0f> b7 7d 04 0f b7 45 06 4c 8d 74 3d 00 66 83 f8 04 75 bc ba 04 00 RSP: 0018:ffffc900007939e0 EFLAGS: 00010216 RAX: ffffc90000793c78 RBX: ffff8880180cc000 RCX: ffffc90000793c90 RDX: ffffc90000793cc0 RSI: ffff8880178d8cc0 RDI: ffff8880180cc000 RBP: ffff8881178d8cbf R08: ffffc90000793c22 R09: 0000000000000000 R10: ffff8880180cc000 R11: 0000000000000024 R12: 0000000000000000 R13: 0000000000000020 R14: 0000000000000000 R15: ffffc90000793c22 FS: 00007f873753cbc0(0000) GS:ffff88806bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff8881178d8cc3 CR3: 00000000181ca000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x181/0x480 ? search_module_extables+0x19/0x60 ? srso_alias_return_thunk+0x5/0xfbef5 ? exc_page_fault+0x1b6/0x1c0 ? asm_exc_page_fault+0x26/0x30 ? smb2_parse_contexts+0xa0/0x3a0 [cifs] SMB2_open+0x38d/0x5f0 [cifs] ? smb2_is_path_accessible+0x138/0x260 [cifs] smb2_is_path_accessible+0x138/0x260 [cifs] cifs_is_path_remote+0x8d/0x230 [cifs] cifs_mount+0x7e/0x350 [cifs] cifs_smb3_do_mount+0x128/0x780 [cifs] smb3_get_tree+0xd9/0x290 [cifs] vfs_get_tree+0x2c/0x100 ? capable+0x37/0x70 path_mount+0x2d7/0xb80 ? srso_alias_return_thunk+0x5/0xfbef5 ? _raw_spin_unlock_irqrestore+0x44/0x60 __x64_sys_mount+0x11a/0x150 do_syscall_64+0x47/0xf0 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f8737657b1e Reported-by: Robert Morris Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cached_dir.c | 17 +++++-- fs/smb/client/smb2pdu.c | 93 +++++++++++++++++++++++--------------- fs/smb/client/smb2proto.h | 12 +++-- 3 files changed, 75 insertions(+), 47 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 59f6b8e32cc9..d64a306a414b 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -291,16 +291,23 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, oparms.fid->mid = le64_to_cpu(o_rsp->hdr.MessageId); #endif /* CIFS_DEBUG2 */ - rc = -EINVAL; + if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) { + spin_unlock(&cfids->cfid_list_lock); + rc = -EINVAL; + goto oshr_free; + } + + rc = smb2_parse_contexts(server, rsp_iov, + &oparms.fid->epoch, + oparms.fid->lease_key, + &oplock, NULL, NULL); + if (rc) { spin_unlock(&cfids->cfid_list_lock); goto oshr_free; } - smb2_parse_contexts(server, o_rsp, - &oparms.fid->epoch, - oparms.fid->lease_key, &oplock, - NULL, NULL); + rc = -EINVAL; if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) { spin_unlock(&cfids->cfid_list_lock); goto oshr_free; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c75a80bb6d9e..2df118540e89 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2141,17 +2141,18 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info, posix->nlink, posix->mode, posix->reparse_tag); } -void -smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, __u8 *oplock, - struct smb2_file_all_info *buf, - struct create_posix_rsp *posix) +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix) { - char *data_offset; + struct smb2_create_rsp *rsp = rsp_iov->iov_base; struct create_context *cc; - unsigned int next; - unsigned int remaining; + size_t rem, off, len; + size_t doff, dlen; + size_t noff, nlen; char *name; static const char smb3_create_tag_posix[] = { 0x93, 0xAD, 0x25, 0x50, 0x9C, @@ -2160,45 +2161,63 @@ smb2_parse_contexts(struct TCP_Server_Info *server, }; *oplock = 0; - data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset); - remaining = le32_to_cpu(rsp->CreateContextsLength); - cc = (struct create_context *)data_offset; + + off = le32_to_cpu(rsp->CreateContextsOffset); + rem = le32_to_cpu(rsp->CreateContextsLength); + if (check_add_overflow(off, rem, &len) || len > rsp_iov->iov_len) + return -EINVAL; + cc = (struct create_context *)((u8 *)rsp + off); /* Initialize inode number to 0 in case no valid data in qfid context */ if (buf) buf->IndexNumber = 0; - while (remaining >= sizeof(struct create_context)) { - name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) == 4 && - strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0) - *oplock = server->ops->parse_lease_buf(cc, epoch, - lease_key); - else if (buf && (le16_to_cpu(cc->NameLength) == 4) && - strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0) - parse_query_id_ctxt(cc, buf); - else if ((le16_to_cpu(cc->NameLength) == 16)) { - if (posix && - memcmp(name, smb3_create_tag_posix, 16) == 0) + while (rem >= sizeof(*cc)) { + doff = le16_to_cpu(cc->DataOffset); + dlen = le32_to_cpu(cc->DataLength); + if (check_add_overflow(doff, dlen, &len) || len > rem) + return -EINVAL; + + noff = le16_to_cpu(cc->NameOffset); + nlen = le16_to_cpu(cc->NameLength); + if (noff + nlen >= doff) + return -EINVAL; + + name = (char *)cc + noff; + switch (nlen) { + case 4: + if (!strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) { + *oplock = server->ops->parse_lease_buf(cc, epoch, + lease_key); + } else if (buf && + !strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4)) { + parse_query_id_ctxt(cc, buf); + } + break; + case 16: + if (posix && !memcmp(name, smb3_create_tag_posix, 16)) parse_posix_ctxt(cc, buf, posix); + break; + default: + cifs_dbg(FYI, "%s: unhandled context (nlen=%zu dlen=%zu)\n", + __func__, nlen, dlen); + if (IS_ENABLED(CONFIG_CIFS_DEBUG2)) + cifs_dump_mem("context data: ", cc, dlen); + break; } - /* else { - cifs_dbg(FYI, "Context not matched with len %d\n", - le16_to_cpu(cc->NameLength)); - cifs_dump_mem("Cctxt name: ", name, 4); - } */ - - next = le32_to_cpu(cc->Next); - if (!next) + + off = le32_to_cpu(cc->Next); + if (!off) break; - remaining -= next; - cc = (struct create_context *)((char *)cc + next); + if (check_sub_overflow(rem, off, &rem)) + return -EINVAL; + cc = (struct create_context *)((u8 *)cc + off); } if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) *oplock = rsp->OplockLevel; - return; + return 0; } static int @@ -3029,8 +3048,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, } - smb2_parse_contexts(server, rsp, &oparms->fid->epoch, - oparms->fid->lease_key, oplock, buf, posix); + rc = smb2_parse_contexts(server, &rsp_iov, &oparms->fid->epoch, + oparms->fid->lease_key, oplock, buf, posix); creat_exit: SMB2_open_free(&rqst); free_rsp_buf(resp_buftype, rsp); diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 46eff9ec302a..0e371f7e2854 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -251,11 +251,13 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); -extern void smb2_parse_contexts(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key, - __u8 *oplock, struct smb2_file_all_info *buf, - struct create_posix_rsp *posix); +int smb2_parse_contexts(struct TCP_Server_Info *server, + struct kvec *rsp_iov, + unsigned int *epoch, + char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf, + struct create_posix_rsp *posix); + extern int smb3_encryption_required(const struct cifs_tcon *tcon); extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length, struct kvec *iov, unsigned int min_buf_size); From ef748d4a62a788e235b5443bef664b39e00444a8 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 11 Dec 2023 10:26:42 -0300 Subject: [PATCH 1176/1397] smb: client: fix NULL deref in asn1_ber_decoder() commit 90d025c2e953c11974e76637977c473200593a46 upstream. If server replied SMB2_NEGOTIATE with a zero SecurityBufferOffset, smb2_get_data_area() sets @len to non-zero but return NULL, so decode_negTokeninit() ends up being called with a NULL @security_blob: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 2 PID: 871 Comm: mount.cifs Not tainted 6.7.0-rc4 #2 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 RIP: 0010:asn1_ber_decoder+0x173/0xc80 Code: 01 4c 39 2c 24 75 09 45 84 c9 0f 85 2f 03 00 00 48 8b 14 24 4c 29 ea 48 83 fa 01 0f 86 1e 07 00 00 48 8b 74 24 28 4d 8d 5d 01 <42> 0f b6 3c 2e 89 fa 40 88 7c 24 5c f7 d2 83 e2 1f 0f 84 3d 07 00 RSP: 0018:ffffc9000063f950 EFLAGS: 00010202 RAX: 0000000000000002 RBX: 0000000000000000 RCX: 000000000000004a RDX: 000000000000004a RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000002 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000000 R14: 000000000000004d R15: 0000000000000000 FS: 00007fce52b0fbc0(0000) GS:ffff88806ba00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000001ae64000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x181/0x480 ? __stack_depot_save+0x1e6/0x480 ? exc_page_fault+0x6f/0x1c0 ? asm_exc_page_fault+0x26/0x30 ? asn1_ber_decoder+0x173/0xc80 ? check_object+0x40/0x340 decode_negTokenInit+0x1e/0x30 [cifs] SMB2_negotiate+0xc99/0x17c0 [cifs] ? smb2_negotiate+0x46/0x60 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 smb2_negotiate+0x46/0x60 [cifs] cifs_negotiate_protocol+0xae/0x130 [cifs] cifs_get_smb_ses+0x517/0x1040 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? queue_delayed_work_on+0x5d/0x90 cifs_mount_get_session+0x78/0x200 [cifs] dfs_mount_share+0x13a/0x9f0 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 ? lock_acquire+0xbf/0x2b0 ? find_nls+0x16/0x80 ? srso_alias_return_thunk+0x5/0xfbef5 cifs_mount+0x7e/0x350 [cifs] cifs_smb3_do_mount+0x128/0x780 [cifs] smb3_get_tree+0xd9/0x290 [cifs] vfs_get_tree+0x2c/0x100 ? capable+0x37/0x70 path_mount+0x2d7/0xb80 ? srso_alias_return_thunk+0x5/0xfbef5 ? _raw_spin_unlock_irqrestore+0x44/0x60 __x64_sys_mount+0x11a/0x150 do_syscall_64+0x47/0xf0 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7fce52c2ab1e Fix this by setting @len to zero when @off == 0 so callers won't attempt to dereference non-existing data areas. Reported-by: Robert Morris Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2misc.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 32dfa0f7a78c..e20b4354e703 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -313,6 +313,9 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = { char * smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr) { + const int max_off = 4096; + const int max_len = 128 * 1024; + *off = 0; *len = 0; @@ -384,29 +387,20 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *shdr) * Invalid length or offset probably means data area is invalid, but * we have little choice but to ignore the data area in this case. */ - if (*off > 4096) { - cifs_dbg(VFS, "offset %d too large, data area ignored\n", *off); - *len = 0; - *off = 0; - } else if (*off < 0) { - cifs_dbg(VFS, "negative offset %d to data invalid ignore data area\n", - *off); + if (unlikely(*off < 0 || *off > max_off || + *len < 0 || *len > max_len)) { + cifs_dbg(VFS, "%s: invalid data area (off=%d len=%d)\n", + __func__, *off, *len); *off = 0; *len = 0; - } else if (*len < 0) { - cifs_dbg(VFS, "negative data length %d invalid, data area ignored\n", - *len); - *len = 0; - } else if (*len > 128 * 1024) { - cifs_dbg(VFS, "data area larger than 128K: %d\n", *len); + } else if (*off == 0) { *len = 0; } /* return pointer to beginning of data area, ie offset from SMB start */ - if ((*off != 0) && (*len != 0)) + if (*off > 0 && *len > 0) return (char *)shdr + *off; - else - return NULL; + return NULL; } /* From e72ed491bc6ef97bec6a976cb3832f0e01da24f0 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 11 Dec 2023 10:26:43 -0300 Subject: [PATCH 1177/1397] smb: client: fix OOB in smb2_query_reparse_point() commit 3a42709fa909e22b0be4bb1e2795aa04ada732a3 upstream. Validate @ioctl_rsp->OutputOffset and @ioctl_rsp->OutputCount so that their sum does not wrap to a number that is smaller than @reparse_buf and we end up with a wild pointer as follows: BUG: unable to handle page fault for address: ffff88809c5cd45f #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 4a01067 P4D 4a01067 PUD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 2 PID: 1260 Comm: mount.cifs Not tainted 6.7.0-rc4 #2 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 RIP: 0010:smb2_query_reparse_point+0x3e0/0x4c0 [cifs] Code: ff ff e8 f3 51 fe ff 41 89 c6 58 5a 45 85 f6 0f 85 14 fe ff ff 49 8b 57 48 8b 42 60 44 8b 42 64 42 8d 0c 00 49 39 4f 50 72 40 <8b> 04 02 48 8b 9d f0 fe ff ff 49 8b 57 50 89 03 48 8b 9d e8 fe ff RSP: 0018:ffffc90000347a90 EFLAGS: 00010212 RAX: 000000008000001f RBX: ffff88800ae11000 RCX: 00000000000000ec RDX: ffff88801c5cd440 RSI: 0000000000000000 RDI: ffffffff82004aa4 RBP: ffffc90000347bb0 R08: 00000000800000cd R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000024 R12: ffff8880114d4100 R13: ffff8880114d4198 R14: 0000000000000000 R15: ffff8880114d4000 FS: 00007f02c07babc0(0000) GS:ffff88806ba00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88809c5cd45f CR3: 0000000011750000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x181/0x480 ? search_module_extables+0x19/0x60 ? srso_alias_return_thunk+0x5/0xfbef5 ? exc_page_fault+0x1b6/0x1c0 ? asm_exc_page_fault+0x26/0x30 ? _raw_spin_unlock_irqrestore+0x44/0x60 ? smb2_query_reparse_point+0x3e0/0x4c0 [cifs] cifs_get_fattr+0x16e/0xa50 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 ? lock_acquire+0xbf/0x2b0 cifs_root_iget+0x163/0x5f0 [cifs] cifs_smb3_do_mount+0x5bd/0x780 [cifs] smb3_get_tree+0xd9/0x290 [cifs] vfs_get_tree+0x2c/0x100 ? capable+0x37/0x70 path_mount+0x2d7/0xb80 ? srso_alias_return_thunk+0x5/0xfbef5 ? _raw_spin_unlock_irqrestore+0x44/0x60 __x64_sys_mount+0x11a/0x150 do_syscall_64+0x47/0xf0 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f02c08d5b1e Fixes: 2e4564b31b64 ("smb3: add support for stat of WSL reparse points for special file types") Cc: stable@vger.kernel.org Reported-by: Robert Morris Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2ops.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 258a591f716a..dbcfdf7bc270 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3001,7 +3001,7 @@ static int smb2_query_reparse_point(const unsigned int xid, struct kvec *rsp_iov; struct smb2_ioctl_rsp *ioctl_rsp; struct reparse_data_buffer *reparse_buf; - u32 plen; + u32 off, count, len; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); @@ -3082,16 +3082,22 @@ static int smb2_query_reparse_point(const unsigned int xid, */ if (rc == 0) { /* See MS-FSCC 2.3.23 */ + off = le32_to_cpu(ioctl_rsp->OutputOffset); + count = le32_to_cpu(ioctl_rsp->OutputCount); + if (check_add_overflow(off, count, &len) || + len > rsp_iov[1].iov_len) { + cifs_tcon_dbg(VFS, "%s: invalid ioctl: off=%d count=%d\n", + __func__, off, count); + rc = -EIO; + goto query_rp_exit; + } - reparse_buf = (struct reparse_data_buffer *) - ((char *)ioctl_rsp + - le32_to_cpu(ioctl_rsp->OutputOffset)); - plen = le32_to_cpu(ioctl_rsp->OutputCount); - - if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) > - rsp_iov[1].iov_len) { - cifs_tcon_dbg(FYI, "srv returned invalid ioctl len: %d\n", - plen); + reparse_buf = (void *)((u8 *)ioctl_rsp + off); + len = sizeof(*reparse_buf); + if (count < len || + count < le16_to_cpu(reparse_buf->ReparseDataLength) + len) { + cifs_tcon_dbg(VFS, "%s: invalid ioctl: off=%d count=%d\n", + __func__, off, count); rc = -EIO; goto query_rp_exit; } From b02bf0d952ad237444d91881adc56a75912e5e7a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sun, 10 Dec 2023 22:12:50 -0500 Subject: [PATCH 1178/1397] ring-buffer: Fix memory leak of free page commit 17d801758157bec93f26faaf5ff1a8b9a552d67a upstream. Reading the ring buffer does a swap of a sub-buffer within the ring buffer with a empty sub-buffer. This allows the reader to have full access to the content of the sub-buffer that was swapped out without having to worry about contention with the writer. The readers call ring_buffer_alloc_read_page() to allocate a page that will be used to swap with the ring buffer. When the code is finished with the reader page, it calls ring_buffer_free_read_page(). Instead of freeing the page, it stores it as a spare. Then next call to ring_buffer_alloc_read_page() will return this spare instead of calling into the memory management system to allocate a new page. Unfortunately, on freeing of the ring buffer, this spare page is not freed, and causes a memory leak. Link: https://lore.kernel.org/linux-trace-kernel/20231210221250.7b9cc83c@rorschach.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer") Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f1ef4329343b..be41f483c16e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1787,6 +1787,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) free_buffer_page(bpage); } + free_page((unsigned long)cpu_buffer->free_page); + kfree(cpu_buffer); } From 5062b8c5ae2fe11c52bbb824723445d9dd562b3e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sun, 10 Dec 2023 22:54:47 -0500 Subject: [PATCH 1179/1397] tracing: Update snapshot buffer on resize if it is allocated commit d06aff1cb13d2a0d52b48e605462518149c98c81 upstream. The snapshot buffer is to mimic the main buffer so that when a snapshot is needed, the snapshot and main buffer are swapped. When the snapshot buffer is allocated, it is set to the minimal size that the ring buffer may be at and still functional. When it is allocated it becomes the same size as the main ring buffer, and when the main ring buffer changes in size, it should do. Currently, the resize only updates the snapshot buffer if it's used by the current tracer (ie. the preemptirqsoff tracer). But it needs to be updated anytime it is allocated. When changing the size of the main buffer, instead of looking to see if the current tracer is utilizing the snapshot buffer, just check if it is allocated to know if it should be updated or not. Also fix typo in comment just above the code change. Link: https://lore.kernel.org/linux-trace-kernel/20231210225447.48476a6a@rorschach.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: ad909e21bbe69 ("tracing: Add internal tracing_snapshot() functions") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b656cab67f67..d3664b803623 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6352,7 +6352,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, if (!tr->array_buffer.buffer) return 0; - /* Do not allow tracing while resizng ring buffer */ + /* Do not allow tracing while resizing ring buffer */ tracing_stop_tr(tr); ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); @@ -6360,7 +6360,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, goto out_start; #ifdef CONFIG_TRACER_MAX_TRACE - if (!tr->current_trace->use_max_tr) + if (!tr->allocated_snapshot) goto out; ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); From 5e584836779b15ff1f40b8eec0d261355dbd026f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 11 Dec 2023 11:44:20 -0500 Subject: [PATCH 1180/1397] ring-buffer: Do not update before stamp when switching sub-buffers commit 9e45e39dc249c970d99d2681f6bcb55736fd725c upstream. The ring buffer timestamps are synchronized by two timestamp placeholders. One is the "before_stamp" and the other is the "write_stamp" (sometimes referred to as the "after stamp" but only in the comments. These two stamps are key to knowing how to handle nested events coming in with a lockless system. When moving across sub-buffers, the before stamp is updated but the write stamp is not. There's an effort to put back the before stamp to something that seems logical in case there's nested events. But as the current event is about to cross sub-buffers, and so will any new nested event that happens, updating the before stamp is useless, and could even introduce new race conditions. The first event on a sub-buffer simply uses the sub-buffer's timestamp and keeps a "delta" of zero. The "before_stamp" and "write_stamp" are not used in the algorithm in this case. There's no reason to try to fix the before_stamp when this happens. As a bonus, it removes a cmpxchg() when crossing sub-buffers! Link: https://lore.kernel.org/linux-trace-kernel/20231211114420.36dde01b@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index be41f483c16e..c915a855ceeb 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3604,14 +3604,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* See if we shot pass the end of this buffer page */ if (unlikely(write > BUF_PAGE_SIZE)) { - /* before and after may now different, fix it up*/ - b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - if (a_ok && b_ok && info->before != info->after) - (void)rb_time_cmpxchg(&cpu_buffer->before_stamp, - info->before, info->after); - if (a_ok && b_ok) - check_buffer(cpu_buffer, info, CHECK_FULL_PAGE); + check_buffer(cpu_buffer, info, CHECK_FULL_PAGE); return rb_move_tail(cpu_buffer, tail, info); } From 307ed139d7af56502698438f0a737e4c7dfcac82 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Dec 2023 07:25:58 -0500 Subject: [PATCH 1181/1397] ring-buffer: Have saved event hold the entire event commit b049525855fdd0024881c9b14b8fbec61c3f53d3 upstream. For the ring buffer iterator (non-consuming read), the event needs to be copied into the iterator buffer to make sure that a writer does not overwrite it while the user is reading it. If a write happens during the copy, the buffer is simply discarded. But the temp buffer itself was not big enough. The allocation of the buffer was only BUF_MAX_DATA_SIZE, which is the maximum data size that can be passed into the ring buffer and saved. But the temp buffer needs to hold the meta data as well. That would be BUF_PAGE_SIZE and not BUF_MAX_DATA_SIZE. Link: https://lore.kernel.org/linux-trace-kernel/20231212072558.61f76493@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: 785888c544e04 ("ring-buffer: Have rb_iter_head_event() handle concurrent writer") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c915a855ceeb..5de1e59f66a9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2409,7 +2409,7 @@ rb_iter_head_event(struct ring_buffer_iter *iter) */ barrier(); - if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE) + if ((iter->head + length) > commit || length > BUF_PAGE_SIZE) /* Writer corrupted the read? */ goto reset; @@ -5113,7 +5113,8 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags) if (!iter) return NULL; - iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags); + /* Holds the entire event: data and meta data */ + iter->event = kmalloc(BUF_PAGE_SIZE, flags); if (!iter->event) { kfree(iter); return NULL; From ae76d9bdf100e588109036c3b7db23b6693e46e3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Dec 2023 11:16:17 -0500 Subject: [PATCH 1182/1397] ring-buffer: Fix writing to the buffer with max_data_size commit b3ae7b67b87fed771fa5bf95389df06b0433603e upstream. The maximum ring buffer data size is the maximum size of data that can be recorded on the ring buffer. Events must be smaller than the sub buffer data size minus any meta data. This size is checked before trying to allocate from the ring buffer because the allocation assumes that the size will fit on the sub buffer. The maximum size was calculated as the size of a sub buffer page (which is currently PAGE_SIZE minus the sub buffer header) minus the size of the meta data of an individual event. But it missed the possible adding of a time stamp for events that are added long enough apart that the event meta data can't hold the time delta. When an event is added that is greater than the current BUF_MAX_DATA_SIZE minus the size of a time stamp, but still less than or equal to BUF_MAX_DATA_SIZE, the ring buffer would go into an infinite loop, looking for a page that can hold the event. Luckily, there's a check for this loop and after 1000 iterations and a warning is emitted and the ring buffer is disabled. But this should never happen. This can happen when a large event is added first, or after a long period where an absolute timestamp is prefixed to the event, increasing its size by 8 bytes. This passes the check and then goes into the algorithm that causes the infinite loop. For events that are the first event on the sub-buffer, it does not need to add a timestamp, because the sub-buffer itself contains an absolute timestamp, and adding one is redundant. The fix is to check if the event is to be the first event on the sub-buffer, and if it is, then do not add a timestamp. This also fixes 32 bit adding a timestamp when a read of before_stamp or write_stamp is interrupted. There's still no need to add that timestamp if the event is going to be the first event on the sub buffer. Also, if the buffer has "time_stamp_abs" set, then also check if the length plus the timestamp is greater than the BUF_MAX_DATA_SIZE. Link: https://lore.kernel.org/all/20231212104549.58863438@gandalf.local.home/ Link: https://lore.kernel.org/linux-trace-kernel/20231212071837.5fdd6c13@gandalf.local.home Link: https://lore.kernel.org/linux-trace-kernel/20231212111617.39e02849@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: a4543a2fa9ef3 ("ring-buffer: Get timestamp after event is allocated") Fixes: 58fbc3c63275c ("ring-buffer: Consolidate add_timestamp to remove some branches") Reported-by: Kent Overstreet # (on IRC) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5de1e59f66a9..0fe35ebd2509 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3581,7 +3581,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * absolute timestamp. * Don't bother if this is the start of a new page (w == 0). */ - if (unlikely(!a_ok || !b_ok || (info->before != info->after && w))) { + if (!w) { + /* Use the sub-buffer timestamp */ + info->delta = 0; + } else if (unlikely(!a_ok || !b_ok || info->before != info->after)) { info->add_timestamp |= RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND; info->length += RB_LEN_TIME_EXTEND; } else { @@ -3732,6 +3735,8 @@ rb_reserve_next_event(struct trace_buffer *buffer, if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) { add_ts_default = RB_ADD_STAMP_ABSOLUTE; info.length += RB_LEN_TIME_EXTEND; + if (info.length > BUF_MAX_DATA_SIZE) + goto out_fail; } else { add_ts_default = RB_ADD_STAMP_NONE; } From bc17bc96432868ee5d59c44556959306c220aea7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Dec 2023 11:53:01 -0500 Subject: [PATCH 1183/1397] ring-buffer: Fix a race in rb_time_cmpxchg() for 32 bit archs commit fff88fa0fbc7067ba46dde570912d63da42c59a9 upstream. Mathieu Desnoyers pointed out an issue in the rb_time_cmpxchg() for 32 bit architectures. That is: static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) { unsigned long cnt, top, bottom, msb; unsigned long cnt2, top2, bottom2, msb2; u64 val; /* The cmpxchg always fails if it interrupted an update */ if (!__rb_time_read(t, &val, &cnt2)) return false; if (val != expect) return false; <<<< interrupted here! cnt = local_read(&t->cnt); The problem is that the synchronization counter in the rb_time_t is read *after* the value of the timestamp is read. That means if an interrupt were to come in between the value being read and the counter being read, it can change the value and the counter and the interrupted process would be clueless about it! The counter needs to be read first and then the value. That way it is easy to tell if the value is stale or not. If the counter hasn't been updated, then the value is still good. Link: https://lore.kernel.org/linux-trace-kernel/20231211201324.652870-1-mathieu.desnoyers@efficios.com/ Link: https://lore.kernel.org/linux-trace-kernel/20231212115301.7a9c9a64@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Fixes: 10464b4aa605e ("ring-buffer: Add rb_time_t 64 bit operations for speeding up 32 bit") Reported-by: Mathieu Desnoyers Reviewed-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0fe35ebd2509..93a7b5b41cfd 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -706,6 +706,9 @@ static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) unsigned long cnt2, top2, bottom2, msb2; u64 val; + /* Any interruptions in this function should cause a failure */ + cnt = local_read(&t->cnt); + /* The cmpxchg always fails if it interrupted an update */ if (!__rb_time_read(t, &val, &cnt2)) return false; @@ -713,7 +716,6 @@ static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) if (val != expect) return false; - cnt = local_read(&t->cnt); if ((cnt & 3) != cnt2) return false; From b3778a2fa4a281a21ea0f1d12022cdcf90616f61 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 14 Dec 2023 22:29:21 -0500 Subject: [PATCH 1184/1397] ring-buffer: Do not try to put back write_stamp commit dd939425707898da992e59ab0fcfae4652546910 upstream. If an update to an event is interrupted by another event between the time the initial event allocated its buffer and where it wrote to the write_stamp, the code try to reset the write stamp back to the what it had just overwritten. It knows that it was overwritten via checking the before_stamp, and if it didn't match what it wrote to the before_stamp before it allocated its space, it knows it was overwritten. To put back the write_stamp, it uses the before_stamp it read. The problem here is that by writing the before_stamp to the write_stamp it makes the two equal again, which means that the write_stamp can be considered valid as the last timestamp written to the ring buffer. But this is not necessarily true. The event that interrupted the event could have been interrupted in a way that it was interrupted as well, and can end up leaving with an invalid write_stamp. But if this happens and returns to this context that uses the before_stamp to update the write_stamp again, it can possibly incorrectly make it valid, causing later events to have in correct time stamps. As it is OK to leave this function with an invalid write_stamp (one that doesn't match the before_stamp), there's no reason to try to make it valid again in this case. If this race happens, then just leave with the invalid write_stamp and the next event to come along will just add a absolute timestamp and validate everything again. Bonus points: This gets rid of another cmpxchg64! Link: https://lore.kernel.org/linux-trace-kernel/20231214222921.193037a7@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Joel Fernandes Cc: Vincent Donnefort Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 93a7b5b41cfd..803f0cee4ab9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3614,14 +3614,14 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, } if (likely(tail == w)) { - u64 save_before; - bool s_ok; - /* Nothing interrupted us between A and C */ /*D*/ rb_time_set(&cpu_buffer->write_stamp, info->ts); - barrier(); - /*E*/ s_ok = rb_time_read(&cpu_buffer->before_stamp, &save_before); - RB_WARN_ON(cpu_buffer, !s_ok); + /* + * If something came in between C and D, the write stamp + * may now not be in sync. But that's fine as the before_stamp + * will be different and then next event will just be forced + * to use an absolute timestamp. + */ if (likely(!(info->add_timestamp & (RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)))) /* This did not interrupt any time update */ @@ -3629,24 +3629,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, else /* Just use full timestamp for interrupting event */ info->delta = info->ts; - barrier(); check_buffer(cpu_buffer, info, tail); - if (unlikely(info->ts != save_before)) { - /* SLOW PATH - Interrupted between C and E */ - - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - RB_WARN_ON(cpu_buffer, !a_ok); - - /* Write stamp must only go forward */ - if (save_before > info->after) { - /* - * We do not care about the result, only that - * it gets updated atomically. - */ - (void)rb_time_cmpxchg(&cpu_buffer->write_stamp, - info->after, save_before); - } - } } else { u64 ts; /* SLOW PATH - Interrupted between A and C */ From 3432f9686a37a591da81bff73d15da3735e4bac8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 15 Dec 2023 08:41:14 -0500 Subject: [PATCH 1185/1397] ring-buffer: Have rb_time_cmpxchg() set the msb counter too commit 0aa0e5289cfe984a8a9fdd79ccf46ccf080151f7 upstream. The rb_time_cmpxchg() on 32-bit architectures requires setting three 32-bit words to represent the 64-bit timestamp, with some salt for synchronization. Those are: msb, top, and bottom The issue is, the rb_time_cmpxchg() did not properly salt the msb portion, and the msb that was written was stale. Link: https://lore.kernel.org/linux-trace-kernel/20231215084114.20899342@rorschach.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Fixes: f03f2abce4f39 ("ring-buffer: Have 32 bit time stamps use all 64 bits") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 803f0cee4ab9..af08a1a411e3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -722,10 +722,12 @@ static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) cnt2 = cnt + 1; rb_time_split(val, &top, &bottom, &msb); + msb = rb_time_val_cnt(msb, cnt); top = rb_time_val_cnt(top, cnt); bottom = rb_time_val_cnt(bottom, cnt); rb_time_split(set, &top2, &bottom2, &msb2); + msb2 = rb_time_val_cnt(msb2, cnt); top2 = rb_time_val_cnt(top2, cnt2); bottom2 = rb_time_val_cnt(bottom2, cnt2); From 06f61af8025452514945657e9b4cb1c9ba8967c5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 19 Sep 2023 17:17:28 -0700 Subject: [PATCH 1186/1397] x86/speculation, objtool: Use absolute relocations for annotations commit b8ec60e1186cdcfce41e7db4c827cb107e459002 upstream. .discard.retpoline_safe sections do not have the SHF_ALLOC flag. These sections referencing text sections' STT_SECTION symbols with PC-relative relocations like R_386_PC32 [0] is conceptually not suitable. Newer LLD will report warnings for REL relocations even for relocatable links [1]: ld.lld: warning: vmlinux.a(drivers/i2c/busses/i2c-i801.o):(.discard.retpoline_safe+0x120): has non-ABS relocation R_386_PC32 against symbol '' Switch to absolute relocations instead, which indicate link-time addresses. In a relocatable link, these addresses are also output section offsets, used by checks in tools/objtool/check.c. When linking vmlinux, these .discard.* sections will be discarded, therefore it is not a problem that R_X86_64_32 cannot represent a kernel address. Alternatively, we could set the SHF_ALLOC flag for .discard.* sections, but I think non-SHF_ALLOC for sections to be discarded makes more sense. Note: if we decide to never support REL architectures (e.g. arm, i386), we can utilize R_*_NONE relocations (.reloc ., BFD_RELOC_NONE, sym), making .discard.* sections zero-sized. That said, the section content waste is 4 bytes per entry, much smaller than sizeof(Elf{32,64}_Rel). [0] commit 1c0c1faf5692 ("objtool: Use relative pointers for annotations") [1] https://github.com/ClangBuiltLinux/linux/issues/1937 Signed-off-by: Fangrui Song Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20230920001728.1439947-1-maskray@google.com Cc: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/alternative.h | 4 ++-- arch/x86/include/asm/nospec-branch.h | 4 ++-- include/linux/objtool.h | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 9c4da699e11a..65f79092c9d9 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -58,7 +58,7 @@ #define ANNOTATE_IGNORE_ALTERNATIVE \ "999:\n\t" \ ".pushsection .discard.ignore_alts\n\t" \ - ".long 999b - .\n\t" \ + ".long 999b\n\t" \ ".popsection\n\t" /* @@ -352,7 +352,7 @@ static inline int alternatives_text_reserved(void *start, void *end) .macro ANNOTATE_IGNORE_ALTERNATIVE .Lannotate_\@: .pushsection .discard.ignore_alts - .long .Lannotate_\@ - . + .long .Lannotate_\@ .popsection .endm diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 197ff4f4d1ce..0396458c201f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -196,7 +196,7 @@ .macro ANNOTATE_RETPOLINE_SAFE .Lhere_\@: .pushsection .discard.retpoline_safe - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm @@ -334,7 +334,7 @@ #define ANNOTATE_RETPOLINE_SAFE \ "999:\n\t" \ ".pushsection .discard.retpoline_safe\n\t" \ - ".long 999b - .\n\t" \ + ".long 999b\n\t" \ ".popsection\n\t" typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; diff --git a/include/linux/objtool.h b/include/linux/objtool.h index b5440e7da55b..33212e93f4a6 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -48,13 +48,13 @@ #define ANNOTATE_NOENDBR \ "986: \n\t" \ ".pushsection .discard.noendbr\n\t" \ - ".long 986b - .\n\t" \ + ".long 986b\n\t" \ ".popsection\n\t" #define ASM_REACHABLE \ "998:\n\t" \ ".pushsection .discard.reachable\n\t" \ - ".long 998b - .\n\t" \ + ".long 998b\n\t" \ ".popsection\n\t" #else /* __ASSEMBLY__ */ @@ -66,7 +66,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL \ 999: \ .pushsection .discard.intra_function_calls; \ - .long 999b - .; \ + .long 999b; \ .popsection; /* @@ -118,7 +118,7 @@ .macro ANNOTATE_NOENDBR .Lhere_\@: .pushsection .discard.noendbr - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm @@ -142,7 +142,7 @@ .macro REACHABLE .Lhere_\@: .pushsection .discard.reachable - .long .Lhere_\@ - . + .long .Lhere_\@ .popsection .endm From 885faf3c7e5f53561c34104d140f2e321ea305a6 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 19 Oct 2023 11:47:05 +0300 Subject: [PATCH 1187/1397] RDMA/mlx5: Change the key being sent for MPV device affiliation commit 02e7d139e5e24abb5fde91934fc9dc0344ac1926 upstream. Change the key that we send from IB driver to EN driver regarding the MPV device affiliation, since at that stage the IB device is not yet initialized, so its index would be zero for different IB devices and cause wrong associations between unrelated master and slave devices. Instead use a unique value from inside the core device which is already initialized at this stage. Fixes: 0d293714ac32 ("RDMA/mlx5: Send events from IB driver about device affiliation state") Signed-off-by: Patrisious Haddad Link: https://lore.kernel.org/r/ac7e66357d963fc68d7a419515180212c96d137d.1697705185.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4c4233b9c8b0..102ead497196 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3263,7 +3263,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, mlx5_ib_init_cong_debugfs(ibdev, port_num); - key = ibdev->ib_dev.index; + key = mpi->mdev->priv.adev_idx; mlx5_core_mp_event_replay(mpi->mdev, MLX5_DRIVER_EVENT_AFFILIATION_DONE, &key); From 4c9646a796d66a2d81871a694e88e19a38b115a7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 20 Dec 2023 17:02:07 +0100 Subject: [PATCH 1188/1397] Linux 6.6.8 Link: https://lore.kernel.org/r/20231218135104.927894164@linuxfoundation.org Tested-by: Ronald Warsow Tested-by: SeongJae Park Tested-by: Conor Dooley Tested-by: Ricardo B. Marliere Tested-by: Salvatore Bonaccorso Tested-by: Takeshi Ogasawara Tested-by: Justin M. Forbes Tested-by: Shuah Khan Tested-by: Kelsey Steele Tested-by: Bagas Sanjaya Tested-by: Linux Kernel Functional Testing Tested-by: Ron Economos Tested-by: Florian Fainelli Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 707952172ece..891ef640396c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 7 +SUBLEVEL = 8 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth From f64b2dc8a44699f26a83c19dd77540ca7655dc24 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2023 09:30:40 +0100 Subject: [PATCH 1189/1397] bpf: Fix prog_array_map_poke_run map poke update commit 4b7de801606e504e69689df71475d27e35336fb3 upstream. Lee pointed out issue found by syscaller [0] hitting BUG in prog array map poke update in prog_array_map_poke_run function due to error value returned from bpf_arch_text_poke function. There's race window where bpf_arch_text_poke can fail due to missing bpf program kallsym symbols, which is accounted for with check for -EINVAL in that BUG_ON call. The problem is that in such case we won't update the tail call jump and cause imbalance for the next tail call update check which will fail with -EBUSY in bpf_arch_text_poke. I'm hitting following race during the program load: CPU 0 CPU 1 bpf_prog_load bpf_check do_misc_fixups prog_array_map_poke_track map_update_elem bpf_fd_array_map_update_elem prog_array_map_poke_run bpf_arch_text_poke returns -EINVAL bpf_prog_kallsyms_add After bpf_arch_text_poke (CPU 1) fails to update the tail call jump, the next poke update fails on expected jump instruction check in bpf_arch_text_poke with -EBUSY and triggers the BUG_ON in prog_array_map_poke_run. Similar race exists on the program unload. Fixing this by moving the update to bpf_arch_poke_desc_update function which makes sure we call __bpf_arch_text_poke that skips the bpf address check. Each architecture has slightly different approach wrt looking up bpf address in bpf_arch_text_poke, so instead of splitting the function or adding new 'checkip' argument in previous version, it seems best to move the whole map_poke_run update as arch specific code. [0] https://syzkaller.appspot.com/bug?extid=97a4fe20470e9bc30810 Fixes: ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT") Reported-by: syzbot+97a4fe20470e9bc30810@syzkaller.appspotmail.com Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Cc: Lee Jones Cc: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20231206083041.1306660-2-jolsa@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/net/bpf_jit_comp.c | 46 +++++++++++++++++++++++++++++ include/linux/bpf.h | 3 ++ kernel/bpf/arraymap.c | 58 +++++++------------------------------ 3 files changed, 59 insertions(+), 48 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 52f36c48c1b9..955133077c10 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2929,3 +2929,49 @@ void bpf_jit_free(struct bpf_prog *prog) bpf_prog_unlock_free(prog); } + +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + u8 *old_addr, *new_addr, *old_bypass_addr; + int ret; + + old_bypass_addr = old ? NULL : poke->bypass_addr; + old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; + new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; + + /* + * On program loading or teardown, the program's kallsym entry + * might not be in place, so we use __bpf_arch_text_poke to skip + * the kallsyms check. + */ + if (new) { + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, new_addr); + BUG_ON(ret < 0); + if (!old) { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + poke->bypass_addr, + NULL); + BUG_ON(ret < 0); + } + } else { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + old_bypass_addr, + poke->bypass_addr); + BUG_ON(ret < 0); + /* let other CPUs finish the execution of program + * so that it will not possible to expose them + * to invalid nop, stack unwind, nop state + */ + if (!ret) + synchronize_rcu(); + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, NULL); + BUG_ON(ret < 0); + } +} diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 392f581af2ce..75e039081f92 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3143,6 +3143,9 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old); + void *bpf_arch_text_copy(void *dst, void *src, size_t len); int bpf_arch_text_invalidate(void *dst, size_t len); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 2058e89b5ddd..c85ff9162a5c 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -1012,11 +1012,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map, mutex_unlock(&aux->poke_mutex); } +void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + WARN_ON_ONCE(1); +} + static void prog_array_map_poke_run(struct bpf_map *map, u32 key, struct bpf_prog *old, struct bpf_prog *new) { - u8 *old_addr, *new_addr, *old_bypass_addr; struct prog_poke_elem *elem; struct bpf_array_aux *aux; @@ -1025,7 +1030,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, list_for_each_entry(elem, &aux->poke_progs, list) { struct bpf_jit_poke_descriptor *poke; - int i, ret; + int i; for (i = 0; i < elem->aux->size_poke_tab; i++) { poke = &elem->aux->poke_tab[i]; @@ -1044,21 +1049,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, * activated, so tail call updates can arrive from here * while JIT is still finishing its final fixup for * non-activated poke entries. - * 3) On program teardown, the program's kallsym entry gets - * removed out of RCU callback, but we can only untrack - * from sleepable context, therefore bpf_arch_text_poke() - * might not see that this is in BPF text section and - * bails out with -EINVAL. As these are unreachable since - * RCU grace period already passed, we simply skip them. - * 4) Also programs reaching refcount of zero while patching + * 3) Also programs reaching refcount of zero while patching * is in progress is okay since we're protected under * poke_mutex and untrack the programs before the JIT - * buffer is freed. When we're still in the middle of - * patching and suddenly kallsyms entry of the program - * gets evicted, we just skip the rest which is fine due - * to point 3). - * 5) Any other error happening below from bpf_arch_text_poke() - * is a unexpected bug. + * buffer is freed. */ if (!READ_ONCE(poke->tailcall_target_stable)) continue; @@ -1068,39 +1062,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, poke->tail_call.key != key) continue; - old_bypass_addr = old ? NULL : poke->bypass_addr; - old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; - new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; - - if (new) { - ret = bpf_arch_text_poke(poke->tailcall_target, - BPF_MOD_JUMP, - old_addr, new_addr); - BUG_ON(ret < 0 && ret != -EINVAL); - if (!old) { - ret = bpf_arch_text_poke(poke->tailcall_bypass, - BPF_MOD_JUMP, - poke->bypass_addr, - NULL); - BUG_ON(ret < 0 && ret != -EINVAL); - } - } else { - ret = bpf_arch_text_poke(poke->tailcall_bypass, - BPF_MOD_JUMP, - old_bypass_addr, - poke->bypass_addr); - BUG_ON(ret < 0 && ret != -EINVAL); - /* let other CPUs finish the execution of program - * so that it will not possible to expose them - * to invalid nop, stack unwind, nop state - */ - if (!ret) - synchronize_rcu(); - ret = bpf_arch_text_poke(poke->tailcall_target, - BPF_MOD_JUMP, - old_addr, NULL); - BUG_ON(ret < 0 && ret != -EINVAL); - } + bpf_arch_poke_desc_update(poke, new, old); } } } From c708a5e51b43797539d787f27ee947d95986c382 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 14 Sep 2023 02:15:23 +0000 Subject: [PATCH 1190/1397] mm/damon/core: use number of passed access sampling as a timer [ Upstream commit 4472edf63d6630e6cf65e205b4fc8c3c94d0afe5 ] DAMON sleeps for sampling interval after each sampling, and check if the aggregation interval and the ops update interval have passed using ktime_get_coarse_ts64() and baseline timestamps for the intervals. That design is for making the operations occur at deterministic timing regardless of the time that spend for each work. However, it turned out it is not that useful, and incur not-that-intuitive results. After all, timer functions, and especially sleep functions that DAMON uses to wait for specific timing, are not necessarily strictly accurate. It is legal design, so no problem. However, depending on such inaccuracies, the nr_accesses can be larger than aggregation interval divided by sampling interval. For example, with the default setting (5 ms sampling interval and 100 ms aggregation interval) we frequently show regions having nr_accesses larger than 20. Also, if the execution of a DAMOS scheme takes a long time, next aggregation could happen before enough number of samples are collected. This is not what usual users would intuitively expect. Since access check sampling is the smallest unit work of DAMON, using the number of passed sampling intervals as the DAMON-internal timer can easily avoid these problems. That is, convert aggregation and ops update intervals to numbers of sampling intervals that need to be passed before those operations be executed, count the number of passed sampling intervals, and invoke the operations as soon as the specific amount of sampling intervals passed. Make the change. Note that this could make a behavioral change to settings that using intervals that not aligned by the sampling interval. For example, if the sampling interval is 5 ms and the aggregation interval is 12 ms, DAMON effectively uses 15 ms as its aggregation interval, because it checks whether the aggregation interval after sleeping the sampling interval. This change will make DAMON to effectively use 10 ms as aggregation interval, since it uses 'aggregation interval / sampling interval * sampling interval' as the effective aggregation interval, and we don't use floating point types. Usual users would have used aligned intervals, so this behavioral change is not expected to make any meaningful impact, so just make this change. Link: https://lkml.kernel.org/r/20230914021523.60649-1-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton Stable-dep-of: 6376a8245956 ("mm/damon/core: make damon_start() waits until kdamond_fn() starts") Signed-off-by: Sasha Levin --- include/linux/damon.h | 14 ++++++- mm/damon/core.c | 96 +++++++++++++++++++++---------------------- 2 files changed, 59 insertions(+), 51 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index c70cca8a839f..506118916378 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -522,8 +522,18 @@ struct damon_ctx { struct damon_attrs attrs; /* private: internal use only */ - struct timespec64 last_aggregation; - struct timespec64 last_ops_update; + /* number of sample intervals that passed since this context started */ + unsigned long passed_sample_intervals; + /* + * number of sample intervals that should be passed before next + * aggregation + */ + unsigned long next_aggregation_sis; + /* + * number of sample intervals that should be passed before next ops + * update + */ + unsigned long next_ops_update_sis; /* public: */ struct task_struct *kdamond; diff --git a/mm/damon/core.c b/mm/damon/core.c index fd5be73f699f..30c93de59475 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -427,8 +427,10 @@ struct damon_ctx *damon_new_ctx(void) ctx->attrs.aggr_interval = 100 * 1000; ctx->attrs.ops_update_interval = 60 * 1000 * 1000; - ktime_get_coarse_ts64(&ctx->last_aggregation); - ctx->last_ops_update = ctx->last_aggregation; + ctx->passed_sample_intervals = 0; + /* These will be set from kdamond_init_intervals_sis() */ + ctx->next_aggregation_sis = 0; + ctx->next_ops_update_sis = 0; mutex_init(&ctx->kdamond_lock); @@ -542,6 +544,9 @@ static void damon_update_monitoring_results(struct damon_ctx *ctx, */ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) { + unsigned long sample_interval = attrs->sample_interval ? + attrs->sample_interval : 1; + if (attrs->min_nr_regions < 3) return -EINVAL; if (attrs->min_nr_regions > attrs->max_nr_regions) @@ -549,6 +554,11 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) if (attrs->sample_interval > attrs->aggr_interval) return -EINVAL; + ctx->next_aggregation_sis = ctx->passed_sample_intervals + + attrs->aggr_interval / sample_interval; + ctx->next_ops_update_sis = ctx->passed_sample_intervals + + attrs->ops_update_interval / sample_interval; + damon_update_monitoring_results(ctx, attrs); ctx->attrs = *attrs; return 0; @@ -722,38 +732,6 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs) return err; } -/* - * damon_check_reset_time_interval() - Check if a time interval is elapsed. - * @baseline: the time to check whether the interval has elapsed since - * @interval: the time interval (microseconds) - * - * See whether the given time interval has passed since the given baseline - * time. If so, it also updates the baseline to current time for next check. - * - * Return: true if the time interval has passed, or false otherwise. - */ -static bool damon_check_reset_time_interval(struct timespec64 *baseline, - unsigned long interval) -{ - struct timespec64 now; - - ktime_get_coarse_ts64(&now); - if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) < - interval * 1000) - return false; - *baseline = now; - return true; -} - -/* - * Check whether it is time to flush the aggregated information - */ -static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx) -{ - return damon_check_reset_time_interval(&ctx->last_aggregation, - ctx->attrs.aggr_interval); -} - /* * Reset the aggregated monitoring results ('nr_accesses' of each region). */ @@ -1234,18 +1212,6 @@ static void kdamond_split_regions(struct damon_ctx *ctx) last_nr_regions = nr_regions; } -/* - * Check whether it is time to check and apply the operations-related data - * structures. - * - * Returns true if it is. - */ -static bool kdamond_need_update_operations(struct damon_ctx *ctx) -{ - return damon_check_reset_time_interval(&ctx->last_ops_update, - ctx->attrs.ops_update_interval); -} - /* * Check whether current monitoring should be stopped * @@ -1357,6 +1323,17 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) return -EBUSY; } +static void kdamond_init_intervals_sis(struct damon_ctx *ctx) +{ + unsigned long sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + + ctx->passed_sample_intervals = 0; + ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval; + ctx->next_ops_update_sis = ctx->attrs.ops_update_interval / + sample_interval; +} + /* * The monitoring daemon that runs as a kernel thread */ @@ -1370,6 +1347,8 @@ static int kdamond_fn(void *data) pr_debug("kdamond (%d) starts\n", current->pid); + kdamond_init_intervals_sis(ctx); + if (ctx->ops.init) ctx->ops.init(ctx); if (ctx->callback.before_start && ctx->callback.before_start(ctx)) @@ -1378,6 +1357,17 @@ static int kdamond_fn(void *data) sz_limit = damon_region_sz_limit(ctx); while (!kdamond_need_stop(ctx)) { + /* + * ctx->attrs and ctx->next_{aggregation,ops_update}_sis could + * be changed from after_wmarks_check() or after_aggregation() + * callbacks. Read the values here, and use those for this + * iteration. That is, damon_set_attrs() updated new values + * are respected from next iteration. + */ + unsigned long next_aggregation_sis = ctx->next_aggregation_sis; + unsigned long next_ops_update_sis = ctx->next_ops_update_sis; + unsigned long sample_interval = ctx->attrs.sample_interval; + if (kdamond_wait_activation(ctx)) break; @@ -1387,12 +1377,17 @@ static int kdamond_fn(void *data) ctx->callback.after_sampling(ctx)) break; - kdamond_usleep(ctx->attrs.sample_interval); + kdamond_usleep(sample_interval); + ctx->passed_sample_intervals++; if (ctx->ops.check_accesses) max_nr_accesses = ctx->ops.check_accesses(ctx); - if (kdamond_aggregate_interval_passed(ctx)) { + sample_interval = ctx->attrs.sample_interval ? + ctx->attrs.sample_interval : 1; + if (ctx->passed_sample_intervals == next_aggregation_sis) { + ctx->next_aggregation_sis = next_aggregation_sis + + ctx->attrs.aggr_interval / sample_interval; kdamond_merge_regions(ctx, max_nr_accesses / 10, sz_limit); @@ -1407,7 +1402,10 @@ static int kdamond_fn(void *data) ctx->ops.reset_aggregated(ctx); } - if (kdamond_need_update_operations(ctx)) { + if (ctx->passed_sample_intervals == next_ops_update_sis) { + ctx->next_ops_update_sis = next_ops_update_sis + + ctx->attrs.ops_update_interval / + sample_interval; if (ctx->ops.update) ctx->ops.update(ctx); sz_limit = damon_region_sz_limit(ctx); From e93bcaebda90c237b2ce2b4e0ee7897b83b5cbf0 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 8 Dec 2023 17:50:18 +0000 Subject: [PATCH 1191/1397] mm/damon/core: make damon_start() waits until kdamond_fn() starts [ Upstream commit 6376a824595607e99d032a39ba3394988b4fce96 ] The cleanup tasks of kdamond threads including reset of corresponding DAMON context's ->kdamond field and decrease of global nr_running_ctxs counter is supposed to be executed by kdamond_fn(). However, commit 0f91d13366a4 ("mm/damon: simplify stop mechanism") made neither damon_start() nor damon_stop() ensure the corresponding kdamond has started the execution of kdamond_fn(). As a result, the cleanup can be skipped if damon_stop() is called fast enough after the previous damon_start(). Especially the skipped reset of ->kdamond could cause a use-after-free. Fix it by waiting for start of kdamond_fn() execution from damon_start(). Link: https://lkml.kernel.org/r/20231208175018.63880-1-sj@kernel.org Fixes: 0f91d13366a4 ("mm/damon: simplify stop mechanism") Signed-off-by: SeongJae Park Reported-by: Jakub Acs Cc: Changbin Du Cc: Jakub Acs Cc: # 5.15.x Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- include/linux/damon.h | 2 ++ mm/damon/core.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index 506118916378..a953d7083cd5 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -534,6 +534,8 @@ struct damon_ctx { * update */ unsigned long next_ops_update_sis; + /* for waiting until the execution of the kdamond_fn is started */ + struct completion kdamond_started; /* public: */ struct task_struct *kdamond; diff --git a/mm/damon/core.c b/mm/damon/core.c index 30c93de59475..aff611b6eafe 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -423,6 +423,8 @@ struct damon_ctx *damon_new_ctx(void) if (!ctx) return NULL; + init_completion(&ctx->kdamond_started); + ctx->attrs.sample_interval = 5 * 1000; ctx->attrs.aggr_interval = 100 * 1000; ctx->attrs.ops_update_interval = 60 * 1000 * 1000; @@ -636,11 +638,14 @@ static int __damon_start(struct damon_ctx *ctx) mutex_lock(&ctx->kdamond_lock); if (!ctx->kdamond) { err = 0; + reinit_completion(&ctx->kdamond_started); ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d", nr_running_ctxs); if (IS_ERR(ctx->kdamond)) { err = PTR_ERR(ctx->kdamond); ctx->kdamond = NULL; + } else { + wait_for_completion(&ctx->kdamond_started); } } mutex_unlock(&ctx->kdamond_lock); @@ -1347,6 +1352,7 @@ static int kdamond_fn(void *data) pr_debug("kdamond (%d) starts\n", current->pid); + complete(&ctx->kdamond_started); kdamond_init_intervals_sis(ctx); if (ctx->ops.init) From 1c9a5c49504c3df282a8490f165ea9dc623deef6 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 2 Sep 2023 08:13:52 +0800 Subject: [PATCH 1192/1397] btrfs: qgroup: iterate qgroups without memory allocation for qgroup_reserve() [ Upstream commit 686c4a5a42635e0d2889e3eb461c554fd0b616b4 ] Qgroup heavily relies on ulist to go through all the involved qgroups, but since we're using ulist inside fs_info->qgroup_lock spinlock, this means we're doing a lot of GFP_ATOMIC allocations. This patch reduces the GFP_ATOMIC usage for qgroup_reserve() by eliminating the memory allocation completely. This is done by moving the needed memory to btrfs_qgroup::iterator list_head, so that we can put all the involved qgroup into a on-stack list, thus eliminating the need to allocate memory while holding spinlock. The only cost is the slightly higher memory usage, but considering the reduce GFP_ATOMIC during a hot path, it should still be acceptable. Function qgroup_reserve() is the perfect start point for this conversion. Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Stable-dep-of: b321a52cce06 ("btrfs: free qgroup pertrans reserve on transaction abort") Signed-off-by: Sasha Levin --- fs/btrfs/qgroup.c | 61 ++++++++++++++++++++++------------------------- fs/btrfs/qgroup.h | 9 +++++++ 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 7c9249438154..32e5defe4eff 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -208,6 +208,7 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, INIT_LIST_HEAD(&qgroup->groups); INIT_LIST_HEAD(&qgroup->members); INIT_LIST_HEAD(&qgroup->dirty); + INIT_LIST_HEAD(&qgroup->iterator); rb_link_node(&qgroup->node, parent, p); rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); @@ -1342,6 +1343,24 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info, list_add(&qgroup->dirty, &fs_info->dirty_qgroups); } +static void qgroup_iterator_add(struct list_head *head, struct btrfs_qgroup *qgroup) +{ + if (!list_empty(&qgroup->iterator)) + return; + + list_add_tail(&qgroup->iterator, head); +} + +static void qgroup_iterator_clean(struct list_head *head) +{ + while (!list_empty(head)) { + struct btrfs_qgroup *qgroup; + + qgroup = list_first_entry(head, struct btrfs_qgroup, iterator); + list_del_init(&qgroup->iterator); + } +} + /* * The easy accounting, we're updating qgroup relationship whose child qgroup * only has exclusive extents. @@ -3125,8 +3144,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, struct btrfs_fs_info *fs_info = root->fs_info; u64 ref_root = root->root_key.objectid; int ret = 0; - struct ulist_node *unode; - struct ulist_iterator uiter; + LIST_HEAD(qgroup_list); if (!is_fstree(ref_root)) return 0; @@ -3146,49 +3164,28 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, if (!qgroup) goto out; - /* - * in a first step, we check all affected qgroups if any limits would - * be exceeded - */ - ulist_reinit(fs_info->qgroup_ulist); - ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, - qgroup_to_aux(qgroup), GFP_ATOMIC); - if (ret < 0) - goto out; - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { - struct btrfs_qgroup *qg; + qgroup_iterator_add(&qgroup_list, qgroup); + list_for_each_entry(qgroup, &qgroup_list, iterator) { struct btrfs_qgroup_list *glist; - qg = unode_aux_to_qgroup(unode); - - if (enforce && !qgroup_check_limits(qg, num_bytes)) { + if (enforce && !qgroup_check_limits(qgroup, num_bytes)) { ret = -EDQUOT; goto out; } - list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(fs_info->qgroup_ulist, - glist->group->qgroupid, - qgroup_to_aux(glist->group), GFP_ATOMIC); - if (ret < 0) - goto out; - } + list_for_each_entry(glist, &qgroup->groups, next_group) + qgroup_iterator_add(&qgroup_list, glist->group); } + ret = 0; /* * no limits exceeded, now record the reservation into all qgroups */ - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { - struct btrfs_qgroup *qg; - - qg = unode_aux_to_qgroup(unode); - - qgroup_rsv_add(fs_info, qg, num_bytes, type); - } + list_for_each_entry(qgroup, &qgroup_list, iterator) + qgroup_rsv_add(fs_info, qgroup, num_bytes, type); out: + qgroup_iterator_clean(&qgroup_list); spin_unlock(&fs_info->qgroup_lock); return ret; } diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 104c9bd3c337..1203f0632099 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -220,6 +220,15 @@ struct btrfs_qgroup { struct list_head groups; /* groups this group is member of */ struct list_head members; /* groups that are members of this group */ struct list_head dirty; /* dirty groups */ + + /* + * For qgroup iteration usage. + * + * The iteration list should always be empty until qgroup_iterator_add() + * is called. And should be reset to empty after the iteration is + * finished. + */ + struct list_head iterator; struct rb_node node; /* tree of qgroups */ /* From 1a80999ba3766b14f4b2f6128643816ea77e5c52 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 2 Sep 2023 08:13:54 +0800 Subject: [PATCH 1193/1397] btrfs: qgroup: use qgroup_iterator in qgroup_convert_meta() [ Upstream commit 0913445082496c2b29668ee26521401b273838b8 ] With the new qgroup_iterator_add() and qgroup_iterator_clean(), we can get rid of the ulist and its GFP_ATOMIC memory allocation. Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Stable-dep-of: b321a52cce06 ("btrfs: free qgroup pertrans reserve on transaction abort") Signed-off-by: Sasha Levin --- fs/btrfs/qgroup.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 32e5defe4eff..0d2212fa0ce8 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4106,9 +4106,7 @@ static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, int num_bytes) { struct btrfs_qgroup *qgroup; - struct ulist_node *unode; - struct ulist_iterator uiter; - int ret = 0; + LIST_HEAD(qgroup_list); if (num_bytes == 0) return; @@ -4119,31 +4117,21 @@ static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, qgroup = find_qgroup_rb(fs_info, ref_root); if (!qgroup) goto out; - ulist_reinit(fs_info->qgroup_ulist); - ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, - qgroup_to_aux(qgroup), GFP_ATOMIC); - if (ret < 0) - goto out; - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { - struct btrfs_qgroup *qg; - struct btrfs_qgroup_list *glist; - qg = unode_aux_to_qgroup(unode); + qgroup_iterator_add(&qgroup_list, qgroup); + list_for_each_entry(qgroup, &qgroup_list, iterator) { + struct btrfs_qgroup_list *glist; - qgroup_rsv_release(fs_info, qg, num_bytes, + qgroup_rsv_release(fs_info, qgroup, num_bytes, BTRFS_QGROUP_RSV_META_PREALLOC); - qgroup_rsv_add(fs_info, qg, num_bytes, + qgroup_rsv_add(fs_info, qgroup, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS); - list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(fs_info->qgroup_ulist, - glist->group->qgroupid, - qgroup_to_aux(glist->group), GFP_ATOMIC); - if (ret < 0) - goto out; - } + + list_for_each_entry(glist, &qgroup->groups, next_group) + qgroup_iterator_add(&qgroup_list, glist->group); } out: + qgroup_iterator_clean(&qgroup_list); spin_unlock(&fs_info->qgroup_lock); } From 624bc6f62c8595bef738fe4c6e44bbc79c556609 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 1 Dec 2023 13:00:11 -0800 Subject: [PATCH 1194/1397] btrfs: free qgroup pertrans reserve on transaction abort [ Upstream commit b321a52cce062ec7ed385333a33905d22159ce36 ] If we abort a transaction, we never run the code that frees the pertrans qgroup reservation. This results in warnings on unmount as that reservation has been leaked. The leak isn't a huge issue since the fs is read-only, but it's better to clean it up when we know we can/should. Do it during the cleanup_transaction step of aborting. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 28 ++++++++++++++++++++++++++++ fs/btrfs/qgroup.c | 5 +++-- fs/btrfs/transaction.c | 2 -- fs/btrfs/transaction.h | 3 +++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 71efb6883f30..b79781df7071 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4836,6 +4836,32 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, } } +static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *gang[8]; + int i; + int ret; + + spin_lock(&fs_info->fs_roots_radix_lock); + while (1) { + ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, + (void **)gang, 0, + ARRAY_SIZE(gang), + BTRFS_ROOT_TRANS_TAG); + if (ret == 0) + break; + for (i = 0; i < ret; i++) { + struct btrfs_root *root = gang[i]; + + btrfs_qgroup_free_meta_all_pertrans(root); + radix_tree_tag_clear(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + } + } + spin_unlock(&fs_info->fs_roots_radix_lock); +} + void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_fs_info *fs_info) { @@ -4864,6 +4890,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, EXTENT_DIRTY); btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents); + btrfs_free_all_qgroup_pertrans(fs_info); + cur_trans->state =TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 0d2212fa0ce8..a006f5160e6b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4124,8 +4124,9 @@ static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, qgroup_rsv_release(fs_info, qgroup, num_bytes, BTRFS_QGROUP_RSV_META_PREALLOC); - qgroup_rsv_add(fs_info, qgroup, num_bytes, - BTRFS_QGROUP_RSV_META_PERTRANS); + if (!sb_rdonly(fs_info->sb)) + qgroup_rsv_add(fs_info, qgroup, num_bytes, + BTRFS_QGROUP_RSV_META_PERTRANS); list_for_each_entry(glist, &qgroup->groups, next_group) qgroup_iterator_add(&qgroup_list, glist->group); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c780d3729463..0ac2d191cd34 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -37,8 +37,6 @@ static struct kmem_cache *btrfs_trans_handle_cachep; -#define BTRFS_ROOT_TRANS_TAG 0 - /* * Transaction states and transitions * diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 93869cda6af9..238a0ab85df9 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -12,6 +12,9 @@ #include "ctree.h" #include "misc.h" +/* Radix-tree tag for roots that are part of the trasaction. */ +#define BTRFS_ROOT_TRANS_TAG 0 + enum btrfs_trans_state { TRANS_STATE_RUNNING, TRANS_STATE_COMMIT_PREP, From cd8b639700ab5423d07583ed9b8bd11c3f71a4b5 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 5 Dec 2023 14:55:04 -0500 Subject: [PATCH 1195/1397] drm/amd/display: fix hw rotated modes when PSR-SU is enabled [ Upstream commit f528ee145bd0076cd0ed7e7b2d435893e6329e98 ] We currently don't support dirty rectangles on hardware rotated modes. So, if a user is using hardware rotated modes with PSR-SU enabled, use PSR-SU FFU for all rotated planes (including cursor planes). Cc: stable@vger.kernel.org Fixes: 30ebe41582d1 ("drm/amd/display: add FB_DAMAGE_CLIPS support") Reported-by: Kai-Heng Feng Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2952 Tested-by: Kai-Heng Feng Tested-by: Bin Li Reviewed-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 1 + .../drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 3 ++- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c | 12 ++++++++++-- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f5fdb61c821d..d63360127834 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5170,6 +5170,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, if (plane->type == DRM_PLANE_TYPE_CURSOR) return; + if (new_plane_state->rotation != DRM_MODE_ROTATE_0) + goto ffu; + num_clips = drm_plane_get_damage_clips_count(new_plane_state); clips = drm_plane_get_damage_clips(new_plane_state); diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 100d62162b71..99880b08cda0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -465,6 +465,7 @@ struct dc_cursor_mi_param { struct fixed31_32 v_scale_ratio; enum dc_rotation_angle rotation; bool mirror; + struct dc_stream_state *stream; }; /* IPP related types */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 79befa17bb03..13ccb57379c7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -3407,7 +3407,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) .h_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.horz, .v_scale_ratio = pipe_ctx->plane_res.scl_data.ratios.vert, .rotation = pipe_ctx->plane_state->rotation, - .mirror = pipe_ctx->plane_state->horizontal_mirror + .mirror = pipe_ctx->plane_state->horizontal_mirror, + .stream = pipe_ctx->stream, }; bool pipe_split_on = false; bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) || diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 4566bc7abf17..aa252dc26326 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -1075,8 +1075,16 @@ void hubp2_cursor_set_position( if (src_y_offset < 0) src_y_offset = 0; /* Save necessary cursor info x, y position. w, h is saved in attribute func. */ - hubp->cur_rect.x = src_x_offset + param->viewport.x; - hubp->cur_rect.y = src_y_offset + param->viewport.y; + if (param->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 && + param->rotation != ROTATION_ANGLE_0) { + hubp->cur_rect.x = 0; + hubp->cur_rect.y = 0; + hubp->cur_rect.w = param->stream->timing.h_addressable; + hubp->cur_rect.h = param->stream->timing.v_addressable; + } else { + hubp->cur_rect.x = src_x_offset + param->viewport.x; + hubp->cur_rect.y = src_y_offset + param->viewport.y; + } } void hubp2_clk_cntl(struct hubp *hubp, bool enable) From 1f17934815bc68e3a6155f60db2c97d5c2e6f003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 2 May 2023 17:39:01 +0300 Subject: [PATCH 1196/1397] drm/i915: Fix FEC state dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3dfeb80b308882cc6e1f5f6c36fd9a7f4cae5fc6 ] Stop dumping state while reading it out. We have a proper place for that stuff. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230502143906.2401-7-ville.syrjala@linux.intel.com Reviewed-by: Luca Coelho Stable-dep-of: e6861d8264cd ("drm/i915/edp: don't write to DP_LINK_BW_SET when using rate select") Signed-off-by: Sasha Levin --- .../gpu/drm/i915/display/intel_crtc_state_dump.c | 2 ++ drivers/gpu/drm/i915/display/intel_ddi.c | 13 +++---------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index 8d4640d0fd34..8b34fa55fa1b 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -258,6 +258,8 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, intel_dump_m_n_config(pipe_config, "dp m2_n2", pipe_config->lane_count, &pipe_config->dp_m2_n2); + drm_dbg_kms(&i915->drm, "fec: %s\n", + str_enabled_disabled(pipe_config->fec_enable)); } drm_dbg_kms(&i915->drm, "framestart delay: %d, MSA timing delay: %d\n", diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 84bbf854337a..85e2263e688d 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3724,17 +3724,10 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, intel_cpu_transcoder_get_m2_n2(crtc, cpu_transcoder, &pipe_config->dp_m2_n2); - if (DISPLAY_VER(dev_priv) >= 11) { - i915_reg_t dp_tp_ctl = dp_tp_ctl_reg(encoder, pipe_config); - + if (DISPLAY_VER(dev_priv) >= 11) pipe_config->fec_enable = - intel_de_read(dev_priv, dp_tp_ctl) & DP_TP_CTL_FEC_ENABLE; - - drm_dbg_kms(&dev_priv->drm, - "[ENCODER:%d:%s] Fec status: %u\n", - encoder->base.base.id, encoder->base.name, - pipe_config->fec_enable); - } + intel_de_read(dev_priv, + dp_tp_ctl_reg(encoder, pipe_config)) & DP_TP_CTL_FEC_ENABLE; if (dig_port->lspcon.active && intel_dp_has_hdmi_sink(&dig_port->dp)) pipe_config->infoframes.enable |= From e4c16db9daf15986e971d1b1e46953c4e171f6be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 3 May 2023 14:36:59 +0300 Subject: [PATCH 1197/1397] drm/i915: Introduce crtc_state->enhanced_framing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3072a24c778a7102d70692af5556e47363114c67 ] Track DP enhanced framing properly in the crtc state instead of relying just on the cached DPCD everywhere, and hook it up into the state check and dump. v2: Actually set enhanced_framing in .compute_config() Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230503113659.16305-1-ville.syrjala@linux.intel.com Reviewed-by: Luca Coelho Stable-dep-of: e6861d8264cd ("drm/i915/edp: don't write to DP_LINK_BW_SET when using rate select") Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/display/g4x_dp.c | 10 ++++++++-- drivers/gpu/drm/i915/display/intel_crt.c | 2 ++ drivers/gpu/drm/i915/display/intel_crtc_state_dump.c | 5 +++-- drivers/gpu/drm/i915/display/intel_ddi.c | 11 +++++++++-- drivers/gpu/drm/i915/display/intel_display.c | 1 + drivers/gpu/drm/i915/display/intel_display_types.h | 2 ++ drivers/gpu/drm/i915/display/intel_dp.c | 3 +++ drivers/gpu/drm/i915/display/intel_dp_link_training.c | 2 +- 8 files changed, 29 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index 4c7187f7913e..e8ee0a08947e 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -141,7 +141,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, intel_de_rmw(dev_priv, TRANS_DP_CTL(crtc->pipe), TRANS_DP_ENH_FRAMING, - drm_dp_enhanced_frame_cap(intel_dp->dpcd) ? + pipe_config->enhanced_framing ? TRANS_DP_ENH_FRAMING : 0); } else { if (IS_G4X(dev_priv) && pipe_config->limited_color_range) @@ -153,7 +153,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, intel_dp->DP |= DP_SYNC_VS_HIGH; intel_dp->DP |= DP_LINK_TRAIN_OFF; - if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) + if (pipe_config->enhanced_framing) intel_dp->DP |= DP_ENHANCED_FRAMING; if (IS_CHERRYVIEW(dev_priv)) @@ -351,6 +351,9 @@ static void intel_dp_get_config(struct intel_encoder *encoder, u32 trans_dp = intel_de_read(dev_priv, TRANS_DP_CTL(crtc->pipe)); + if (trans_dp & TRANS_DP_ENH_FRAMING) + pipe_config->enhanced_framing = true; + if (trans_dp & TRANS_DP_HSYNC_ACTIVE_HIGH) flags |= DRM_MODE_FLAG_PHSYNC; else @@ -361,6 +364,9 @@ static void intel_dp_get_config(struct intel_encoder *encoder, else flags |= DRM_MODE_FLAG_NVSYNC; } else { + if (tmp & DP_ENHANCED_FRAMING) + pipe_config->enhanced_framing = true; + if (tmp & DP_SYNC_HS_HIGH) flags |= DRM_MODE_FLAG_PHSYNC; else diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index d23020eb87f4..4352f9017761 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -456,6 +456,8 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, /* FDI must always be 2.7 GHz */ pipe_config->port_clock = 135000 * 2; + pipe_config->enhanced_framing = true; + adjusted_mode->crtc_clock = lpt_iclkip(pipe_config); return 0; diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index 8b34fa55fa1b..66fe880af8f3 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -258,8 +258,9 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, intel_dump_m_n_config(pipe_config, "dp m2_n2", pipe_config->lane_count, &pipe_config->dp_m2_n2); - drm_dbg_kms(&i915->drm, "fec: %s\n", - str_enabled_disabled(pipe_config->fec_enable)); + drm_dbg_kms(&i915->drm, "fec: %s, enhanced framing: %s\n", + str_enabled_disabled(pipe_config->fec_enable), + str_enabled_disabled(pipe_config->enhanced_framing)); } drm_dbg_kms(&i915->drm, "framestart delay: %d, MSA timing delay: %d\n", diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 85e2263e688d..c7e00f57cb7a 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3432,7 +3432,7 @@ static void mtl_ddi_prepare_link_retrain(struct intel_dp *intel_dp, dp_tp_ctl |= DP_TP_CTL_MODE_MST; } else { dp_tp_ctl |= DP_TP_CTL_MODE_SST; - if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) + if (crtc_state->enhanced_framing) dp_tp_ctl |= DP_TP_CTL_ENHANCED_FRAME_ENABLE; } intel_de_write(dev_priv, dp_tp_ctl_reg(encoder, crtc_state), dp_tp_ctl); @@ -3489,7 +3489,7 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp, dp_tp_ctl |= DP_TP_CTL_MODE_MST; } else { dp_tp_ctl |= DP_TP_CTL_MODE_SST; - if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) + if (crtc_state->enhanced_framing) dp_tp_ctl |= DP_TP_CTL_ENHANCED_FRAME_ENABLE; } intel_de_write(dev_priv, dp_tp_ctl_reg(encoder, crtc_state), dp_tp_ctl); @@ -3724,6 +3724,10 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, intel_cpu_transcoder_get_m2_n2(crtc, cpu_transcoder, &pipe_config->dp_m2_n2); + pipe_config->enhanced_framing = + intel_de_read(dev_priv, dp_tp_ctl_reg(encoder, pipe_config)) & + DP_TP_CTL_ENHANCED_FRAME_ENABLE; + if (DISPLAY_VER(dev_priv) >= 11) pipe_config->fec_enable = intel_de_read(dev_priv, @@ -3740,6 +3744,9 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, if (!HAS_DP20(dev_priv)) { /* FDI */ pipe_config->output_types |= BIT(INTEL_OUTPUT_ANALOG); + pipe_config->enhanced_framing = + intel_de_read(dev_priv, dp_tp_ctl_reg(encoder, pipe_config)) & + DP_TP_CTL_ENHANCED_FRAME_ENABLE; break; } fallthrough; /* 128b/132b */ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 1e2b09ae09b9..2d9d96ecbb25 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5255,6 +5255,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_BOOL(hdmi_scrambling); PIPE_CONF_CHECK_BOOL(hdmi_high_tmds_clock_ratio); PIPE_CONF_CHECK_BOOL(has_infoframe); + PIPE_CONF_CHECK_BOOL(enhanced_framing); PIPE_CONF_CHECK_BOOL(fec_enable); PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_audio); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 731f2ec04d5c..7fc92b1474cc 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1362,6 +1362,8 @@ struct intel_crtc_state { u16 linetime; u16 ips_linetime; + bool enhanced_framing; + /* Forward Error correction State */ bool fec_enable; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 66e35f8443e1..b4fb7ce39d06 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2312,6 +2312,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->limited_color_range = intel_dp_limited_color_range(pipe_config, conn_state); + pipe_config->enhanced_framing = + drm_dp_enhanced_frame_cap(intel_dp->dpcd); + if (pipe_config->dsc.compression_enable) output_bpp = pipe_config->dsc.compressed_bpp; else diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index a263773f4d68..d09e43a38fa6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -655,7 +655,7 @@ intel_dp_update_link_bw_set(struct intel_dp *intel_dp, /* Write the link configuration data */ link_config[0] = link_bw; link_config[1] = crtc_state->lane_count; - if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) + if (crtc_state->enhanced_framing) link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); From a2abe532ecd02cfeb4b218c755c656a9b181600f Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 5 Dec 2023 20:05:51 +0200 Subject: [PATCH 1198/1397] drm/i915/edp: don't write to DP_LINK_BW_SET when using rate select MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e6861d8264cd43c5eb20196e53df36fd71ec5698 ] The eDP 1.5 spec adds a clarification for eDP 1.4x: > For eDP v1.4x, if the Source device chooses the Main-Link rate by way > of DPCD 00100h, the Sink device shall ignore DPCD 00115h[2:0]. We write 0 to DP_LINK_BW_SET (DPCD 100h) even when using DP_LINK_RATE_SET (DPCD 114h). Stop doing that, as it can cause the panel to ignore the rate set method. Moreover, 0 is a reserved value for DP_LINK_BW_SET, and should not be used. v2: Improve the comments (Ville) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9081 Tested-by: Animesh Manna Reviewed-by: Uma Shankar Cc: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20231205180551.2476228-1-jani.nikula@intel.com (cherry picked from commit 23b392b94acb0499f69706c5808c099f590ebcf4) Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- .../drm/i915/display/intel_dp_link_training.c | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index d09e43a38fa6..a62bca622b0a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -650,19 +650,30 @@ intel_dp_update_link_bw_set(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, u8 link_bw, u8 rate_select) { - u8 link_config[2]; + u8 lane_count = crtc_state->lane_count; - /* Write the link configuration data */ - link_config[0] = link_bw; - link_config[1] = crtc_state->lane_count; if (crtc_state->enhanced_framing) - link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; - drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); + lane_count |= DP_LANE_COUNT_ENHANCED_FRAME_EN; + + if (link_bw) { + /* DP and eDP v1.3 and earlier link bw set method. */ + u8 link_config[] = { link_bw, lane_count }; - /* eDP 1.4 rate select method. */ - if (!link_bw) - drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET, - &rate_select, 1); + drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, + ARRAY_SIZE(link_config)); + } else { + /* + * eDP v1.4 and later link rate set method. + * + * eDP v1.4x sinks shall ignore DP_LINK_RATE_SET if + * DP_LINK_BW_SET is set. Avoid writing DP_LINK_BW_SET. + * + * eDP v1.5 sinks allow choosing either, and the last choice + * shall be active. + */ + drm_dp_dpcd_writeb(&intel_dp->aux, DP_LANE_COUNT_SET, lane_count); + drm_dp_dpcd_writeb(&intel_dp->aux, DP_LINK_RATE_SET, rate_select); + } } /* From 031ddd28008971cce0b5626379b910d0a05fb4dd Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 21 Jun 2023 10:48:24 +0100 Subject: [PATCH 1199/1397] drm: Update file owner during use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1c7a387ffef894b1ab3942f0482dac7a6e0a909c ] With the typical model where the display server opens the file descriptor and then hands it over to the client(*), we were showing stale data in debugfs. Fix it by updating the drm_file->pid on ioctl access from a different process. The field is also made RCU protected to allow for lockless readers. Update side is protected with dev->filelist_mutex. Before: $ cat /sys/kernel/debug/dri/0/clients command pid dev master a uid magic Xorg 2344 0 y y 0 0 Xorg 2344 0 n y 0 2 Xorg 2344 0 n y 0 3 Xorg 2344 0 n y 0 4 After: $ cat /sys/kernel/debug/dri/0/clients command tgid dev master a uid magic Xorg 830 0 y y 0 0 xfce4-session 880 0 n y 0 1 xfwm4 943 0 n y 0 2 neverball 1095 0 n y 0 3 *) More detailed and historically accurate description of various handover implementation kindly provided by Emil Velikov: """ The traditional model, the server was the orchestrator managing the primary device node. From the fd, to the master status and authentication. But looking at the fd alone, this has varied across the years. IIRC in the DRI1 days, Xorg (libdrm really) would have a list of open fd(s) and reuse those whenever needed, DRI2 the client was responsible for open() themselves and with DRI3 the fd was passed to the client. Around the inception of DRI3 and systemd-logind, the latter became another possible orchestrator. Whereby Xorg and Wayland compositors could ask it for the fd. For various reasons (hysterical and genuine ones) Xorg has a fallback path going the open(), whereas Wayland compositors are moving to solely relying on logind... some never had fallback even. Over the past few years, more projects have emerged which provide functionality similar (be that on API level, Dbus, or otherwise) to systemd-logind. """ v2: * Fixed typo in commit text and added a fine historical explanation from Emil. Signed-off-by: Tvrtko Ursulin Cc: "Christian König" Cc: Daniel Vetter Acked-by: Christian König Reviewed-by: Emil Velikov Reviewed-by: Rob Clark Tested-by: Rob Clark Link: https://patchwork.freedesktop.org/patch/msgid/20230621094824.2348732-1-tvrtko.ursulin@linux.intel.com Signed-off-by: Christian König Stable-dep-of: 5a6c9a05e55c ("drm: Fix FD ownership check in drm_master_check_perm()") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 ++-- drivers/gpu/drm/drm_auth.c | 3 +- drivers/gpu/drm/drm_debugfs.c | 10 ++++--- drivers/gpu/drm/drm_file.c | 40 +++++++++++++++++++++++-- drivers/gpu/drm/drm_ioctl.c | 3 ++ drivers/gpu/drm/nouveau/nouveau_drm.c | 5 +++- drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 6 ++-- include/drm/drm_file.h | 13 ++++++-- 8 files changed, 71 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index ca4d2d430e28..a1b15d0d6c48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -962,6 +962,7 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused) list_for_each_entry(file, &dev->filelist, lhead) { struct task_struct *task; struct drm_gem_object *gobj; + struct pid *pid; int id; /* @@ -971,8 +972,9 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused) * Therefore, we need to protect this ->comm access using RCU. */ rcu_read_lock(); - task = pid_task(file->pid, PIDTYPE_TGID); - seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid), + pid = rcu_dereference(file->pid); + task = pid_task(pid, PIDTYPE_TGID); + seq_printf(m, "pid %8d command %s:\n", pid_nr(pid), task ? task->comm : ""); rcu_read_unlock(); diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index cf92a9ae8034..2ed2585ded37 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -235,7 +235,8 @@ static int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv) static int drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv) { - if (file_priv->pid == task_pid(current) && file_priv->was_master) + if (file_priv->was_master && + rcu_access_pointer(file_priv->pid) == task_pid(current)) return 0; if (!capable(CAP_SYS_ADMIN)) diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 2de43ff3ce0a..41b0682c638e 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -92,15 +92,17 @@ static int drm_clients_info(struct seq_file *m, void *data) */ mutex_lock(&dev->filelist_mutex); list_for_each_entry_reverse(priv, &dev->filelist, lhead) { - struct task_struct *task; bool is_current_master = drm_is_current_master(priv); + struct task_struct *task; + struct pid *pid; - rcu_read_lock(); /* locks pid_task()->comm */ - task = pid_task(priv->pid, PIDTYPE_TGID); + rcu_read_lock(); /* Locks priv->pid and pid_task()->comm! */ + pid = rcu_dereference(priv->pid); + task = pid_task(pid, PIDTYPE_TGID); uid = task ? __task_cred(task)->euid : GLOBAL_ROOT_UID; seq_printf(m, "%20s %5d %3d %c %c %5d %10u\n", task ? task->comm : "", - pid_vnr(priv->pid), + pid_vnr(pid), priv->minor->index, is_current_master ? 'y' : 'n', priv->authenticated ? 'y' : 'n', diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 883d83bc0e3d..e692770ef6d3 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -160,7 +160,7 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor) /* Get a unique identifier for fdinfo: */ file->client_id = atomic64_inc_return(&ident); - file->pid = get_pid(task_tgid(current)); + rcu_assign_pointer(file->pid, get_pid(task_tgid(current))); file->minor = minor; /* for compatibility root is always authenticated */ @@ -200,7 +200,7 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor) drm_syncobj_release(file); if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_release(dev, file); - put_pid(file->pid); + put_pid(rcu_access_pointer(file->pid)); kfree(file); return ERR_PTR(ret); @@ -291,7 +291,7 @@ void drm_file_free(struct drm_file *file) WARN_ON(!list_empty(&file->event_list)); - put_pid(file->pid); + put_pid(rcu_access_pointer(file->pid)); kfree(file); } @@ -505,6 +505,40 @@ int drm_release(struct inode *inode, struct file *filp) } EXPORT_SYMBOL(drm_release); +void drm_file_update_pid(struct drm_file *filp) +{ + struct drm_device *dev; + struct pid *pid, *old; + + /* + * Master nodes need to keep the original ownership in order for + * drm_master_check_perm to keep working correctly. (See comment in + * drm_auth.c.) + */ + if (filp->was_master) + return; + + pid = task_tgid(current); + + /* + * Quick unlocked check since the model is a single handover followed by + * exclusive repeated use. + */ + if (pid == rcu_access_pointer(filp->pid)) + return; + + dev = filp->minor->dev; + mutex_lock(&dev->filelist_mutex); + old = rcu_replace_pointer(filp->pid, pid, 1); + mutex_unlock(&dev->filelist_mutex); + + if (pid != old) { + get_pid(pid); + synchronize_rcu(); + put_pid(old); + } +} + /** * drm_release_noglobal - release method for DRM file * @inode: device inode diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index f03ffbacfe9b..77590b0f38fa 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -776,6 +776,9 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata, struct drm_device *dev = file_priv->minor->dev; int retcode; + /* Update drm_file owner if fd was passed along. */ + drm_file_update_pid(file_priv); + if (drm_dev_is_unplugged(dev)) return -ENODEV; diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 4396f501b16a..50589f982d1a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1133,7 +1133,10 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv) } get_task_comm(tmpname, current); - snprintf(name, sizeof(name), "%s[%d]", tmpname, pid_nr(fpriv->pid)); + rcu_read_lock(); + snprintf(name, sizeof(name), "%s[%d]", + tmpname, pid_nr(rcu_dereference(fpriv->pid))); + rcu_read_unlock(); if (!(cli = kzalloc(sizeof(*cli), GFP_KERNEL))) { ret = -ENOMEM; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index 8b1eb0061610..12787bb9c111 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -244,6 +244,7 @@ static int vmw_debugfs_gem_info_show(struct seq_file *m, void *unused) list_for_each_entry(file, &dev->filelist, lhead) { struct task_struct *task; struct drm_gem_object *gobj; + struct pid *pid; int id; /* @@ -253,8 +254,9 @@ static int vmw_debugfs_gem_info_show(struct seq_file *m, void *unused) * Therefore, we need to protect this ->comm access using RCU. */ rcu_read_lock(); - task = pid_task(file->pid, PIDTYPE_TGID); - seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid), + pid = rcu_dereference(file->pid); + task = pid_task(pid, PIDTYPE_TGID); + seq_printf(m, "pid %8d command %s:\n", pid_nr(pid), task ? task->comm : ""); rcu_read_unlock(); diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 010239392adf..9c47a43f42a6 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -256,8 +256,15 @@ struct drm_file { /** @master_lookup_lock: Serializes @master. */ spinlock_t master_lookup_lock; - /** @pid: Process that opened this file. */ - struct pid *pid; + /** + * @pid: Process that is using this file. + * + * Must only be dereferenced under a rcu_read_lock or equivalent. + * + * Updates are guarded with dev->filelist_mutex and reference must be + * dropped after a RCU grace period to accommodate lockless readers. + */ + struct pid __rcu *pid; /** @client_id: A unique id for fdinfo */ u64 client_id; @@ -420,6 +427,8 @@ static inline bool drm_is_accel_client(const struct drm_file *file_priv) return file_priv->minor->type == DRM_MINOR_ACCEL; } +void drm_file_update_pid(struct drm_file *); + int drm_open(struct inode *inode, struct file *filp); int drm_open_helper(struct file *filp, struct drm_minor *minor); ssize_t drm_read(struct file *filp, char __user *buffer, From c9b26d9e438e449a8ccf68ba3df9de741a7e689d Mon Sep 17 00:00:00 2001 From: Lingkai Dong Date: Wed, 6 Dec 2023 13:51:58 +0000 Subject: [PATCH 1200/1397] drm: Fix FD ownership check in drm_master_check_perm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5a6c9a05e55cb2972396cc991af9d74c8c15029a ] The DRM subsystem keeps a record of the owner of a DRM device file descriptor using thread group ID (TGID) instead of process ID (PID), to ensures all threads within the same userspace process are considered the owner. However, the DRM master ownership check compares the current thread's PID against the record, so the thread is incorrectly considered to be not the FD owner if the PID is not equal to the TGID. This causes DRM ioctls to be denied master privileges, even if the same thread that opened the FD performs an ioctl. Fix this by checking TGID. Fixes: 4230cea89cafb ("drm: Track clients by tgid and not tid") Signed-off-by: Lingkai Dong Reviewed-by: Christian König Reviewed-by: Tvrtko Ursulin Cc: # v6.4+ Link: https://patchwork.freedesktop.org/patch/msgid/PA6PR08MB107665920BE9A96658CDA04CE8884A@PA6PR08MB10766.eurprd08.prod.outlook.com Signed-off-by: Christian König Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 2ed2585ded37..6899b3dc1f12 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -236,7 +236,7 @@ static int drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv) { if (file_priv->was_master && - rcu_access_pointer(file_priv->pid) == task_pid(current)) + rcu_access_pointer(file_priv->pid) == task_tgid(current)) return 0; if (!capable(CAP_SYS_ADMIN)) From a8b655ac35be588df0bec9c56b565bc72a188682 Mon Sep 17 00:00:00 2001 From: Benjamin Bigler Date: Sat, 9 Dec 2023 23:23:26 +0100 Subject: [PATCH 1201/1397] spi: spi-imx: correctly configure burst length when using dma [ Upstream commit e9b220aeacf109684cce36a94fc24ed37be92b05 ] If DMA is used, burst length should be set to the bus width of the DMA. Otherwise, the SPI hardware will transmit/receive one word per DMA request. Since this issue affects both transmission and reception, it cannot be detected with a loopback test. Replace magic numbers 512 and 0xfff with MX51_ECSPI_CTRL_MAX_BURST. Reported-by Stefan Bigler Signed-off-by: Benjamin Bigler Fixes: 15a6af94a277 ("spi: Increase imx51 ecspi burst length based on transfer length") Link: https://lore.kernel.org/r/8a415902c751cdbb4b20ce76569216ed@mail.infomaniak.com Link: https://lore.kernel.org/r/20231209222338.5564-1-benjamin@bigler.one Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-imx.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 498e35c8db2c..272bc871a848 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -659,11 +659,18 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx, ctrl |= (spi_imx->target_burst * 8 - 1) << MX51_ECSPI_CTRL_BL_OFFSET; else { - if (spi_imx->count >= 512) - ctrl |= 0xFFF << MX51_ECSPI_CTRL_BL_OFFSET; - else - ctrl |= (spi_imx->count * spi_imx->bits_per_word - 1) + if (spi_imx->usedma) { + ctrl |= (spi_imx->bits_per_word * + spi_imx_bytes_per_word(spi_imx->bits_per_word) - 1) << MX51_ECSPI_CTRL_BL_OFFSET; + } else { + if (spi_imx->count >= MX51_ECSPI_CTRL_MAX_BURST) + ctrl |= (MX51_ECSPI_CTRL_MAX_BURST - 1) + << MX51_ECSPI_CTRL_BL_OFFSET; + else + ctrl |= (spi_imx->count * spi_imx->bits_per_word - 1) + << MX51_ECSPI_CTRL_BL_OFFSET; + } } /* set clock speed */ From b352ebe373dd2d51ba615c17d01f6f1fe282c643 Mon Sep 17 00:00:00 2001 From: Chukun Pan Date: Sun, 29 Oct 2023 15:40:09 +0800 Subject: [PATCH 1202/1397] arm64: dts: allwinner: h616: update emac for Orange Pi Zero 3 [ Upstream commit b9622937d95809ef89904583191571a9fa326402 ] The current emac setting is not suitable for Orange Pi Zero 3, move it back to Orange Pi Zero 2 DT. Also update phy mode and delay values for emac on Orange Pi Zero 3. With these changes, Ethernet now looks stable. Fixes: 322bf103204b ("arm64: dts: allwinner: h616: Split Orange Pi Zero 2 DT") Signed-off-by: Chukun Pan Reviewed-by: Jernej Skrabec Link: https://lore.kernel.org/r/20231029074009.7820-2-amadeus@jmu.edu.cn Signed-off-by: Jernej Skrabec Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi | 3 --- arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts | 3 +++ arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi index 15290e6892fc..fc7315b94406 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi @@ -68,10 +68,7 @@ &emac0 { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; - allwinner,rx-delay-ps = <3100>; - allwinner,tx-delay-ps = <700>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts index d83852e72f06..b5d713926a34 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts @@ -13,6 +13,9 @@ }; &emac0 { + allwinner,rx-delay-ps = <3100>; + allwinner,tx-delay-ps = <700>; + phy-mode = "rgmii"; phy-supply = <®_dcdce>; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts index 00fe28caac93..b3b1b8692125 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts @@ -13,6 +13,8 @@ }; &emac0 { + allwinner,tx-delay-ps = <700>; + phy-mode = "rgmii-rxid"; phy-supply = <®_dldo1>; }; From bc9ca01ef814faf91deca5e658932f7d6f9589c8 Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Mon, 13 Nov 2023 12:16:04 -0600 Subject: [PATCH 1203/1397] ARM: dts: dra7: Fix DRA7 L3 NoC node register size [ Upstream commit 1e5caee2ba8f1426e8098afb4ca38dc40a0ca71b ] This node can access any part of the L3 configuration registers space, including CLK1 and CLK2 which are 0x800000 offset. Restore this area size to include these areas. Fixes: 7f2659ce657e ("ARM: dts: Move dra7 l3 noc to a separate node") Signed-off-by: Andrew Davis Message-ID: <20231113181604.546444-1-afd@ti.com> Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/ti/omap/dra7.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/ti/omap/dra7.dtsi b/arch/arm/boot/dts/ti/omap/dra7.dtsi index 3f3e52e3b375..6509c742fb58 100644 --- a/arch/arm/boot/dts/ti/omap/dra7.dtsi +++ b/arch/arm/boot/dts/ti/omap/dra7.dtsi @@ -147,7 +147,7 @@ l3-noc@44000000 { compatible = "ti,dra7-l3-noc"; - reg = <0x44000000 0x1000>, + reg = <0x44000000 0x1000000>, <0x45000000 0x1000>; interrupts-extended = <&crossbar_mpu GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, <&wakeupgen GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; From 1e4f9b7abf54801e70350149d561c03d72966698 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Thu, 23 Nov 2023 22:52:37 +0800 Subject: [PATCH 1204/1397] ARM: OMAP2+: Fix null pointer dereference and memory leak in omap_soc_device_init [ Upstream commit c72b9c33ef9695ad7ce7a6eb39a9df8a01b70796 ] kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. When 'soc_dev_attr->family' is NULL,it'll trigger the null pointer dereference issue, such as in 'soc_info_show'. And when 'soc_device_register' fails, it's necessary to release 'soc_dev_attr->family' to avoid memory leaks. Fixes: 6770b2114325 ("ARM: OMAP2+: Export SoC information to userspace") Signed-off-by: Kunwu Chan Message-ID: <20231123145237.609442-1-chentao@kylinos.cn> Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/id.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 98999aa8cc0c..7f387706368a 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -793,11 +793,16 @@ void __init omap_soc_device_init(void) soc_dev_attr->machine = soc_name; soc_dev_attr->family = omap_get_family(); + if (!soc_dev_attr->family) { + kfree(soc_dev_attr); + return; + } soc_dev_attr->revision = soc_rev; soc_dev_attr->custom_attr_group = omap_soc_groups[0]; soc_dev = soc_device_register(soc_dev_attr); if (IS_ERR(soc_dev)) { + kfree(soc_dev_attr->family); kfree(soc_dev_attr); return; } From 11dfea080f65a712f9f6e453badf27aefa2f2ba4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 29 Nov 2023 17:55:33 +0100 Subject: [PATCH 1205/1397] reset: Fix crash when freeing non-existent optional resets [ Upstream commit 4a6756f56bcf8e64c87144a626ce53aea4899c0e ] When obtaining one or more optional resets, non-existent resets are stored as NULL pointers, and all related error and cleanup paths need to take this into account. Currently only reset_control_put() and reset_control_bulk_put() get this right. All of __reset_control_bulk_get(), of_reset_control_array_get(), and reset_control_array_put() lack the proper checking, causing NULL pointer dereferences on failure or release. Fix this by moving the existing check from reset_control_bulk_put() to __reset_control_put_internal(), so it applies to all callers. The double check in reset_control_put() doesn't hurt. Fixes: 17c82e206d2a3cd8 ("reset: Add APIs to manage array of resets") Fixes: 48d71395896d54ee ("reset: Add reset_control_bulk API") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/2440edae7ca8534628cdbaf559ded288f2998178.1701276806.git.geert+renesas@glider.be Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/reset/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/reset/core.c b/drivers/reset/core.c index f0a076e94118..92cc13ef3e56 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -807,6 +807,9 @@ static void __reset_control_put_internal(struct reset_control *rstc) { lockdep_assert_held(&reset_list_mutex); + if (IS_ERR_OR_NULL(rstc)) + return; + kref_put(&rstc->refcnt, __reset_control_release); } @@ -1017,11 +1020,8 @@ EXPORT_SYMBOL_GPL(reset_control_put); void reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs) { mutex_lock(&reset_list_mutex); - while (num_rstcs--) { - if (IS_ERR_OR_NULL(rstcs[num_rstcs].rstc)) - continue; + while (num_rstcs--) __reset_control_put_internal(rstcs[num_rstcs].rstc); - } mutex_unlock(&reset_list_mutex); } EXPORT_SYMBOL_GPL(reset_control_bulk_put); From 602490b469e344a5f6f504caf1ce652ee5223df4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 8 Dec 2023 15:03:15 +0100 Subject: [PATCH 1206/1397] s390/vx: fix save/restore of fpu kernel context [ Upstream commit e6b2dab41888332bf83f592131e7ea07756770a4 ] The KERNEL_FPR mask only contains a flag for the first eight vector registers. However floating point registers overlay parts of the first sixteen vector registers. This could lead to vector register corruption if a kernel fpu context uses any of the vector registers 8 to 15 and is interrupted or calls a KERNEL_FPR context. If that context uses also vector registers 8 to 15, their contents will be corrupted on return. Luckily this is currently not a real bug, since the kernel has only one KERNEL_FPR user with s390_adjust_jiffies() and it is only using floating point registers 0 to 2. Fix this by using the correct bits for KERNEL_FPR. Fixes: 7f79695cc1b6 ("s390/fpu: improve kernel_fpu_[begin|end]") Signed-off-by: Heiko Carstens Reviewed-by: Hendrik Brueckner Signed-off-by: Alexander Gordeev Signed-off-by: Sasha Levin --- arch/s390/include/asm/fpu/api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index b714ed0ef688..9acf48e53a87 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -79,7 +79,7 @@ static inline int test_fp_ctl(u32 fpc) #define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31) #define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH) -#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_V0V7) +#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_LOW) struct kernel_fpu; From dd691e300d52502ca73b61e2ee18ad226e519a52 Mon Sep 17 00:00:00 2001 From: Rajvi Jingar Date: Fri, 15 Dec 2023 17:16:50 -0800 Subject: [PATCH 1207/1397] platform/x86/intel/pmc: Fix hang in pmc_core_send_ltr_ignore() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fbcf67ce5a9e2831c14bdfb895be05213e611724 ] For input value 0, PMC stays unassigned which causes crash while trying to access PMC for register read/write. Include LTR index 0 in pmc_index and ltr_index calculation. Fixes: 2bcef4529222 ("platform/x86:intel/pmc: Enable debugfs multiple PMC support") Signed-off-by: Rajvi Jingar Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231216011650.1973941-1-rajvi.jingar@linux.intel.com Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/intel/pmc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 84c175b9721a..e95d3011b999 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -472,7 +472,7 @@ int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value) * is based on the contiguous indexes from ltr_show output. * pmc index and ltr index needs to be calculated from it. */ - for (pmc_index = 0; pmc_index < ARRAY_SIZE(pmcdev->pmcs) && ltr_index > 0; pmc_index++) { + for (pmc_index = 0; pmc_index < ARRAY_SIZE(pmcdev->pmcs) && ltr_index >= 0; pmc_index++) { pmc = pmcdev->pmcs[pmc_index]; if (!pmc) From f685ef2c9ae8866dfafc1e32f378618e47e68019 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 18 Dec 2023 17:05:40 -0500 Subject: [PATCH 1208/1397] SUNRPC: Revert 5f7fc5d69f6e92ec0b38774c387f5cf7812c5806 [ Upstream commit bd018b98ba84ca0c80abac1ef23ce726a809e58c ] Guillaume says: > I believe commit 5f7fc5d69f6e ("SUNRPC: Resupply rq_pages from > node-local memory") in Linux 6.5+ is incorrect. It passes > unconditionally rq_pool->sp_id as the NUMA node. > > While the comment in the svc_pool declaration in sunrpc/svc.h says > that sp_id is also the NUMA node id, it might not be the case if > the svc is created using svc_create_pooled(). svc_created_pooled() > can use the per-cpu pool mode therefore in this case sp_id would > be the cpu id. Fix this by reverting now. At a later point this minor optimization, and the deceptive labeling of the sp_id field, can be revisited. Reported-by: Guillaume Morin Closes: https://lore.kernel.org/linux-nfs/ZYC9rsno8qYggVt9@bender.morinfr.org/T/#u Fixes: 5f7fc5d69f6e ("SUNRPC: Resupply rq_pages from node-local memory") Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- net/sunrpc/svc_xprt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 4cfe9640df48..5cfe5c7408b7 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -666,9 +666,8 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) } for (filled = 0; filled < pages; filled = ret) { - ret = alloc_pages_bulk_array_node(GFP_KERNEL, - rqstp->rq_pool->sp_id, - pages, rqstp->rq_pages); + ret = alloc_pages_bulk_array(GFP_KERNEL, pages, + rqstp->rq_pages); if (ret > filled) /* Made progress, don't sleep yet */ continue; From 968ed600024cc25c31135b004b17387ef6671f9e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Dec 2023 22:37:57 +0100 Subject: [PATCH 1209/1397] wifi: ieee80211: don't require protected vendor action frames [ Upstream commit 98fb9b9680c9f3895ced02d6a73e27f5d7b5892b ] For vendor action frames, whether a protected one should be used or not is clearly up to the individual vendor and frame, so even though a protected dual is defined, it may not get used. Thus, don't require protection for vendor action frames when they're used in a connection. Since we obviously don't process frames unknown to the kernel in the kernel, it may makes sense to invert this list to have all the ones the kernel processes and knows to be requiring protection, but that'd be a different change. Fixes: 91535613b609 ("wifi: mac80211: don't drop all unprotected public action frames") Reported-by: Jouni Malinen Link: https://msgid.link/20231206223801.f6a2cf4e67ec.Ifa6acc774bd67801d3dafb405278f297683187aa@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- include/linux/ieee80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b24fb80782c5..2b0a73cb7cbb 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4381,7 +4381,8 @@ ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) action != WLAN_PUB_ACTION_LOC_TRACK_NOTI && action != WLAN_PUB_ACTION_FTM_REQUEST && action != WLAN_PUB_ACTION_FTM_RESPONSE && - action != WLAN_PUB_ACTION_FILS_DISCOVERY; + action != WLAN_PUB_ACTION_FILS_DISCOVERY && + action != WLAN_PUB_ACTION_VENDOR_SPECIFIC; } /** From 1caf92e77c0007b230daaf89d80ba98f811f0b3d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 8 Dec 2023 18:32:02 +0200 Subject: [PATCH 1210/1397] wifi: iwlwifi: pcie: add another missing bh-disable for rxq->lock [ Upstream commit a4754182dc936b97ec7e9f6b08cdf7ed97ef9069 ] Evidently I had only looked at all the ones in rx.c, and missed this. Add bh-disable to this use of the rxq->lock as well. Fixes: 25edc8f259c7 ("iwlwifi: pcie: properly implement NAPI") Reported-by: Brian Norris Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20231208183100.e79ad3dae649.I8f19713c4383707f8be7fc20ff5cc1ecf12429bb@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 583d1011963e..2e23ccd7d793 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3088,7 +3088,7 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans, struct iwl_rxq *rxq = &trans_pcie->rxq[0]; u32 i, r, j, rb_len = 0; - spin_lock(&rxq->lock); + spin_lock_bh(&rxq->lock); r = le16_to_cpu(iwl_get_closed_rb_stts(trans, rxq)) & 0x0FFF; @@ -3112,7 +3112,7 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans, *data = iwl_fw_error_next_data(*data); } - spin_unlock(&rxq->lock); + spin_unlock_bh(&rxq->lock); return rb_len; } From b31a33ad4f55b95ca8c1dbcc25787bf0fbc7def4 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Wed, 29 Nov 2023 20:17:47 +0800 Subject: [PATCH 1211/1397] wifi: mac80211: check if the existing link config remains unchanged [ Upstream commit c1393c132b906fbdf91f6d1c9eb2ef7a00cce64e ] [Syz report] WARNING: CPU: 1 PID: 5067 at net/mac80211/rate.c:48 rate_control_rate_init+0x540/0x690 net/mac80211/rate.c:48 Modules linked in: CPU: 1 PID: 5067 Comm: syz-executor413 Not tainted 6.7.0-rc3-syzkaller-00014-gdf60cee26a2e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:rate_control_rate_init+0x540/0x690 net/mac80211/rate.c:48 Code: 48 c7 c2 00 46 0c 8c be 08 03 00 00 48 c7 c7 c0 45 0c 8c c6 05 70 79 0b 05 01 e8 1b a0 6f f7 e9 e0 fd ff ff e8 61 b3 8f f7 90 <0f> 0b 90 e9 36 ff ff ff e8 53 b3 8f f7 e8 5e 0b 78 f7 31 ff 89 c3 RSP: 0018:ffffc90003c57248 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff888016bc4000 RCX: ffffffff89f7d519 RDX: ffff888076d43b80 RSI: ffffffff89f7d6df RDI: 0000000000000005 RBP: ffff88801daaae20 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000002 R12: 0000000000000001 R13: 0000000000000000 R14: ffff888020030e20 R15: ffff888078f08000 FS: 0000555556b94380(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000005fdeb8 CR3: 0000000076d22000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: sta_apply_auth_flags.constprop.0+0x4b7/0x510 net/mac80211/cfg.c:1674 sta_apply_parameters+0xaf1/0x16c0 net/mac80211/cfg.c:2002 ieee80211_add_station+0x3fa/0x6c0 net/mac80211/cfg.c:2068 rdev_add_station net/wireless/rdev-ops.h:201 [inline] nl80211_new_station+0x13ba/0x1a70 net/wireless/nl80211.c:7603 genl_family_rcv_msg_doit+0x1fc/0x2e0 net/netlink/genetlink.c:972 genl_family_rcv_msg net/netlink/genetlink.c:1052 [inline] genl_rcv_msg+0x561/0x800 net/netlink/genetlink.c:1067 netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2545 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] netlink_unicast+0x53b/0x810 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x93c/0xe40 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b [Analysis] It is inappropriate to make a link configuration change judgment on an non-existent and non new link. [Fix] Quickly exit when there is a existent link and the link configuration has not changed. Fixes: b303835dabe0 ("wifi: mac80211: accept STA changes without link changes") Reported-and-tested-by: syzbot+62d7eef57b09bfebcd84@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Link: https://msgid.link/tencent_DE67FF86DB92ED465489A36ECD2EDDCC8C06@qq.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/cfg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 715da615f035..f7cb50b0dd4e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1806,10 +1806,10 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, lockdep_is_held(&local->sta_mtx)); /* - * If there are no changes, then accept a link that doesn't exist, + * If there are no changes, then accept a link that exist, * unless it's a new link. */ - if (params->link_id < 0 && !new_link && + if (params->link_id >= 0 && !new_link && !params->link_mac && !params->txpwr_set && !params->supported_rates_len && !params->ht_capa && !params->vht_capa && From 35de90d7fab8289d5fbd31406dde0b8944304113 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Dec 2023 09:05:19 +0200 Subject: [PATCH 1212/1397] wifi: mac80211: don't re-add debugfs during reconfig [ Upstream commit 63bafd9d5421959b2124dd940ed8d7462d99f449 ] If we're doing reconfig, then we cannot add the debugfs files that are already there from before the reconfig. Skip that in drv_change_sta_links() during reconfig. Fixes: d2caad527c19 ("wifi: mac80211: add API to show the link STAs in debugfs") Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Reviewed-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20231211085121.88a950f43e16.Id71181780994649219685887c0fcad33d387cc78@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/driver-ops.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index aa37a1410f37..f8af0c3d405a 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2015 Intel Deutschland GmbH - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include #include "ieee80211_i.h" @@ -564,6 +564,10 @@ int drv_change_sta_links(struct ieee80211_local *local, if (ret) return ret; + /* during reconfig don't add it to debugfs again */ + if (local->in_reconfig) + return 0; + for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { link_sta = rcu_dereference_protected(info->link[link_id], lockdep_is_held(&local->sta_mtx)); From 1ac3318338c446597675bc22912b83d068b44bf3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Dec 2023 09:05:30 +0200 Subject: [PATCH 1213/1397] wifi: mac80211: check defragmentation succeeded [ Upstream commit 98849ba2aa9db46e62720fb686a9d63ed9887806 ] We need to check that cfg80211_defragment_element() didn't return an error, since it can fail due to bad input, and we didn't catch that before. Fixes: 8eb8dd2ffbbb ("wifi: mac80211: Support link removal using Reconfiguration ML element") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20231211085121.8595a6b67fc0.I1225edd8f98355e007f96502e358e476c7971d8c@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mlme.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0c9198997482..73f8df03d159 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5805,7 +5805,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, { const struct ieee80211_multi_link_elem *ml; const struct element *sub; - size_t ml_len; + ssize_t ml_len; unsigned long removed_links = 0; u16 link_removal_timeout[IEEE80211_MLD_MAX_NUM_LINKS] = {}; u8 link_id; @@ -5821,6 +5821,8 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, elems->scratch + elems->scratch_len - elems->scratch_pos, WLAN_EID_FRAGMENT); + if (ml_len < 0) + return; elems->ml_reconf = (const void *)elems->scratch_pos; elems->ml_reconf_len = ml_len; From 1c8d80125142294052e808df6aedce53fb2d0ed8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Dec 2023 09:05:32 +0200 Subject: [PATCH 1214/1397] wifi: mac80211: mesh: check element parsing succeeded [ Upstream commit 1fc4a3eec50d726f4663ad3c0bb0158354d6647a ] ieee802_11_parse_elems() can return NULL, so we must check for the return value. Fixes: 5d24828d05f3 ("mac80211: always allocate struct ieee802_11_elems") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20231211085121.93dea364f3d3.Ie87781c6c48979fb25a744b90af4a33dc2d83a28@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mesh_plink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index a1e526419e9d..5c8a3ff0ae0c 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1243,6 +1243,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, return; } elems = ieee802_11_parse_elems(baseaddr, len - baselen, true, NULL); - mesh_process_plink_frame(sdata, mgmt, elems, rx_status); - kfree(elems); + if (elems) { + mesh_process_plink_frame(sdata, mgmt, elems, rx_status); + kfree(elems); + } } From 264796091cc69851ebf77968ddf4f683157302a2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Dec 2023 09:05:31 +0200 Subject: [PATCH 1215/1397] wifi: mac80211: mesh_plink: fix matches_local logic [ Upstream commit 8c386b166e2517cf3a123018e77941ec22625d0f ] During refactoring the "else" here got lost, add it back. Fixes: c99a89edb106 ("mac80211: factor out plink event gathering") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20231211085121.795480fa0e0b.I017d501196a5bbdcd9afd33338d342d6fe1edd79@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mesh_plink.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 5c8a3ff0ae0c..cc62c2a01f54 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1064,8 +1064,8 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_OPEN: if (!matches_local) event = OPN_RJCT; - if (!mesh_plink_free_count(sdata) || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + (sta->mesh->plid && sta->mesh->plid != plid)) event = OPN_IGNR; else event = OPN_ACPT; @@ -1073,9 +1073,9 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata, case WLAN_SP_MESH_PEERING_CONFIRM: if (!matches_local) event = CNF_RJCT; - if (!mesh_plink_free_count(sdata) || - sta->mesh->llid != llid || - (sta->mesh->plid && sta->mesh->plid != plid)) + else if (!mesh_plink_free_count(sdata) || + sta->mesh->llid != llid || + (sta->mesh->plid && sta->mesh->plid != plid)) event = CNF_IGNR; else event = CNF_ACPT; From 7c1e6f8f4626147b25243c29b532cad4ae1399bb Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Thu, 30 Nov 2023 17:58:06 +0100 Subject: [PATCH 1216/1397] ice: fix theoretical out-of-bounds access in ethtool link modes [ Upstream commit 91f9181c738101a276d9da333e0ab665ad806e6d ] To map phy types reported by the hardware to ethtool link mode bits, ice uses two lookup tables (phy_type_low_lkup, phy_type_high_lkup). The "low" table has 64 elements to cover every possible bit the hardware may report, but the "high" table has only 13. If the hardware reports a higher bit in phy_types_high, the driver would access memory beyond the lookup table's end. Instead of iterating through all 64 bits of phy_types_{low,high}, use the sizes of the respective lookup tables. Fixes: 9136e1f1e5c3 ("ice: refactor PHY type to ethtool link mode") Signed-off-by: Michal Schmidt Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index ad4d4702129f..9be13e984091 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1757,14 +1757,14 @@ ice_phy_type_to_ethtool(struct net_device *netdev, linkmode_zero(ks->link_modes.supported); linkmode_zero(ks->link_modes.advertising); - for (i = 0; i < BITS_PER_TYPE(u64); i++) { + for (i = 0; i < ARRAY_SIZE(phy_type_low_lkup); i++) { if (phy_types_low & BIT_ULL(i)) ice_linkmode_set_bit(&phy_type_low_lkup[i], ks, req_speeds, advert_phy_type_lo, i); } - for (i = 0; i < BITS_PER_TYPE(u64); i++) { + for (i = 0; i < ARRAY_SIZE(phy_type_high_lkup); i++) { if (phy_types_high & BIT_ULL(i)) ice_linkmode_set_bit(&phy_type_high_lkup[i], ks, req_speeds, advert_phy_type_hi, From bcc5b2d8a339f089572b0e6360916177a596344b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 1 Dec 2023 10:01:38 -0800 Subject: [PATCH 1217/1397] bpf: syzkaller found null ptr deref in unix_bpf proto add [ Upstream commit 8d6650646ce49e9a5b8c5c23eb94f74b1749f70f ] I added logic to track the sock pair for stream_unix sockets so that we ensure lifetime of the sock matches the time a sockmap could reference the sock (see fixes tag). I forgot though that we allow af_unix unconnected sockets into a sock{map|hash} map. This is problematic because previous fixed expected sk_pair() to exist and did not NULL check it. Because unconnected sockets have a NULL sk_pair this resulted in the NULL ptr dereference found by syzkaller. BUG: KASAN: null-ptr-deref in unix_stream_bpf_update_proto+0x72/0x430 net/unix/unix_bpf.c:171 Write of size 4 at addr 0000000000000080 by task syz-executor360/5073 Call Trace: ... sock_hold include/net/sock.h:777 [inline] unix_stream_bpf_update_proto+0x72/0x430 net/unix/unix_bpf.c:171 sock_map_init_proto net/core/sock_map.c:190 [inline] sock_map_link+0xb87/0x1100 net/core/sock_map.c:294 sock_map_update_common+0xf6/0x870 net/core/sock_map.c:483 sock_map_update_elem_sys+0x5b6/0x640 net/core/sock_map.c:577 bpf_map_update_value+0x3af/0x820 kernel/bpf/syscall.c:167 We considered just checking for the null ptr and skipping taking a ref on the NULL peer sock. But, if the socket is then connected() after being added to the sockmap we can cause the original issue again. So instead this patch blocks adding af_unix sockets that are not in the ESTABLISHED state. Reported-by: Eric Dumazet Reported-by: syzbot+e8030702aefd3444fb9e@syzkaller.appspotmail.com Fixes: 8866730aed51 ("bpf, sockmap: af_unix stream sockets need to hold ref for pair sock") Acked-by: Jakub Sitnicki Signed-off-by: John Fastabend Link: https://lore.kernel.org/r/20231201180139.328529-2-john.fastabend@gmail.com Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- include/net/sock.h | 5 +++++ net/core/sock_map.c | 2 ++ 2 files changed, 7 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index 7753354d59c0..1b7ca8f35dd6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2798,6 +2798,11 @@ static inline bool sk_is_tcp(const struct sock *sk) return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP; } +static inline bool sk_is_stream_unix(const struct sock *sk) +{ + return sk->sk_family == AF_UNIX && sk->sk_type == SOCK_STREAM; +} + /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4292c2ed1828..27d733c0f65e 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -536,6 +536,8 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) { if (sk_is_tcp(sk)) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); + if (sk_is_stream_unix(sk)) + return (1 << sk->sk_state) & TCPF_ESTABLISHED; return true; } From c47e9c569176cfe52d1cfc1572d6232999e65f35 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 21 Nov 2023 13:51:52 +0100 Subject: [PATCH 1218/1397] Revert "net/mlx5e: fix double free of encap_header in update funcs" [ Upstream commit 66ca8d4deca09bce3fc7bcf8ea7997fa1a51c33c ] This reverts commit 3a4aa3cb83563df942be49d145ee3b7ddf17d6bb. This patch is causing a null ptr issue, the proper fix is in the next patch. Fixes: 3a4aa3cb8356 ("net/mlx5e: fix double free of encap_header in update funcs") Signed-off-by: Vlad Buslov Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 668da5c70e63..8bca696b6658 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -403,12 +403,16 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, if (err) goto free_encap; + e->encap_size = ipv4_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto free_encap; + goto release_neigh; } memset(&reformat_params, 0, sizeof(reformat_params)); @@ -422,10 +426,6 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, goto free_encap; } - e->encap_size = ipv4_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -669,12 +669,16 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, if (err) goto free_encap; + e->encap_size = ipv6_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ - goto free_encap; + goto release_neigh; } memset(&reformat_params, 0, sizeof(reformat_params)); @@ -688,10 +692,6 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, goto free_encap; } - e->encap_size = ipv6_encap_size; - kfree(e->encap_header); - e->encap_header = encap_header; - e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); From 9c8d46bc054964ca10238893ce2acd42d97e891b Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 21 Nov 2023 13:52:28 +0100 Subject: [PATCH 1219/1397] Revert "net/mlx5e: fix double free of encap_header" [ Upstream commit 5d089684dc434a31e08d32f0530066d0025c52e4 ] This reverts commit 6f9b1a0731662648949a1c0587f6acb3b7f8acf1. This patch is causing a null ptr issue, the proper fix is in the next patch. Fixes: 6f9b1a073166 ("net/mlx5e: fix double free of encap_header") Signed-off-by: Vlad Buslov Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 8bca696b6658..00a04fdd756f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -300,6 +300,9 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -319,8 +322,6 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, goto destroy_neigh_entry; } - e->encap_size = ipv4_encap_size; - e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv4_put(&attr); @@ -567,6 +568,9 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, if (err) goto destroy_neigh_entry; + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + if (!(nud_state & NUD_VALID)) { neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event @@ -586,8 +590,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, goto destroy_neigh_entry; } - e->encap_size = ipv6_encap_size; - e->encap_header = encap_header; e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); mlx5e_route_lookup_ipv6_put(&attr); From c428f4934c980d970ebda6bd77bb54b657e7c243 Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Thu, 30 Nov 2023 01:46:56 -0800 Subject: [PATCH 1220/1397] net/mlx5e: Fix slab-out-of-bounds in mlx5_query_nic_vport_mac_list() [ Upstream commit ddb38ddff9c71026bad481b791a94d446ee37603 ] Out_sz that the size of out buffer is calculated using query_nic_vport _context_in structure when driver query the MAC list. However query_nic _vport_context_in structure is smaller than query_nic_vport_context_out. When allowed_list_size is greater than 96, calling ether_addr_copy() will trigger an slab-out-of-bounds. [ 1170.055866] BUG: KASAN: slab-out-of-bounds in mlx5_query_nic_vport_mac_list+0x481/0x4d0 [mlx5_core] [ 1170.055869] Read of size 4 at addr ffff88bdbc57d912 by task kworker/u128:1/461 [ 1170.055870] [ 1170.055932] Workqueue: mlx5_esw_wq esw_vport_change_handler [mlx5_core] [ 1170.055936] Call Trace: [ 1170.055949] dump_stack+0x8b/0xbb [ 1170.055958] print_address_description+0x6a/0x270 [ 1170.055961] kasan_report+0x179/0x2c0 [ 1170.056061] mlx5_query_nic_vport_mac_list+0x481/0x4d0 [mlx5_core] [ 1170.056162] esw_update_vport_addr_list+0x2c5/0xcd0 [mlx5_core] [ 1170.056257] esw_vport_change_handle_locked+0xd08/0x1a20 [mlx5_core] [ 1170.056377] esw_vport_change_handler+0x6b/0x90 [mlx5_core] [ 1170.056381] process_one_work+0x65f/0x12d0 [ 1170.056383] worker_thread+0x87/0xb50 [ 1170.056390] kthread+0x2e9/0x3a0 [ 1170.056394] ret_from_fork+0x1f/0x40 Fixes: e16aea2744ab ("net/mlx5: Introduce access functions to modify/query vport mac lists") Cc: Ding Hui Signed-off-by: Shifeng Li Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 5a31fb47ffa5..21753f327868 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -277,7 +277,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, req_list_size = max_list_size; } - out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_in) + + out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_out) + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); out = kvzalloc(out_sz, GFP_KERNEL); From 5f760d1eb75d1085186050e90ca597118f00f64a Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Sat, 2 Dec 2023 00:01:26 -0800 Subject: [PATCH 1221/1397] net/mlx5e: Fix a race in command alloc flow [ Upstream commit 8f5100da56b3980276234e812ce98d8f075194cd ] Fix a cmd->ent use after free due to a race on command entry. Such race occurs when one of the commands releases its last refcount and frees its index and entry while another process running command flush flow takes refcount to this command entry. The process which handles commands flush may see this command as needed to be flushed if the other process allocated a ent->idx but didn't set ent to cmd->ent_arr in cmd_work_handler(). Fix it by moving the assignment of cmd->ent_arr into the spin lock. [70013.081955] BUG: KASAN: use-after-free in mlx5_cmd_trigger_completions+0x1e2/0x4c0 [mlx5_core] [70013.081967] Write of size 4 at addr ffff88880b1510b4 by task kworker/26:1/1433361 [70013.081968] [70013.082028] Workqueue: events aer_isr [70013.082053] Call Trace: [70013.082067] dump_stack+0x8b/0xbb [70013.082086] print_address_description+0x6a/0x270 [70013.082102] kasan_report+0x179/0x2c0 [70013.082173] mlx5_cmd_trigger_completions+0x1e2/0x4c0 [mlx5_core] [70013.082267] mlx5_cmd_flush+0x80/0x180 [mlx5_core] [70013.082304] mlx5_enter_error_state+0x106/0x1d0 [mlx5_core] [70013.082338] mlx5_try_fast_unload+0x2ea/0x4d0 [mlx5_core] [70013.082377] remove_one+0x200/0x2b0 [mlx5_core] [70013.082409] pci_device_remove+0xf3/0x280 [70013.082439] device_release_driver_internal+0x1c3/0x470 [70013.082453] pci_stop_bus_device+0x109/0x160 [70013.082468] pci_stop_and_remove_bus_device+0xe/0x20 [70013.082485] pcie_do_fatal_recovery+0x167/0x550 [70013.082493] aer_isr+0x7d2/0x960 [70013.082543] process_one_work+0x65f/0x12d0 [70013.082556] worker_thread+0x87/0xb50 [70013.082571] kthread+0x2e9/0x3a0 [70013.082592] ret_from_fork+0x1f/0x40 The logical relationship of this error is as follows: aer_recover_work | ent->work -------------------------------------------+------------------------------ aer_recover_work_func | |- pcie_do_recovery | |- report_error_detected | |- mlx5_pci_err_detected |cmd_work_handler |- mlx5_enter_error_state | |- cmd_alloc_index |- enter_error_state | |- lock cmd->alloc_lock |- mlx5_cmd_flush | |- clear_bit |- mlx5_cmd_trigger_completions| |- unlock cmd->alloc_lock |- lock cmd->alloc_lock | |- vector = ~dev->cmd.vars.bitmask |- for_each_set_bit | |- cmd_ent_get(cmd->ent_arr[i]) (UAF) |- unlock cmd->alloc_lock | |- cmd->ent_arr[ent->idx]=ent The cmd->ent_arr[ent->idx] assignment and the bit clearing are not protected by the cmd->alloc_lock in cmd_work_handler(). Fixes: 50b2412b7e78 ("net/mlx5: Avoid possible free of command entry while timeout comp handler") Reviewed-by: Moshe Shemesh Signed-off-by: Shifeng Li Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c22b0ad0c870..7013e1c8741a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -156,15 +156,18 @@ static u8 alloc_token(struct mlx5_cmd *cmd) return token; } -static int cmd_alloc_index(struct mlx5_cmd *cmd) +static int cmd_alloc_index(struct mlx5_cmd *cmd, struct mlx5_cmd_work_ent *ent) { unsigned long flags; int ret; spin_lock_irqsave(&cmd->alloc_lock, flags); ret = find_first_bit(&cmd->vars.bitmask, cmd->vars.max_reg_cmds); - if (ret < cmd->vars.max_reg_cmds) + if (ret < cmd->vars.max_reg_cmds) { clear_bit(ret, &cmd->vars.bitmask); + ent->idx = ret; + cmd->ent_arr[ent->idx] = ent; + } spin_unlock_irqrestore(&cmd->alloc_lock, flags); return ret < cmd->vars.max_reg_cmds ? ret : -ENOMEM; @@ -977,7 +980,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? &cmd->vars.pages_sem : &cmd->vars.sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index(cmd); + alloc_ret = cmd_alloc_index(cmd, ent); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { @@ -992,15 +995,14 @@ static void cmd_work_handler(struct work_struct *work) up(sem); return; } - ent->idx = alloc_ret; } else { ent->idx = cmd->vars.max_reg_cmds; spin_lock_irqsave(&cmd->alloc_lock, flags); clear_bit(ent->idx, &cmd->vars.bitmask); + cmd->ent_arr[ent->idx] = ent; spin_unlock_irqrestore(&cmd->alloc_lock, flags); } - cmd->ent_arr[ent->idx] = ent; lay = get_inst(cmd, ent->idx); ent->lay = lay; memset(lay, 0, sizeof(*lay)); From 2f4d6328364514affa3105de6b4d54a378eb259e Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Tue, 28 Nov 2023 17:40:53 +0800 Subject: [PATCH 1222/1397] net/mlx5e: fix a potential double-free in fs_udp_create_groups [ Upstream commit e75efc6466ae289e599fb12a5a86545dff245c65 ] When kcalloc() for ft->g succeeds but kvzalloc() for in fails, fs_udp_create_groups() will free ft->g. However, its caller fs_udp_create_table() will free ft->g again through calling mlx5e_destroy_flow_table(), which will lead to a double-free. Fix this by setting ft->g to NULL in fs_udp_create_groups(). Fixes: 1c80bd684388 ("net/mlx5e: Introduce Flow Steering UDP API") Signed-off-by: Dinghao Liu Reviewed-by: Tariq Toukan Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index be83ad9db82a..e1283531e0b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -154,6 +154,7 @@ static int fs_udp_create_groups(struct mlx5e_flow_table *ft, enum fs_udp_type ty in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } From 595d51b29511073390e5c5d3afaa483926499a9e Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 14 Nov 2023 01:25:21 +0000 Subject: [PATCH 1223/1397] net/mlx5e: Fix overrun reported by coverity [ Upstream commit da75fa542873e5f7d7f615566c0b00042d8a0437 ] Coverity Scan reports the following issue. But it's impossible that mlx5_get_dev_index returns 7 for PF, even if the index is calculated from PCI FUNC ID. So add the checking to make coverity slience. CID 610894 (#2 of 2): Out-of-bounds write (OVERRUN) Overrunning array esw->fdb_table.offloads.peer_miss_rules of 4 8-byte elements at element index 7 (byte offset 63) using index mlx5_get_dev_index(peer_dev) (which evaluates to 7). Fixes: 9bee385a6e39 ("net/mlx5: E-switch, refactor FDB miss rule add/remove") Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bb8bcb448ae9..9bd5609cf659 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1177,9 +1177,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_flow_handle *flow; struct mlx5_flow_spec *spec; struct mlx5_vport *vport; + int err, pfindex; unsigned long i; void *misc; - int err; if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev)) return 0; @@ -1255,7 +1255,15 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[vport->index] = flow; } } - esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)] = flows; + + pfindex = mlx5_get_dev_index(peer_dev); + if (pfindex >= MLX5_MAX_PORTS) { + esw_warn(esw->dev, "Peer dev index(%d) is over the max num defined(%d)\n", + pfindex, MLX5_MAX_PORTS); + err = -EINVAL; + goto add_ec_vf_flow_err; + } + esw->fdb_table.offloads.peer_miss_rules[pfindex] = flows; kvfree(spec); return 0; From 2da82046dfd9b9d69ac94ee6faddb73d5c05d181 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 29 Nov 2023 04:53:32 +0200 Subject: [PATCH 1224/1397] net/mlx5e: Decrease num_block_tc when unblock tc offload [ Upstream commit be86106fd74a145f24c56c9bc18d658e8fe6d4f4 ] The cited commit increases num_block_tc when unblock tc offload. Actually should decrease it. Fixes: c8e350e62fc5 ("net/mlx5e: Make TC and IPsec offloads mutually exclusive on a netdev") Signed-off-by: Chris Mi Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 03f69c485a00..81e6aa6434cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -1866,7 +1866,7 @@ static int mlx5e_ipsec_block_tc_offload(struct mlx5_core_dev *mdev) static void mlx5e_ipsec_unblock_tc_offload(struct mlx5_core_dev *mdev) { - mdev->num_block_tc++; + mdev->num_block_tc--; } int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) From 8bcb51d06a473ab71aceb6597e4acc972a35ed3e Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 23 Nov 2023 16:11:20 +0200 Subject: [PATCH 1225/1397] net/mlx5e: XDP, Drop fragmented packets larger than MTU size [ Upstream commit bcaf109f794744c14da0e9123b31d1f4571b0a35 ] XDP transmits fragmented packets that are larger than MTU size instead of dropping those packets. The drop check that checks whether a packet is larger than MTU is comparing MTU size against the linear part length only. Adjust the drop check to compare MTU size against both linear and non-linear part lengths to avoid transmitting fragmented packets larger than MTU size. Fixes: 39a1665d16a2 ("net/mlx5e: Implement sending multi buffer XDP frames") Signed-off-by: Carolina Jubran Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 8bed17d8fe56..b723ff5e5249 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -493,6 +493,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, dma_addr_t dma_addr = xdptxd->dma_addr; u32 dma_len = xdptxd->len; u16 ds_cnt, inline_hdr_sz; + unsigned int frags_size; u8 num_wqebbs = 1; int num_frags = 0; bool inline_ok; @@ -503,8 +504,9 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE || dma_len >= MLX5E_XDP_MIN_INLINE; + frags_size = xdptxd->has_frags ? xdptxdf->sinfo->xdp_frags_size : 0; - if (unlikely(!inline_ok || sq->hw_mtu < dma_len)) { + if (unlikely(!inline_ok || sq->hw_mtu < dma_len + frags_size)) { stats->err++; return false; } From d4d25b7b32694ac33b82c5b4f23f2b1024108577 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 30 Nov 2023 11:30:34 +0200 Subject: [PATCH 1226/1397] net/mlx5: Fix fw tracer first block check [ Upstream commit 4261edf11cb7c9224af713a102e5616329306932 ] While handling new traces, to verify it is not the first block being written, last_timestamp is checked. But instead of checking it is non zero it is verified to be zero. Fix to verify last_timestamp is not zero. Fixes: c71ad41ccb0c ("net/mlx5: FW tracer, events handling") Signed-off-by: Moshe Shemesh Reviewed-by: Feras Daoud Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index ad789349c06e..85d3bfa0780c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -718,7 +718,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work) while (block_timestamp > tracer->last_timestamp) { /* Check block override if it's not the first block */ - if (!tracer->last_timestamp) { + if (tracer->last_timestamp) { u64 *ts_event; /* To avoid block override be the HW in case of buffer * wraparound, the time stamp of the previous block From 96c8c465f77ff5fae23a0e6d839ffb1737941cc6 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 6 Oct 2023 15:22:22 +0200 Subject: [PATCH 1227/1397] net/mlx5: Refactor mlx5_flow_destination->rep pointer to vport num [ Upstream commit 04ad04e4fdd10f92ef4f2b3f6227ec9824682197 ] Currently the destination rep pointer is only used for comparisons or to obtain vport number from it. Since it is used both during flow creation and deletion it may point to representor of another eswitch instance which can be deallocated during driver unload even when there are rules pointing to it[0]. Refactor the code to store vport number and 'valid' flag instead of the representor pointer. [0]: [176805.886303] ================================================================== [176805.889433] BUG: KASAN: slab-use-after-free in esw_cleanup_dests+0x390/0x440 [mlx5_core] [176805.892981] Read of size 2 at addr ffff888155090aa0 by task modprobe/27280 [176805.895462] CPU: 3 PID: 27280 Comm: modprobe Tainted: G B 6.6.0-rc3+ #1 [176805.896771] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [176805.898514] Call Trace: [176805.899026] [176805.899519] dump_stack_lvl+0x33/0x50 [176805.900221] print_report+0xc2/0x610 [176805.900893] ? mlx5_chains_put_table+0x33d/0x8d0 [mlx5_core] [176805.901897] ? esw_cleanup_dests+0x390/0x440 [mlx5_core] [176805.902852] kasan_report+0xac/0xe0 [176805.903509] ? esw_cleanup_dests+0x390/0x440 [mlx5_core] [176805.904461] esw_cleanup_dests+0x390/0x440 [mlx5_core] [176805.905223] __mlx5_eswitch_del_rule+0x1ae/0x460 [mlx5_core] [176805.906044] ? esw_cleanup_dests+0x440/0x440 [mlx5_core] [176805.906822] ? xas_find_conflict+0x420/0x420 [176805.907496] ? down_read+0x11e/0x200 [176805.908046] mlx5e_tc_rule_unoffload+0xc4/0x2a0 [mlx5_core] [176805.908844] mlx5e_tc_del_fdb_flow+0x7da/0xb10 [mlx5_core] [176805.909597] mlx5e_flow_put+0x4b/0x80 [mlx5_core] [176805.910275] mlx5e_delete_flower+0x5b4/0xb70 [mlx5_core] [176805.911010] tc_setup_cb_reoffload+0x27/0xb0 [176805.911648] fl_reoffload+0x62d/0x900 [cls_flower] [176805.912313] ? mlx5e_rep_indr_block_unbind+0xd0/0xd0 [mlx5_core] [176805.913151] ? __fl_put+0x230/0x230 [cls_flower] [176805.913768] ? filter_irq_stacks+0x90/0x90 [176805.914335] ? kasan_save_stack+0x1e/0x40 [176805.914893] ? kasan_set_track+0x21/0x30 [176805.915484] ? kasan_save_free_info+0x27/0x40 [176805.916105] tcf_block_playback_offloads+0x79/0x1f0 [176805.916773] ? mlx5e_rep_indr_block_unbind+0xd0/0xd0 [mlx5_core] [176805.917647] tcf_block_unbind+0x12d/0x330 [176805.918239] tcf_block_offload_cmd.isra.0+0x24e/0x320 [176805.918953] ? tcf_block_bind+0x770/0x770 [176805.919551] ? _raw_read_unlock_irqrestore+0x30/0x30 [176805.920236] ? mutex_lock+0x7d/0xd0 [176805.920735] ? mutex_unlock+0x80/0xd0 [176805.921255] tcf_block_offload_unbind+0xa5/0x120 [176805.921909] __tcf_block_put+0xc2/0x2d0 [176805.922467] ingress_destroy+0xf4/0x3d0 [sch_ingress] [176805.923178] __qdisc_destroy+0x9d/0x280 [176805.923741] dev_shutdown+0x1c6/0x330 [176805.924295] unregister_netdevice_many_notify+0x6ef/0x1500 [176805.925034] ? netdev_freemem+0x50/0x50 [176805.925610] ? _raw_spin_lock_irq+0x7b/0xd0 [176805.926235] ? _raw_spin_lock_bh+0xe0/0xe0 [176805.926849] unregister_netdevice_queue+0x1e0/0x280 [176805.927592] ? unregister_netdevice_many+0x10/0x10 [176805.928275] unregister_netdev+0x18/0x20 [176805.928835] mlx5e_vport_rep_unload+0xc0/0x200 [mlx5_core] [176805.929608] mlx5_esw_offloads_unload_rep+0x9d/0xc0 [mlx5_core] [176805.930492] mlx5_eswitch_unload_vf_vports+0x108/0x1a0 [mlx5_core] [176805.931422] ? mlx5_eswitch_unload_sf_vport+0x50/0x50 [mlx5_core] [176805.932304] ? rwsem_down_write_slowpath+0x11f0/0x11f0 [176805.932987] mlx5_eswitch_disable_sriov+0x6f9/0xa60 [mlx5_core] [176805.933807] ? mlx5_core_disable_hca+0xe1/0x130 [mlx5_core] [176805.934576] ? mlx5_eswitch_disable_locked+0x580/0x580 [mlx5_core] [176805.935463] mlx5_device_disable_sriov+0x138/0x490 [mlx5_core] [176805.936308] mlx5_sriov_disable+0x8c/0xb0 [mlx5_core] [176805.937063] remove_one+0x7f/0x210 [mlx5_core] [176805.937711] pci_device_remove+0x96/0x1c0 [176805.938289] device_release_driver_internal+0x361/0x520 [176805.938981] ? kobject_put+0x5c/0x330 [176805.939553] driver_detach+0xd7/0x1d0 [176805.940101] bus_remove_driver+0x11f/0x290 [176805.943847] pci_unregister_driver+0x23/0x1f0 [176805.944505] mlx5_cleanup+0xc/0x20 [mlx5_core] [176805.945189] __x64_sys_delete_module+0x2b3/0x450 [176805.945837] ? module_flags+0x300/0x300 [176805.946377] ? dput+0xc2/0x830 [176805.946848] ? __kasan_record_aux_stack+0x9c/0xb0 [176805.947555] ? __call_rcu_common.constprop.0+0x46c/0xb50 [176805.948338] ? fpregs_assert_state_consistent+0x1d/0xa0 [176805.949055] ? exit_to_user_mode_prepare+0x30/0x120 [176805.949713] do_syscall_64+0x3d/0x90 [176805.950226] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [176805.950904] RIP: 0033:0x7f7f42c3f5ab [176805.951462] Code: 73 01 c3 48 8b 0d 75 a8 1b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 45 a8 1b 00 f7 d8 64 89 01 48 [176805.953710] RSP: 002b:00007fff07dc9d08 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [176805.954691] RAX: ffffffffffffffda RBX: 000055b6e91c01e0 RCX: 00007f7f42c3f5ab [176805.955691] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000055b6e91c0248 [176805.956662] RBP: 000055b6e91c01e0 R08: 0000000000000000 R09: 0000000000000000 [176805.957601] R10: 00007f7f42d9eac0 R11: 0000000000000206 R12: 000055b6e91c0248 [176805.958593] R13: 0000000000000000 R14: 000055b6e91bfb38 R15: 0000000000000000 [176805.959599] [176805.960324] Allocated by task 20490: [176805.960893] kasan_save_stack+0x1e/0x40 [176805.961463] kasan_set_track+0x21/0x30 [176805.962019] __kasan_kmalloc+0x77/0x90 [176805.962554] esw_offloads_init+0x1bb/0x480 [mlx5_core] [176805.963318] mlx5_eswitch_init+0xc70/0x15c0 [mlx5_core] [176805.964092] mlx5_init_one_devl_locked+0x366/0x1230 [mlx5_core] [176805.964902] probe_one+0x6f7/0xc90 [mlx5_core] [176805.965541] local_pci_probe+0xd7/0x180 [176805.966075] pci_device_probe+0x231/0x6f0 [176805.966631] really_probe+0x1d4/0xb50 [176805.967179] __driver_probe_device+0x18d/0x450 [176805.967810] driver_probe_device+0x49/0x120 [176805.968431] __driver_attach+0x1fb/0x490 [176805.968976] bus_for_each_dev+0xed/0x170 [176805.969560] bus_add_driver+0x21a/0x570 [176805.970124] driver_register+0x133/0x460 [176805.970684] 0xffffffffa0678065 [176805.971180] do_one_initcall+0x92/0x2b0 [176805.971744] do_init_module+0x22d/0x720 [176805.972318] load_module+0x58c3/0x63b0 [176805.972847] init_module_from_file+0xd2/0x130 [176805.973441] __x64_sys_finit_module+0x389/0x7c0 [176805.974045] do_syscall_64+0x3d/0x90 [176805.974556] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [176805.975566] Freed by task 27280: [176805.976077] kasan_save_stack+0x1e/0x40 [176805.976655] kasan_set_track+0x21/0x30 [176805.977221] kasan_save_free_info+0x27/0x40 [176805.977834] ____kasan_slab_free+0x11a/0x1b0 [176805.978505] __kmem_cache_free+0x163/0x2d0 [176805.979113] esw_offloads_cleanup_reps+0xb8/0x120 [mlx5_core] [176805.979963] mlx5_eswitch_cleanup+0x182/0x270 [mlx5_core] [176805.980763] mlx5_cleanup_once+0x9a/0x1e0 [mlx5_core] [176805.981477] mlx5_uninit_one+0xa9/0x180 [mlx5_core] [176805.982196] remove_one+0x8f/0x210 [mlx5_core] [176805.982868] pci_device_remove+0x96/0x1c0 [176805.983461] device_release_driver_internal+0x361/0x520 [176805.984169] driver_detach+0xd7/0x1d0 [176805.984702] bus_remove_driver+0x11f/0x290 [176805.985261] pci_unregister_driver+0x23/0x1f0 [176805.985847] mlx5_cleanup+0xc/0x20 [mlx5_core] [176805.986483] __x64_sys_delete_module+0x2b3/0x450 [176805.987126] do_syscall_64+0x3d/0x90 [176805.987665] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [176805.988667] Last potentially related work creation: [176805.989305] kasan_save_stack+0x1e/0x40 [176805.989839] __kasan_record_aux_stack+0x9c/0xb0 [176805.990443] kvfree_call_rcu+0x84/0xa30 [176805.990973] clean_xps_maps+0x265/0x6e0 [176805.991547] netif_reset_xps_queues.part.0+0x3f/0x80 [176805.992226] unregister_netdevice_many_notify+0xfcf/0x1500 [176805.992966] unregister_netdevice_queue+0x1e0/0x280 [176805.993638] unregister_netdev+0x18/0x20 [176805.994205] mlx5e_remove+0xba/0x1e0 [mlx5_core] [176805.994872] auxiliary_bus_remove+0x52/0x70 [176805.995490] device_release_driver_internal+0x361/0x520 [176805.996196] bus_remove_device+0x1e1/0x3d0 [176805.996767] device_del+0x390/0x980 [176805.997270] mlx5_rescan_drivers_locked.part.0+0x130/0x540 [mlx5_core] [176805.998195] mlx5_unregister_device+0x77/0xc0 [mlx5_core] [176805.998989] mlx5_uninit_one+0x41/0x180 [mlx5_core] [176805.999719] remove_one+0x8f/0x210 [mlx5_core] [176806.000387] pci_device_remove+0x96/0x1c0 [176806.000938] device_release_driver_internal+0x361/0x520 [176806.001612] unbind_store+0xd8/0xf0 [176806.002108] kernfs_fop_write_iter+0x2c0/0x440 [176806.002748] vfs_write+0x725/0xba0 [176806.003294] ksys_write+0xed/0x1c0 [176806.003823] do_syscall_64+0x3d/0x90 [176806.004357] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [176806.005317] The buggy address belongs to the object at ffff888155090a80 which belongs to the cache kmalloc-64 of size 64 [176806.006774] The buggy address is located 32 bytes inside of freed 64-byte region [ffff888155090a80, ffff888155090ac0) [176806.008773] The buggy address belongs to the physical page: [176806.009480] page:00000000a407e0e6 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x155090 [176806.010633] flags: 0x200000000000800(slab|node=0|zone=2) [176806.011352] page_type: 0xffffffff() [176806.011905] raw: 0200000000000800 ffff888100042640 ffffea000422b1c0 dead000000000004 [176806.012949] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 [176806.013933] page dumped because: kasan: bad access detected [176806.014935] Memory state around the buggy address: [176806.015601] ffff888155090980: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [176806.016568] ffff888155090a00: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [176806.017497] >ffff888155090a80: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [176806.018438] ^ [176806.019007] ffff888155090b00: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [176806.020001] ffff888155090b80: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [176806.020996] ================================================================== Fixes: a508728a4c8b ("net/mlx5e: VF tunnel RX traffic offloading") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/en/tc/act/mirred.c | 5 +++-- .../mellanox/mlx5/core/en/tc_tun_encap.c | 3 ++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 3 ++- .../mellanox/mlx5/core/eswitch_offloads.c | 19 +++++++++---------- .../mlx5/core/eswitch_offloads_termtbl.c | 4 ++-- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index f63402c48028..1b418095b79a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -197,7 +197,7 @@ parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state, } esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; esw_attr->out_count++; - /* attr->dests[].rep is resolved when we handle encap */ + /* attr->dests[].vport is resolved when we handle encap */ return 0; } @@ -270,7 +270,8 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; - esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; + esw_attr->dests[esw_attr->out_count].vport_valid = true; + esw_attr->dests[esw_attr->out_count].vport = rpriv->rep->vport; esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; esw_attr->out_count++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index b10e40e1a9c1..f1d1e1542e81 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1064,7 +1064,8 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; - esw_attr->dests[out_index].rep = rpriv->rep; + esw_attr->dests[out_index].vport_valid = true; + esw_attr->dests[out_index].vport = rpriv->rep->vport; esw_attr->dests[out_index].mdev = out_priv->mdev; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b674b57d05aa..b4eb17141edf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -526,7 +526,8 @@ struct mlx5_esw_flow_attr { u8 total_vlan; struct { u32 flags; - struct mlx5_eswitch_rep *rep; + bool vport_valid; + u16 vport; struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_core_dev *mdev; struct mlx5_termtbl_handle *termtbl; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9bd5609cf659..b0455134c98e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -287,10 +287,9 @@ static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_ for (i = from; i < to; i++) if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) mlx5_chains_put_table(chains, 0, 1, 0); - else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].vport, esw_attr->dests[i].mdev)) - mlx5_esw_indir_table_put(esw, esw_attr->dests[i].rep->vport, - false); + mlx5_esw_indir_table_put(esw, esw_attr->dests[i].vport, false); } static bool @@ -358,8 +357,8 @@ esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) * this criteria. */ for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { - if (esw_attr->dests[i].rep && - mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + if (esw_attr->dests[i].vport_valid && + mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].vport, esw_attr->dests[i].mdev)) { result = true; } else { @@ -388,7 +387,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest, dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, - esw_attr->dests[j].rep->vport, false); + esw_attr->dests[j].vport, false); if (IS_ERR(dest[*i].ft)) { err = PTR_ERR(dest[*i].ft); goto err_indir_tbl_get; @@ -432,11 +431,11 @@ static bool esw_setup_uplink_fwd_ipsec_needed(struct mlx5_eswitch *esw, int attr_idx) { if (esw->offloads.ft_ipsec_tx_pol && - esw_attr->dests[attr_idx].rep && - esw_attr->dests[attr_idx].rep->vport == MLX5_VPORT_UPLINK && + esw_attr->dests[attr_idx].vport_valid && + esw_attr->dests[attr_idx].vport == MLX5_VPORT_UPLINK && /* To be aligned with software, encryption is needed only for tunnel device */ (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) && - esw_attr->dests[attr_idx].rep != esw_attr->in_rep && + esw_attr->dests[attr_idx].vport != esw_attr->in_rep->vport && esw_same_vhca_id(esw_attr->dests[attr_idx].mdev, esw->dev)) return true; @@ -469,7 +468,7 @@ esw_setup_dest_fwd_vport(struct mlx5_flow_destination *dest, struct mlx5_flow_ac int attr_idx, int dest_idx, bool pkt_reformat) { dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; + dest[dest_idx].vport.num = esw_attr->dests[attr_idx].vport; if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { dest[dest_idx].vport.vhca_id = MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index edd910258314..40bdc677f051 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -233,8 +233,8 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, /* hairpin */ for (i = esw_attr->split_count; i < esw_attr->out_count; i++) - if (!esw_attr->dest_int_port && esw_attr->dests[i].rep && - esw_attr->dests[i].rep->vport == MLX5_VPORT_UPLINK) + if (!esw_attr->dest_int_port && esw_attr->dests[i].vport_valid && + esw_attr->dests[i].vport == MLX5_VPORT_UPLINK) return true; return false; From 186854bdbac653d656645d5566cc35bb977aea7f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Dec 2023 17:08:17 +0300 Subject: [PATCH 1228/1397] net/mlx5e: Fix error code in mlx5e_tc_action_miss_mapping_get() [ Upstream commit 86d5922679f3b6d02a64df66cdd777fdd4ea5c0d ] Preserve the error code if esw_add_restore_rule() fails. Don't return success. Fixes: 6702782845a5 ("net/mlx5e: TC, Set CT miss to the specific ct action instance") Signed-off-by: Dan Carpenter Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1bead98f73bf..2cfbacf77535 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5734,8 +5734,10 @@ int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_a esw = priv->mdev->priv.eswitch; attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping); - if (IS_ERR(attr->act_id_restore_rule)) + if (IS_ERR(attr->act_id_restore_rule)) { + err = PTR_ERR(attr->act_id_restore_rule); goto err_rule; + } return 0; From 46538a6f57f920f76b9b1551bc85a794e2b7c10e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Dec 2023 17:08:57 +0300 Subject: [PATCH 1229/1397] net/mlx5e: Fix error codes in alloc_branch_attr() [ Upstream commit d792e5f7f19b95f5ce41ac49df5ead4d280238f4 ] Set the error code if set_branch_dest_ft() fails. Fixes: ccbe33003b10 ("net/mlx5e: TC, Don't offload post action rule if not supported") Signed-off-by: Dan Carpenter Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2cfbacf77535..25e44ee5121a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3776,7 +3776,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, break; case FLOW_ACTION_ACCEPT: case FLOW_ACTION_PIPE: - if (set_branch_dest_ft(flow->priv, attr)) + err = set_branch_dest_ft(flow->priv, attr); + if (err) goto out_err; break; case FLOW_ACTION_JUMP: @@ -3786,7 +3787,8 @@ alloc_branch_attr(struct mlx5e_tc_flow *flow, goto out_err; } *jump_count = cond->extval; - if (set_branch_dest_ft(flow->priv, attr)) + err = set_branch_dest_ft(flow->priv, attr); + if (err) goto out_err; break; default: From 952446adc20154df089374066e4c32cde840245d Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 21 Nov 2023 15:00:21 -0800 Subject: [PATCH 1230/1397] net/mlx5e: Correct snprintf truncation handling for fw_version buffer [ Upstream commit ad436b9c1270c40554e274f067f1b78fcc06a004 ] snprintf returns the length of the formatted string, excluding the trailing null, without accounting for truncation. This means that is the return value is greater than or equal to the size parameter, the fw_version string was truncated. Reported-by: David Laight Closes: https://lore.kernel.org/netdev/81cae734ee1b4cde9b380a9a31006c1a@AcuMS.aculab.com/ Link: https://docs.kernel.org/core-api/kernel-api.html#c.snprintf Fixes: 41e63c2baa11 ("net/mlx5e: Check return value of snprintf writing to fw_version buffer") Signed-off-by: Rahul Rameshbabu Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 7c66bd73ddfa..38263d5c98b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -49,7 +49,7 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - if (count == sizeof(drvinfo->fw_version)) + if (count >= sizeof(drvinfo->fw_version)) snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); From 38de00323785129a1c888dbcf7de007607464261 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 21 Nov 2023 15:00:22 -0800 Subject: [PATCH 1231/1397] net/mlx5e: Correct snprintf truncation handling for fw_version buffer used by representors [ Upstream commit b13559b76157de9d74f04d3ca0e49d69de3b5675 ] snprintf returns the length of the formatted string, excluding the trailing null, without accounting for truncation. This means that is the return value is greater than or equal to the size parameter, the fw_version string was truncated. Link: https://docs.kernel.org/core-api/kernel-api.html#c.snprintf Fixes: 1b2bd0c0264f ("net/mlx5e: Check return value of snprintf writing to fw_version buffer for representors") Signed-off-by: Rahul Rameshbabu Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 007cb167cabc..751d3ffcd2f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -78,7 +78,7 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev, count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - if (count == sizeof(drvinfo->fw_version)) + if (count >= sizeof(drvinfo->fw_version)) snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d.%d.%04d", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); From 30108546942ee14673800e163a7697b4bae610a3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 14 Dec 2023 02:09:01 +0200 Subject: [PATCH 1232/1397] net: mscc: ocelot: fix eMAC TX RMON stats for bucket 256-511 and above [ Upstream commit 52eda4641d041667fa059f4855c5f88dcebd8afe ] There is a typo in the driver due to which we report incorrect TX RMON counters for the 256-511 octet bucket and all the other buckets larger than that. Bug found with the selftest at https://patchwork.kernel.org/project/netdevbpf/patch/20231211223346.2497157-9-tobias@waldekranz.com/ Fixes: e32036e1ae7b ("net: mscc: ocelot: add support for all sorts of standardized counters present in DSA") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231214000902.545625-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot_stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c index 5c55197c7327..f29fa37263da 100644 --- a/drivers/net/ethernet/mscc/ocelot_stats.c +++ b/drivers/net/ethernet/mscc/ocelot_stats.c @@ -582,10 +582,10 @@ static void ocelot_port_rmon_stats_cb(struct ocelot *ocelot, int port, void *pri rmon_stats->hist_tx[0] = s[OCELOT_STAT_TX_64]; rmon_stats->hist_tx[1] = s[OCELOT_STAT_TX_65_127]; rmon_stats->hist_tx[2] = s[OCELOT_STAT_TX_128_255]; - rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_128_255]; - rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_256_511]; - rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_512_1023]; - rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_1024_1526]; + rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_256_511]; + rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_512_1023]; + rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_1024_1526]; + rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_1527_MAX]; } static void ocelot_port_pmac_rmon_stats_cb(struct ocelot *ocelot, int port, From fd0f5c1a314c352aac1b45a464edcd4cd323e1b4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 14 Dec 2023 02:09:02 +0200 Subject: [PATCH 1233/1397] net: mscc: ocelot: fix pMAC TX RMON stats for bucket 256-511 and above [ Upstream commit 70f010da00f90415296f93fb47a561977eae41cb ] The typo from ocelot_port_rmon_stats_cb() was also carried over to ocelot_port_pmac_rmon_stats_cb() as well, leading to incorrect TX RMON stats for the pMAC too. Fixes: ab3f97a9610a ("net: mscc: ocelot: export ethtool MAC Merge stats for Felix VSC9959") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231214000902.545625-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot_stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c index f29fa37263da..c018783757fb 100644 --- a/drivers/net/ethernet/mscc/ocelot_stats.c +++ b/drivers/net/ethernet/mscc/ocelot_stats.c @@ -610,10 +610,10 @@ static void ocelot_port_pmac_rmon_stats_cb(struct ocelot *ocelot, int port, rmon_stats->hist_tx[0] = s[OCELOT_STAT_TX_PMAC_64]; rmon_stats->hist_tx[1] = s[OCELOT_STAT_TX_PMAC_65_127]; rmon_stats->hist_tx[2] = s[OCELOT_STAT_TX_PMAC_128_255]; - rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_PMAC_128_255]; - rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_PMAC_256_511]; - rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_PMAC_512_1023]; - rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_PMAC_1024_1526]; + rmon_stats->hist_tx[3] = s[OCELOT_STAT_TX_PMAC_256_511]; + rmon_stats->hist_tx[4] = s[OCELOT_STAT_TX_PMAC_512_1023]; + rmon_stats->hist_tx[5] = s[OCELOT_STAT_TX_PMAC_1024_1526]; + rmon_stats->hist_tx[6] = s[OCELOT_STAT_TX_PMAC_1527_MAX]; } void ocelot_port_get_rmon_stats(struct ocelot *ocelot, int port, From 1e5283b960194bf5cb7b9f581d9bcbdf2af58b7f Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Wed, 13 Dec 2023 23:40:44 +0530 Subject: [PATCH 1234/1397] octeontx2-pf: Fix graceful exit during PFC configuration failure [ Upstream commit 8c97ab5448f2096daba11edf8d18a44e1eb6f31d ] During PFC configuration failure the code was not handling a graceful exit. This patch fixes the same and add proper code for a graceful exit. Fixes: 99c969a83d82 ("octeontx2-pf: Add egress PFC support") Signed-off-by: Suman Ghosh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/nic/otx2_dcbnl.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c index bfddbff7bcdf..28fb643d2917 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -399,9 +399,10 @@ static int otx2_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc) static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) { struct otx2_nic *pfvf = netdev_priv(dev); + u8 old_pfc_en; int err; - /* Save PFC configuration to interface */ + old_pfc_en = pfvf->pfc_en; pfvf->pfc_en = pfc->pfc_en; if (pfvf->hw.tx_queues >= NIX_PF_PFC_PRIO_MAX) @@ -411,13 +412,17 @@ static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) * supported by the tx queue configuration */ err = otx2_check_pfc_config(pfvf); - if (err) + if (err) { + pfvf->pfc_en = old_pfc_en; return err; + } process_pfc: err = otx2_config_priority_flow_ctrl(pfvf); - if (err) + if (err) { + pfvf->pfc_en = old_pfc_en; return err; + } /* Request Per channel Bpids */ if (pfc->pfc_en) @@ -425,6 +430,12 @@ static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) err = otx2_pfc_txschq_update(pfvf); if (err) { + if (pfc->pfc_en) + otx2_nix_config_bp(pfvf, false); + + otx2_pfc_txschq_stop(pfvf); + pfvf->pfc_en = old_pfc_en; + otx2_config_priority_flow_ctrl(pfvf); dev_err(pfvf->dev, "%s failed to update TX schedulers\n", __func__); return err; } From 2ef87ac54cf8ad5fc472ac9a3e6f6d739fe71ca0 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Thu, 14 Dec 2023 14:09:22 +0900 Subject: [PATCH 1235/1397] net: Return error from sk_stream_wait_connect() if sk_wait_event() fails [ Upstream commit cac23b7d7627915d967ce25436d7aae26e88ed06 ] The following NULL pointer dereference issue occurred: BUG: kernel NULL pointer dereference, address: 0000000000000000 <...> RIP: 0010:ccid_hc_tx_send_packet net/dccp/ccid.h:166 [inline] RIP: 0010:dccp_write_xmit+0x49/0x140 net/dccp/output.c:356 <...> Call Trace: dccp_sendmsg+0x642/0x7e0 net/dccp/proto.c:801 inet_sendmsg+0x63/0x90 net/ipv4/af_inet.c:846 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x83/0xe0 net/socket.c:745 ____sys_sendmsg+0x443/0x510 net/socket.c:2558 ___sys_sendmsg+0xe5/0x150 net/socket.c:2612 __sys_sendmsg+0xa6/0x120 net/socket.c:2641 __do_sys_sendmsg net/socket.c:2650 [inline] __se_sys_sendmsg net/socket.c:2648 [inline] __x64_sys_sendmsg+0x45/0x50 net/socket.c:2648 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x43/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b sk_wait_event() returns an error (-EPIPE) if disconnect() is called on the socket waiting for the event. However, sk_stream_wait_connect() returns success, i.e. zero, even if sk_wait_event() returns -EPIPE, so a function that waits for a connection with sk_stream_wait_connect() may misbehave. In the case of the above DCCP issue, dccp_sendmsg() is waiting for the connection. If disconnect() is called in concurrently, the above issue occurs. This patch fixes the issue by returning error from sk_stream_wait_connect() if sk_wait_event() fails. Fixes: 419ce133ab92 ("tcp: allow again tcp_disconnect() when threads are waiting") Signed-off-by: Shigeru Yoshida Reviewed-by: Kuniyuki Iwashima Reported-by: syzbot+c71bc336c5061153b502@syzkaller.appspotmail.com Reviewed-by: Eric Dumazet Reported-by: syzbot Reported-by: syzkaller Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/stream.c b/net/core/stream.c index 96fbcb9bbb30..b16dfa568a2d 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -79,7 +79,7 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending--; } while (!done); - return 0; + return done < 0 ? done : 0; } EXPORT_SYMBOL(sk_stream_wait_connect); From 2839a639db5e2204ded242d6d56a66d35250e24b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Dec 2023 11:30:38 +0000 Subject: [PATCH 1236/1397] net: sched: ife: fix potential use-after-free [ Upstream commit 19391a2ca98baa7b80279306cdf7dd43f81fa595 ] ife_decode() calls pskb_may_pull() two times, we need to reload ifehdr after the second one, or risk use-after-free as reported by syzbot: BUG: KASAN: slab-use-after-free in __ife_tlv_meta_valid net/ife/ife.c:108 [inline] BUG: KASAN: slab-use-after-free in ife_tlv_meta_decode+0x1d1/0x210 net/ife/ife.c:131 Read of size 2 at addr ffff88802d7300a4 by task syz-executor.5/22323 CPU: 0 PID: 22323 Comm: syz-executor.5 Not tainted 6.7.0-rc3-syzkaller-00804-g074ac38d5b95 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 __ife_tlv_meta_valid net/ife/ife.c:108 [inline] ife_tlv_meta_decode+0x1d1/0x210 net/ife/ife.c:131 tcf_ife_decode net/sched/act_ife.c:739 [inline] tcf_ife_act+0x4e3/0x1cd0 net/sched/act_ife.c:879 tc_act include/net/tc_wrapper.h:221 [inline] tcf_action_exec+0x1ac/0x620 net/sched/act_api.c:1079 tcf_exts_exec include/net/pkt_cls.h:344 [inline] mall_classify+0x201/0x310 net/sched/cls_matchall.c:42 tc_classify include/net/tc_wrapper.h:227 [inline] __tcf_classify net/sched/cls_api.c:1703 [inline] tcf_classify+0x82f/0x1260 net/sched/cls_api.c:1800 hfsc_classify net/sched/sch_hfsc.c:1147 [inline] hfsc_enqueue+0x315/0x1060 net/sched/sch_hfsc.c:1546 dev_qdisc_enqueue+0x3f/0x230 net/core/dev.c:3739 __dev_xmit_skb net/core/dev.c:3828 [inline] __dev_queue_xmit+0x1de1/0x3d30 net/core/dev.c:4311 dev_queue_xmit include/linux/netdevice.h:3165 [inline] packet_xmit+0x237/0x350 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x24aa/0x5200 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 net/socket.c:2190 __do_sys_sendto net/socket.c:2202 [inline] __se_sys_sendto net/socket.c:2198 [inline] __x64_sys_sendto+0xe0/0x1b0 net/socket.c:2198 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7fe9acc7cae9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fe9ada450c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007fe9acd9bf80 RCX: 00007fe9acc7cae9 RDX: 000000000000fce0 RSI: 00000000200002c0 RDI: 0000000000000003 RBP: 00007fe9accc847a R08: 0000000020000140 R09: 0000000000000014 R10: 0000000000000004 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fe9acd9bf80 R15: 00007ffd5427ae78 Allocated by task 22323: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc+0xa2/0xb0 mm/kasan/common.c:383 kasan_kmalloc include/linux/kasan.h:198 [inline] __do_kmalloc_node mm/slab_common.c:1007 [inline] __kmalloc_node_track_caller+0x5a/0x90 mm/slab_common.c:1027 kmalloc_reserve+0xef/0x260 net/core/skbuff.c:582 __alloc_skb+0x12b/0x330 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1298 [inline] alloc_skb_with_frags+0xe4/0x710 net/core/skbuff.c:6331 sock_alloc_send_pskb+0x7e4/0x970 net/core/sock.c:2780 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x1e2a/0x5200 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 net/socket.c:2190 __do_sys_sendto net/socket.c:2202 [inline] __se_sys_sendto net/socket.c:2198 [inline] __x64_sys_sendto+0xe0/0x1b0 net/socket.c:2198 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Freed by task 22323: kasan_save_stack+0x33/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:522 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x15b/0x1b0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:164 [inline] slab_free_hook mm/slub.c:1800 [inline] slab_free_freelist_hook+0x114/0x1e0 mm/slub.c:1826 slab_free mm/slub.c:3809 [inline] __kmem_cache_free+0xc0/0x180 mm/slub.c:3822 skb_kfree_head net/core/skbuff.c:950 [inline] skb_free_head+0x110/0x1b0 net/core/skbuff.c:962 pskb_expand_head+0x3c5/0x1170 net/core/skbuff.c:2130 __pskb_pull_tail+0xe1/0x1830 net/core/skbuff.c:2655 pskb_may_pull_reason include/linux/skbuff.h:2685 [inline] pskb_may_pull include/linux/skbuff.h:2693 [inline] ife_decode+0x394/0x4f0 net/ife/ife.c:82 tcf_ife_decode net/sched/act_ife.c:727 [inline] tcf_ife_act+0x43b/0x1cd0 net/sched/act_ife.c:879 tc_act include/net/tc_wrapper.h:221 [inline] tcf_action_exec+0x1ac/0x620 net/sched/act_api.c:1079 tcf_exts_exec include/net/pkt_cls.h:344 [inline] mall_classify+0x201/0x310 net/sched/cls_matchall.c:42 tc_classify include/net/tc_wrapper.h:227 [inline] __tcf_classify net/sched/cls_api.c:1703 [inline] tcf_classify+0x82f/0x1260 net/sched/cls_api.c:1800 hfsc_classify net/sched/sch_hfsc.c:1147 [inline] hfsc_enqueue+0x315/0x1060 net/sched/sch_hfsc.c:1546 dev_qdisc_enqueue+0x3f/0x230 net/core/dev.c:3739 __dev_xmit_skb net/core/dev.c:3828 [inline] __dev_queue_xmit+0x1de1/0x3d30 net/core/dev.c:4311 dev_queue_xmit include/linux/netdevice.h:3165 [inline] packet_xmit+0x237/0x350 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x24aa/0x5200 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 net/socket.c:2190 __do_sys_sendto net/socket.c:2202 [inline] __se_sys_sendto net/socket.c:2198 [inline] __x64_sys_sendto+0xe0/0x1b0 net/socket.c:2198 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b The buggy address belongs to the object at ffff88802d730000 which belongs to the cache kmalloc-8k of size 8192 The buggy address is located 164 bytes inside of freed 8192-byte region [ffff88802d730000, ffff88802d732000) The buggy address belongs to the physical page: page:ffffea0000b5cc00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2d730 head:ffffea0000b5cc00 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0xfff00000000840(slab|head|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000840 ffff888013042280 dead000000000122 0000000000000000 raw: 0000000000000000 0000000080020002 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0x1d20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL), pid 22323, tgid 22320 (syz-executor.5), ts 950317230369, free_ts 950233467461 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x2d0/0x350 mm/page_alloc.c:1544 prep_new_page mm/page_alloc.c:1551 [inline] get_page_from_freelist+0xa28/0x3730 mm/page_alloc.c:3319 __alloc_pages+0x22e/0x2420 mm/page_alloc.c:4575 alloc_pages_mpol+0x258/0x5f0 mm/mempolicy.c:2133 alloc_slab_page mm/slub.c:1870 [inline] allocate_slab mm/slub.c:2017 [inline] new_slab+0x283/0x3c0 mm/slub.c:2070 ___slab_alloc+0x979/0x1500 mm/slub.c:3223 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3322 __slab_alloc_node mm/slub.c:3375 [inline] slab_alloc_node mm/slub.c:3468 [inline] __kmem_cache_alloc_node+0x131/0x310 mm/slub.c:3517 __do_kmalloc_node mm/slab_common.c:1006 [inline] __kmalloc_node_track_caller+0x4a/0x90 mm/slab_common.c:1027 kmalloc_reserve+0xef/0x260 net/core/skbuff.c:582 __alloc_skb+0x12b/0x330 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1298 [inline] alloc_skb_with_frags+0xe4/0x710 net/core/skbuff.c:6331 sock_alloc_send_pskb+0x7e4/0x970 net/core/sock.c:2780 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x1e2a/0x5200 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 net/socket.c:2190 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1144 [inline] free_unref_page_prepare+0x53c/0xb80 mm/page_alloc.c:2354 free_unref_page+0x33/0x3b0 mm/page_alloc.c:2494 __unfreeze_partials+0x226/0x240 mm/slub.c:2655 qlink_free mm/kasan/quarantine.c:168 [inline] qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187 kasan_quarantine_reduce+0x18e/0x1d0 mm/kasan/quarantine.c:294 __kasan_slab_alloc+0x65/0x90 mm/kasan/common.c:305 kasan_slab_alloc include/linux/kasan.h:188 [inline] slab_post_alloc_hook mm/slab.h:763 [inline] slab_alloc_node mm/slub.c:3478 [inline] slab_alloc mm/slub.c:3486 [inline] __kmem_cache_alloc_lru mm/slub.c:3493 [inline] kmem_cache_alloc_lru+0x219/0x6f0 mm/slub.c:3509 alloc_inode_sb include/linux/fs.h:2937 [inline] ext4_alloc_inode+0x28/0x650 fs/ext4/super.c:1408 alloc_inode+0x5d/0x220 fs/inode.c:261 new_inode_pseudo fs/inode.c:1006 [inline] new_inode+0x22/0x260 fs/inode.c:1032 __ext4_new_inode+0x333/0x5200 fs/ext4/ialloc.c:958 ext4_symlink+0x5d7/0xa20 fs/ext4/namei.c:3398 vfs_symlink fs/namei.c:4464 [inline] vfs_symlink+0x3e5/0x620 fs/namei.c:4448 do_symlinkat+0x25f/0x310 fs/namei.c:4490 __do_sys_symlinkat fs/namei.c:4506 [inline] __se_sys_symlinkat fs/namei.c:4503 [inline] __x64_sys_symlinkat+0x97/0xc0 fs/namei.c:4503 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 Fixes: d57493d6d1be ("net: sched: ife: check on metadata length") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Jamal Hadi Salim Cc: Alexander Aring Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ife/ife.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ife/ife.c b/net/ife/ife.c index 13bbf8cb6a39..be05b690b9ef 100644 --- a/net/ife/ife.c +++ b/net/ife/ife.c @@ -82,6 +82,7 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen) if (unlikely(!pskb_may_pull(skb, total_pull))) return NULL; + ifehdr = (struct ifeheadr *)(skb->data + skb->dev->hard_header_len); skb_set_mac_header(skb, total_pull); __skb_pull(skb, total_pull); *metalen = ifehdrln - IFE_METAHDRLEN; From 73e159a240d78687610054d988e428a46eba94eb Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 14 Dec 2023 21:04:04 +0800 Subject: [PATCH 1237/1397] ethernet: atheros: fix a memleak in atl1e_setup_ring_resources [ Upstream commit 309fdb1c33fe726d92d0030481346f24e1b01f07 ] In the error handling of 'offset > adapter->ring_size', the tx_ring->tx_buffer allocated by kzalloc should be freed, instead of 'goto failed' instantly. Fixes: a6a5325239c2 ("atl1e: Atheros L1E Gigabit Ethernet driver") Signed-off-by: Zhipeng Lu Reviewed-by: Suman Ghosh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 5935be190b9e..5f2a6fcba967 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -866,10 +866,13 @@ static int atl1e_setup_ring_resources(struct atl1e_adapter *adapter) netdev_err(adapter->netdev, "offset(%d) > ring size(%d) !!\n", offset, adapter->ring_size); err = -1; - goto failed; + goto free_buffer; } return 0; +free_buffer: + kfree(tx_ring->tx_buffer); + tx_ring->tx_buffer = NULL; failed: if (adapter->ring_vir_addr != NULL) { dma_free_coherent(&pdev->dev, adapter->ring_size, From ffced26692f83212aa09d0ece0213b23cc2f611d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Dec 2023 15:27:47 +0000 Subject: [PATCH 1238/1397] net/rose: fix races in rose_kill_by_device() [ Upstream commit 64b8bc7d5f1434c636a40bdcfcd42b278d1714be ] syzbot found an interesting netdev refcounting issue in net/rose/af_rose.c, thanks to CONFIG_NET_DEV_REFCNT_TRACKER=y [1] Problem is that rose_kill_by_device() can change rose->device while other threads do not expect the pointer to be changed. We have to first collect sockets in a temporary array, then perform the changes while holding the socket lock and rose_list_lock spinlock (in this order) Change rose_release() to also acquire rose_list_lock before releasing the netdev refcount. [1] [ 1185.055088][ T7889] ref_tracker: reference already released. [ 1185.061476][ T7889] ref_tracker: allocated in: [ 1185.066081][ T7889] rose_bind+0x4ab/0xd10 [ 1185.070446][ T7889] __sys_bind+0x1ec/0x220 [ 1185.074818][ T7889] __x64_sys_bind+0x72/0xb0 [ 1185.079356][ T7889] do_syscall_64+0x40/0x110 [ 1185.083897][ T7889] entry_SYSCALL_64_after_hwframe+0x63/0x6b [ 1185.089835][ T7889] ref_tracker: freed in: [ 1185.094088][ T7889] rose_release+0x2f5/0x570 [ 1185.098629][ T7889] __sock_release+0xae/0x260 [ 1185.103262][ T7889] sock_close+0x1c/0x20 [ 1185.107453][ T7889] __fput+0x270/0xbb0 [ 1185.111467][ T7889] task_work_run+0x14d/0x240 [ 1185.116085][ T7889] get_signal+0x106f/0x2790 [ 1185.120622][ T7889] arch_do_signal_or_restart+0x90/0x7f0 [ 1185.126205][ T7889] exit_to_user_mode_prepare+0x121/0x240 [ 1185.131846][ T7889] syscall_exit_to_user_mode+0x1e/0x60 [ 1185.137293][ T7889] do_syscall_64+0x4d/0x110 [ 1185.141783][ T7889] entry_SYSCALL_64_after_hwframe+0x63/0x6b [ 1185.148085][ T7889] ------------[ cut here ]------------ WARNING: CPU: 1 PID: 7889 at lib/ref_tracker.c:255 ref_tracker_free+0x61a/0x810 lib/ref_tracker.c:255 Modules linked in: CPU: 1 PID: 7889 Comm: syz-executor.2 Not tainted 6.7.0-rc4-syzkaller-00162-g65c95f78917e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:ref_tracker_free+0x61a/0x810 lib/ref_tracker.c:255 Code: 00 44 8b 6b 18 31 ff 44 89 ee e8 21 62 f5 fc 45 85 ed 0f 85 a6 00 00 00 e8 a3 66 f5 fc 48 8b 34 24 48 89 ef e8 27 5f f1 05 90 <0f> 0b 90 bb ea ff ff ff e9 52 fd ff ff e8 84 66 f5 fc 4c 8d 6d 44 RSP: 0018:ffffc90004917850 EFLAGS: 00010202 RAX: 0000000000000201 RBX: ffff88802618f4c0 RCX: 0000000000000000 RDX: 0000000000000202 RSI: ffffffff8accb920 RDI: 0000000000000001 RBP: ffff8880269ea5b8 R08: 0000000000000001 R09: fffffbfff23e35f6 R10: ffffffff91f1afb7 R11: 0000000000000001 R12: 1ffff92000922f0c R13: 0000000005a2039b R14: ffff88802618f4d8 R15: 00000000ffffffff FS: 00007f0a720ef6c0(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f43a819d988 CR3: 0000000076c64000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: netdev_tracker_free include/linux/netdevice.h:4127 [inline] netdev_put include/linux/netdevice.h:4144 [inline] netdev_put include/linux/netdevice.h:4140 [inline] rose_kill_by_device net/rose/af_rose.c:195 [inline] rose_device_event+0x25d/0x330 net/rose/af_rose.c:218 notifier_call_chain+0xb6/0x3b0 kernel/notifier.c:93 call_netdevice_notifiers_info+0xbe/0x130 net/core/dev.c:1967 call_netdevice_notifiers_extack net/core/dev.c:2005 [inline] call_netdevice_notifiers net/core/dev.c:2019 [inline] __dev_notify_flags+0x1f5/0x2e0 net/core/dev.c:8646 dev_change_flags+0x122/0x170 net/core/dev.c:8682 dev_ifsioc+0x9ad/0x1090 net/core/dev_ioctl.c:529 dev_ioctl+0x224/0x1090 net/core/dev_ioctl.c:786 sock_do_ioctl+0x198/0x270 net/socket.c:1234 sock_ioctl+0x22e/0x6b0 net/socket.c:1339 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7f0a7147cba9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f0a720ef0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f0a7159bf80 RCX: 00007f0a7147cba9 RDX: 0000000020000040 RSI: 0000000000008914 RDI: 0000000000000004 RBP: 00007f0a714c847a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f0a7159bf80 R15: 00007ffc8bb3a5f8 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Bernard Pidoux Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rose/af_rose.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4a5c2dc8dd7a..42e8b9e37516 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -182,21 +182,47 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) */ static void rose_kill_by_device(struct net_device *dev) { - struct sock *s; + struct sock *sk, *array[16]; + struct rose_sock *rose; + bool rescan; + int i, cnt; +start: + rescan = false; + cnt = 0; spin_lock_bh(&rose_list_lock); - sk_for_each(s, &rose_list) { - struct rose_sock *rose = rose_sk(s); + sk_for_each(sk, &rose_list) { + rose = rose_sk(sk); + if (rose->device == dev) { + if (cnt == ARRAY_SIZE(array)) { + rescan = true; + break; + } + sock_hold(sk); + array[cnt++] = sk; + } + } + spin_unlock_bh(&rose_list_lock); + for (i = 0; i < cnt; i++) { + sk = array[cnt]; + rose = rose_sk(sk); + lock_sock(sk); + spin_lock_bh(&rose_list_lock); if (rose->device == dev) { - rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); + rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); if (rose->neighbour) rose->neighbour->use--; netdev_put(rose->device, &rose->dev_tracker); rose->device = NULL; } + spin_unlock_bh(&rose_list_lock); + release_sock(sk); + sock_put(sk); + cond_resched(); } - spin_unlock_bh(&rose_list_lock); + if (rescan) + goto start; } /* @@ -656,7 +682,10 @@ static int rose_release(struct socket *sock) break; } + spin_lock_bh(&rose_list_lock); netdev_put(rose->device, &rose->dev_tracker); + rose->device = NULL; + spin_unlock_bh(&rose_list_lock); sock->sk = NULL; release_sock(sk); sock_put(sk); From 399dea9d5ac8b21ccc104236df3ce4cd73031eca Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 23 Oct 2023 16:26:23 -0700 Subject: [PATCH 1239/1397] Bluetooth: Fix not notifying when connection encryption changes [ Upstream commit f67eabffb57d0bee379994a18ec5f462b2cbdf86 ] Some layers such as SMP depend on getting notified about encryption changes immediately as they only allow certain PDU to be transmitted over an encrypted link which may cause SMP implementation to reject valid PDUs received thus causing pairing to fail when it shouldn't. Fixes: 7aca0ac4792e ("Bluetooth: Wait for HCI_OP_WRITE_AUTH_PAYLOAD_TO to complete") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f6d3150bcbb0..da756cbf6220 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -820,8 +820,6 @@ static u8 hci_cc_write_auth_payload_timeout(struct hci_dev *hdev, void *data, if (!rp->status) conn->auth_payload_timeout = get_unaligned_le16(sent + 2); - hci_encrypt_cfm(conn, 0); - unlock: hci_dev_unlock(hdev); @@ -3683,12 +3681,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, cp.handle = cpu_to_le16(conn->handle); cp.timeout = cpu_to_le16(hdev->auth_payload_timeout); if (hci_send_cmd(conn->hdev, HCI_OP_WRITE_AUTH_PAYLOAD_TO, - sizeof(cp), &cp)) { + sizeof(cp), &cp)) bt_dev_err(hdev, "write auth payload timeout failed"); - goto notify; - } - - goto unlock; } notify: From 7fe3556f98b1a5ab8e3145eac19fb0c1a47c2d7d Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Fri, 10 Nov 2023 01:46:05 +0000 Subject: [PATCH 1240/1397] Bluetooth: Fix deadlock in vhci_send_frame [ Upstream commit 769bf60e17ee1a56a81e7c031192c3928312c52e ] syzbot found a potential circular dependency leading to a deadlock: -> #3 (&hdev->req_lock){+.+.}-{3:3}: __mutex_lock_common+0x1b6/0x1bc2 kernel/locking/mutex.c:599 __mutex_lock kernel/locking/mutex.c:732 [inline] mutex_lock_nested+0x17/0x1c kernel/locking/mutex.c:784 hci_dev_do_close+0x3f/0x9f net/bluetooth/hci_core.c:551 hci_rfkill_set_block+0x130/0x1ac net/bluetooth/hci_core.c:935 rfkill_set_block+0x1e6/0x3b8 net/rfkill/core.c:345 rfkill_fop_write+0x2d8/0x672 net/rfkill/core.c:1274 vfs_write+0x277/0xcf5 fs/read_write.c:594 ksys_write+0x19b/0x2bd fs/read_write.c:650 do_syscall_x64 arch/x86/entry/common.c:55 [inline] do_syscall_64+0x51/0xba arch/x86/entry/common.c:93 entry_SYSCALL_64_after_hwframe+0x61/0xcb -> #2 (rfkill_global_mutex){+.+.}-{3:3}: __mutex_lock_common+0x1b6/0x1bc2 kernel/locking/mutex.c:599 __mutex_lock kernel/locking/mutex.c:732 [inline] mutex_lock_nested+0x17/0x1c kernel/locking/mutex.c:784 rfkill_register+0x30/0x7e3 net/rfkill/core.c:1045 hci_register_dev+0x48f/0x96d net/bluetooth/hci_core.c:2622 __vhci_create_device drivers/bluetooth/hci_vhci.c:341 [inline] vhci_create_device+0x3ad/0x68f drivers/bluetooth/hci_vhci.c:374 vhci_get_user drivers/bluetooth/hci_vhci.c:431 [inline] vhci_write+0x37b/0x429 drivers/bluetooth/hci_vhci.c:511 call_write_iter include/linux/fs.h:2109 [inline] new_sync_write fs/read_write.c:509 [inline] vfs_write+0xaa8/0xcf5 fs/read_write.c:596 ksys_write+0x19b/0x2bd fs/read_write.c:650 do_syscall_x64 arch/x86/entry/common.c:55 [inline] do_syscall_64+0x51/0xba arch/x86/entry/common.c:93 entry_SYSCALL_64_after_hwframe+0x61/0xcb -> #1 (&data->open_mutex){+.+.}-{3:3}: __mutex_lock_common+0x1b6/0x1bc2 kernel/locking/mutex.c:599 __mutex_lock kernel/locking/mutex.c:732 [inline] mutex_lock_nested+0x17/0x1c kernel/locking/mutex.c:784 vhci_send_frame+0x68/0x9c drivers/bluetooth/hci_vhci.c:75 hci_send_frame+0x1cc/0x2ff net/bluetooth/hci_core.c:2989 hci_sched_acl_pkt net/bluetooth/hci_core.c:3498 [inline] hci_sched_acl net/bluetooth/hci_core.c:3583 [inline] hci_tx_work+0xb94/0x1a60 net/bluetooth/hci_core.c:3654 process_one_work+0x901/0xfb8 kernel/workqueue.c:2310 worker_thread+0xa67/0x1003 kernel/workqueue.c:2457 kthread+0x36a/0x430 kernel/kthread.c:319 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:298 -> #0 ((work_completion)(&hdev->tx_work)){+.+.}-{0:0}: check_prev_add kernel/locking/lockdep.c:3053 [inline] check_prevs_add kernel/locking/lockdep.c:3172 [inline] validate_chain kernel/locking/lockdep.c:3787 [inline] __lock_acquire+0x2d32/0x77fa kernel/locking/lockdep.c:5011 lock_acquire+0x273/0x4d5 kernel/locking/lockdep.c:5622 __flush_work+0xee/0x19f kernel/workqueue.c:3090 hci_dev_close_sync+0x32f/0x1113 net/bluetooth/hci_sync.c:4352 hci_dev_do_close+0x47/0x9f net/bluetooth/hci_core.c:553 hci_rfkill_set_block+0x130/0x1ac net/bluetooth/hci_core.c:935 rfkill_set_block+0x1e6/0x3b8 net/rfkill/core.c:345 rfkill_fop_write+0x2d8/0x672 net/rfkill/core.c:1274 vfs_write+0x277/0xcf5 fs/read_write.c:594 ksys_write+0x19b/0x2bd fs/read_write.c:650 do_syscall_x64 arch/x86/entry/common.c:55 [inline] do_syscall_64+0x51/0xba arch/x86/entry/common.c:93 entry_SYSCALL_64_after_hwframe+0x61/0xcb This change removes the need for acquiring the open_mutex in vhci_send_frame, thus eliminating the potential deadlock while maintaining the required packet ordering. Fixes: 92d4abd66f70 ("Bluetooth: vhci: Fix race when opening vhci device") Signed-off-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_vhci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index f3892e9ce800..572d68d52965 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -44,6 +45,7 @@ struct vhci_data { bool wakeup; __u16 msft_opcode; bool aosp_capable; + atomic_t initialized; }; static int vhci_open_dev(struct hci_dev *hdev) @@ -75,11 +77,10 @@ static int vhci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1); - mutex_lock(&data->open_mutex); skb_queue_tail(&data->readq, skb); - mutex_unlock(&data->open_mutex); - wake_up_interruptible(&data->read_wait); + if (atomic_read(&data->initialized)) + wake_up_interruptible(&data->read_wait); return 0; } @@ -464,7 +465,8 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) skb_put_u8(skb, 0xff); skb_put_u8(skb, opcode); put_unaligned_le16(hdev->id, skb_put(skb, 2)); - skb_queue_tail(&data->readq, skb); + skb_queue_head(&data->readq, skb); + atomic_inc(&data->initialized); wake_up_interruptible(&data->read_wait); return 0; From 7ee2ba3dd66b97f14c5b4a7867af87a94490993a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Nov 2023 23:17:44 +0100 Subject: [PATCH 1241/1397] Bluetooth: hci_event: shut up a false-positive warning [ Upstream commit a5812c68d849505ea657f653446512b85887f813 ] Turning on -Wstringop-overflow globally exposed a misleading compiler warning in bluetooth: net/bluetooth/hci_event.c: In function 'hci_cc_read_class_of_dev': net/bluetooth/hci_event.c:524:9: error: 'memcpy' writing 3 bytes into a region of size 0 overflows the destination [-Werror=stringop-overflow=] 524 | memcpy(hdev->dev_class, rp->dev_class, 3); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The problem here is the check for hdev being NULL in bt_dev_dbg() that leads the compiler to conclude that hdev->dev_class might be an invalid pointer access. Add another explicit check for the same condition to make sure gcc sees this cannot happen. Fixes: a9de9248064b ("[Bluetooth] Switch from OGF+OCF to using only opcodes") Fixes: 1b56c90018f0 ("Makefile: Enable -Wstringop-overflow globally") Signed-off-by: Arnd Bergmann Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index da756cbf6220..3661f8cdbab7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -516,6 +516,9 @@ static u8 hci_cc_read_class_of_dev(struct hci_dev *hdev, void *data, { struct hci_rp_read_class_of_dev *rp = data; + if (WARN_ON(!hdev)) + return HCI_ERROR_UNSPECIFIED; + bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); if (rp->status) From a07a95bcb966b238a3d2b92c9fe1fc684c0910c4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 8 Dec 2023 17:22:29 -0500 Subject: [PATCH 1242/1397] Bluetooth: hci_core: Fix hci_conn_hash_lookup_cis [ Upstream commit 50efc63d1a7a7b9a6ed21adae1b9a7123ec8abc0 ] hci_conn_hash_lookup_cis shall always match the requested CIG and CIS ids even when they are unset as otherwise it result in not being able to bind/connect different sockets to the same address as that would result in having multiple sockets mapping to the same hci_conn which doesn't really work and prevents BAP audio configuration such as AC 6(i) when CIG and CIS are left unset. Fixes: c14516faede3 ("Bluetooth: hci_conn: Fix not matching by CIS ID") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7fa95b72e5c8..22ce39a2aa7b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1227,11 +1227,11 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev, continue; /* Match CIG ID if set */ - if (cig != BT_ISO_QOS_CIG_UNSET && cig != c->iso_qos.ucast.cig) + if (cig != c->iso_qos.ucast.cig) continue; /* Match CIS ID if set */ - if (id != BT_ISO_QOS_CIS_UNSET && id != c->iso_qos.ucast.cis) + if (id != c->iso_qos.ucast.cis) continue; /* Match destination address if set */ From f0534c0fa81b281bf515a094b854207d8b2b7fe3 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Thu, 14 Dec 2023 13:31:38 -0800 Subject: [PATCH 1243/1397] bnxt_en: do not map packet buffers twice [ Upstream commit 23c93c3b6275a59f2a685f4a693944b53c31df4e ] Remove double-mapping of DMA buffers as it can prevent page pool entries from being freed. Mapping is managed by page pool infrastructure and was previously managed by the driver in __bnxt_alloc_rx_page before allowing the page pool infrastructure to manage it. Fixes: 578fcfd26e2a ("bnxt_en: Let the page pool manage the DMA mapping") Reviewed-by: Somnath Kotur Signed-off-by: Andy Gospodarek Signed-off-by: Michael Chan Reviewed-by: David Wei Link: https://lore.kernel.org/r/20231214213138.98095-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 96f5ca778c67..8cb9a99154aa 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -59,7 +59,6 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, for (i = 0; i < num_frags ; i++) { skb_frag_t *frag = &sinfo->frags[i]; struct bnxt_sw_tx_bd *frag_tx_buf; - struct pci_dev *pdev = bp->pdev; dma_addr_t frag_mapping; int frag_len; @@ -73,16 +72,10 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; frag_len = skb_frag_size(frag); - frag_mapping = skb_frag_dma_map(&pdev->dev, frag, 0, - frag_len, DMA_TO_DEVICE); - - if (unlikely(dma_mapping_error(&pdev->dev, frag_mapping))) - return NULL; - - dma_unmap_addr_set(frag_tx_buf, mapping, frag_mapping); - flags = frag_len << TX_BD_LEN_SHIFT; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); + frag_mapping = page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); txbd->tx_bd_haddr = cpu_to_le64(frag_mapping); len = frag_len; From d98ce1f083cca895982f3e88b8e8c372288e3e23 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 12 Dec 2023 00:05:35 +0000 Subject: [PATCH 1244/1397] net: phy: skip LED triggers on PHYs on SFP modules [ Upstream commit b1dfc0f76231bbf395c59d20a2070684620d5d0f ] Calling led_trigger_register() when attaching a PHY located on an SFP module potentially (and practically) leads into a deadlock. Fix this by not calling led_trigger_register() for PHYs localted on SFP modules as such modules actually never got any LEDs. ====================================================== WARNING: possible circular locking dependency detected 6.7.0-rc4-next-20231208+ #0 Tainted: G O ------------------------------------------------------ kworker/u8:2/43 is trying to acquire lock: ffffffc08108c4e8 (triggers_list_lock){++++}-{3:3}, at: led_trigger_register+0x4c/0x1a8 but task is already holding lock: ffffff80c5c6f318 (&sfp->sm_mutex){+.+.}-{3:3}, at: cleanup_module+0x2ba8/0x3120 [sfp] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&sfp->sm_mutex){+.+.}-{3:3}: __mutex_lock+0x88/0x7a0 mutex_lock_nested+0x20/0x28 cleanup_module+0x2ae0/0x3120 [sfp] sfp_register_bus+0x5c/0x9c sfp_register_socket+0x48/0xd4 cleanup_module+0x271c/0x3120 [sfp] platform_probe+0x64/0xb8 really_probe+0x17c/0x3c0 __driver_probe_device+0x78/0x164 driver_probe_device+0x3c/0xd4 __driver_attach+0xec/0x1f0 bus_for_each_dev+0x60/0xa0 driver_attach+0x20/0x28 bus_add_driver+0x108/0x208 driver_register+0x5c/0x118 __platform_driver_register+0x24/0x2c init_module+0x28/0xa7c [sfp] do_one_initcall+0x70/0x2ec do_init_module+0x54/0x1e4 load_module+0x1b78/0x1c8c __do_sys_init_module+0x1bc/0x2cc __arm64_sys_init_module+0x18/0x20 invoke_syscall.constprop.0+0x4c/0xdc do_el0_svc+0x3c/0xbc el0_svc+0x34/0x80 el0t_64_sync_handler+0xf8/0x124 el0t_64_sync+0x150/0x154 -> #2 (rtnl_mutex){+.+.}-{3:3}: __mutex_lock+0x88/0x7a0 mutex_lock_nested+0x20/0x28 rtnl_lock+0x18/0x20 set_device_name+0x30/0x130 netdev_trig_activate+0x13c/0x1ac led_trigger_set+0x118/0x234 led_trigger_write+0x104/0x17c sysfs_kf_bin_write+0x64/0x80 kernfs_fop_write_iter+0x128/0x1b4 vfs_write+0x178/0x2a4 ksys_write+0x58/0xd4 __arm64_sys_write+0x18/0x20 invoke_syscall.constprop.0+0x4c/0xdc do_el0_svc+0x3c/0xbc el0_svc+0x34/0x80 el0t_64_sync_handler+0xf8/0x124 el0t_64_sync+0x150/0x154 -> #1 (&led_cdev->trigger_lock){++++}-{3:3}: down_write+0x4c/0x13c led_trigger_write+0xf8/0x17c sysfs_kf_bin_write+0x64/0x80 kernfs_fop_write_iter+0x128/0x1b4 vfs_write+0x178/0x2a4 ksys_write+0x58/0xd4 __arm64_sys_write+0x18/0x20 invoke_syscall.constprop.0+0x4c/0xdc do_el0_svc+0x3c/0xbc el0_svc+0x34/0x80 el0t_64_sync_handler+0xf8/0x124 el0t_64_sync+0x150/0x154 -> #0 (triggers_list_lock){++++}-{3:3}: __lock_acquire+0x12a0/0x2014 lock_acquire+0x100/0x2ac down_write+0x4c/0x13c led_trigger_register+0x4c/0x1a8 phy_led_triggers_register+0x9c/0x214 phy_attach_direct+0x154/0x36c phylink_attach_phy+0x30/0x60 phylink_sfp_connect_phy+0x140/0x510 sfp_add_phy+0x34/0x50 init_module+0x15c/0xa7c [sfp] cleanup_module+0x1d94/0x3120 [sfp] cleanup_module+0x2bb4/0x3120 [sfp] process_one_work+0x1f8/0x4ec worker_thread+0x1e8/0x3d8 kthread+0x104/0x110 ret_from_fork+0x10/0x20 other info that might help us debug this: Chain exists of: triggers_list_lock --> rtnl_mutex --> &sfp->sm_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sfp->sm_mutex); lock(rtnl_mutex); lock(&sfp->sm_mutex); lock(triggers_list_lock); *** DEADLOCK *** 4 locks held by kworker/u8:2/43: #0: ffffff80c000f938 ((wq_completion)events_power_efficient){+.+.}-{0:0}, at: process_one_work+0x150/0x4ec #1: ffffffc08214bde8 ((work_completion)(&(&sfp->timeout)->work)){+.+.}-{0:0}, at: process_one_work+0x150/0x4ec #2: ffffffc0810902f8 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x18/0x20 #3: ffffff80c5c6f318 (&sfp->sm_mutex){+.+.}-{3:3}, at: cleanup_module+0x2ba8/0x3120 [sfp] stack backtrace: CPU: 0 PID: 43 Comm: kworker/u8:2 Tainted: G O 6.7.0-rc4-next-20231208+ #0 Hardware name: Bananapi BPI-R4 (DT) Workqueue: events_power_efficient cleanup_module [sfp] Call trace: dump_backtrace+0xa8/0x10c show_stack+0x14/0x1c dump_stack_lvl+0x5c/0xa0 dump_stack+0x14/0x1c print_circular_bug+0x328/0x430 check_noncircular+0x124/0x134 __lock_acquire+0x12a0/0x2014 lock_acquire+0x100/0x2ac down_write+0x4c/0x13c led_trigger_register+0x4c/0x1a8 phy_led_triggers_register+0x9c/0x214 phy_attach_direct+0x154/0x36c phylink_attach_phy+0x30/0x60 phylink_sfp_connect_phy+0x140/0x510 sfp_add_phy+0x34/0x50 init_module+0x15c/0xa7c [sfp] cleanup_module+0x1d94/0x3120 [sfp] cleanup_module+0x2bb4/0x3120 [sfp] process_one_work+0x1f8/0x4ec worker_thread+0x1e8/0x3d8 kthread+0x104/0x110 ret_from_fork+0x10/0x20 Signed-off-by: Daniel Golle Fixes: 01e5b728e9e4 ("net: phy: Add a binding for PHY LEDs") Link: https://lore.kernel.org/r/102a9dce38bdf00215735d04cd4704458273ad9c.1702339354.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 2ce74593d6e4..a42df2c1bd04 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1548,7 +1548,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, goto error; phy_resume(phydev); - phy_led_triggers_register(phydev); + if (!phydev->is_on_sfp_module) + phy_led_triggers_register(phydev); /** * If the external phy used by current mac interface is managed by @@ -1817,7 +1818,8 @@ void phy_detach(struct phy_device *phydev) } phydev->phylink = NULL; - phy_led_triggers_unregister(phydev); + if (!phydev->is_on_sfp_module) + phy_led_triggers_unregister(phydev); if (phydev->mdio.dev.driver) module_put(phydev->mdio.dev.driver->owner); From 194e51acb3c47a5ed9191f6d972ce430b306468a Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 6 Dec 2023 12:19:05 -0800 Subject: [PATCH 1245/1397] ice: stop trashing VF VSI aggregator node ID information [ Upstream commit 7d881346121a97756f34e00e6296a5d63f001f7f ] When creating new VSIs, they are assigned into an aggregator node in the scheduler tree. Information about which aggregator node a VSI is assigned into is maintained by the vsi->agg_node structure. In ice_vsi_decfg(), this information is being destroyed, by overwriting the valid flag and the agg_id field to zero. For VF VSIs, this breaks the aggregator node configuration replay, which depends on this information. This results in VFs being inserted into the default aggregator node. The resulting configuration will have unexpected Tx bandwidth sharing behavior. This was broken by commit 6624e780a577 ("ice: split ice_vsi_setup into smaller functions"), which added the block to reset the agg_node data. The vsi->agg_node structure is not managed by the scheduler code, but is instead a wrapper around an aggregator node ID that is tracked at the VSI layer. Its been around for a long time, and its primary purpose was for handling VFs. The SR-IOV VF reset flow does not make use of the standard VSI rebuild/replay logic, and uses vsi->agg_node as part of its handling to rebuild the aggregator node configuration. The logic for aggregator nodes stretches back to early ice driver code from commit b126bd6bcd67 ("ice: create scheduler aggregator node config and move VSIs") The logic in ice_vsi_decfg() which trashes the ice_agg_node data is clearly wrong. It destroys information that is necessary for handling VF reset,. It is also not the correct way to actually remove a VSI from an aggregator node. For that, we need to implement logic in the scheduler code. Further, non-VF VSIs properly replay their aggregator configuration using existing scheduler replay logic. To fix the VF replay logic, remove this broken aggregator node cleanup logic. This is the simplest way to immediately fix this. This ensures that VFs will have proper aggregate configuration after a reset. This is especially important since VFs often perform resets as part of their reconfiguration flows. Without fixing this, VFs will be placed in the default aggregator node and Tx bandwidth will not be shared in the expected and configured manner. Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions") Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_lib.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 73bbf06a76db..8dbf7a381e49 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2633,10 +2633,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi) if (vsi->type == ICE_VSI_VF && vsi->agg_node && vsi->agg_node->valid) vsi->agg_node->num_vsis--; - if (vsi->agg_node) { - vsi->agg_node->valid = false; - vsi->agg_node->agg_id = 0; - } } /** From fc4d6d136d42fab207b3ce20a8ebfd61a13f931f Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 11 Dec 2023 13:19:28 -0800 Subject: [PATCH 1246/1397] ice: alter feature support check for SRIOV and LAG [ Upstream commit 4d50fcdc2476eef94c14c6761073af5667bb43b6 ] Previously, the ice driver had support for using a handler for bonding netdev events to ensure that conflicting features were not allowed to be activated at the same time. While this was still in place, additional support was added to specifically support SRIOV and LAG together. These both utilized the netdev event handler, but the SRIOV and LAG feature was behind a capabilities feature check to make sure the current NVM has support. The exclusion part of the event handler should be removed since there are users who have custom made solutions that depend on the non-exclusion of features. Wrap the creation/registration and cleanup of the event handler and associated structs in the probe flow with a feature check so that the only systems that support the full implementation of LAG features will initialize support. This will leave other systems unhindered with functionality as it existed before any LAG code was added. Fixes: bb52f42acef6 ("ice: Add driver support for firmware changes for LAG") Reviewed-by: Jesse Brandeburg Signed-off-by: Dave Ertman Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_lag.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index d86e2460b5a4..23e197c3d02a 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1963,6 +1963,8 @@ int ice_init_lag(struct ice_pf *pf) int n, err; ice_lag_init_feature_support_flag(pf); + if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) + return 0; pf->lag = kzalloc(sizeof(*lag), GFP_KERNEL); if (!pf->lag) From 79733dfcc913ae51c09cf1ddd7cbacd4914963b4 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 12 Dec 2023 10:29:01 +0100 Subject: [PATCH 1247/1397] ice: Fix PF with enabled XDP going no-carrier after reset [ Upstream commit f5728a418945ba53e2fdf38a6e5c5a2670965e85 ] Commit 6624e780a577fc596788 ("ice: split ice_vsi_setup into smaller functions") has refactored a bunch of code involved in PFR. In this process, TC queue number adjustment for XDP was lost. Bring it back. Lack of such adjustment causes interface to go into no-carrier after a reset, if XDP program is attached, with the following message: ice 0000:b1:00.0: Failed to set LAN Tx queue context, error: -22 ice 0000:b1:00.0 ens801f0np0: Failed to open VSI 0x0006 on switch 0x0001 ice 0000:b1:00.0: enable VSI failed, err -22, VSI index 0, type ICE_VSI_PF ice 0000:b1:00.0: PF VSI rebuild failed: -22 ice 0000:b1:00.0: Rebuild failed, unload and reload driver Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions") Reviewed-by: Przemek Kitszel Signed-off-by: Larysa Zaremba Reviewed-by: Simon Horman Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_lib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 8dbf7a381e49..a66c3b6ccec1 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2384,6 +2384,9 @@ static int ice_vsi_cfg_tc_lan(struct ice_pf *pf, struct ice_vsi *vsi) } else { max_txqs[i] = vsi->alloc_txq; } + + if (vsi->type == ICE_VSI_PF) + max_txqs[i] += vsi->num_xdp_txq; } dev_dbg(dev, "vsi->tc_cfg.ena_tc = %d\n", vsi->tc_cfg.ena_tc); From 330fe5d51bcefbb9531bc31cd72f5f836cf36ee3 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 15 Dec 2023 12:33:53 -0800 Subject: [PATCH 1248/1397] net: mana: select PAGE_POOL [ Upstream commit 340943fbff3d8faa44d2223ca04917df28786a07 ] Mana uses PAGE_POOL API. x86_64 defconfig doesn't select it: ld: vmlinux.o: in function `mana_create_page_pool.isra.0': mana_en.c:(.text+0x9ae36f): undefined reference to `page_pool_create' ld: vmlinux.o: in function `mana_get_rxfrag': mana_en.c:(.text+0x9afed1): undefined reference to `page_pool_alloc_pages' make[3]: *** [/home/yury/work/linux/scripts/Makefile.vmlinux:37: vmlinux] Error 1 make[2]: *** [/home/yury/work/linux/Makefile:1154: vmlinux] Error 2 make[1]: *** [/home/yury/work/linux/Makefile:234: __sub-make] Error 2 make[1]: Leaving directory '/home/yury/work/build-linux-x86_64' make: *** [Makefile:234: __sub-make] Error 2 So we need to select it explicitly. Signed-off-by: Yury Norov Reviewed-by: Simon Horman Tested-by: Simon Horman # build-tested Fixes: ca9c54d2 ("net: mana: Add a driver for Microsoft Azure Network Adapter") Link: https://lore.kernel.org/r/20231215203353.635379-1-yury.norov@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/microsoft/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig index 090e6b983243..01eb7445ead9 100644 --- a/drivers/net/ethernet/microsoft/Kconfig +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -20,6 +20,7 @@ config MICROSOFT_MANA depends on PCI_MSI && X86_64 depends on PCI_HYPERV select AUXILIARY_BUS + select PAGE_POOL help This driver supports Microsoft Azure Network Adapter (MANA). So far, the driver is only supported on X86_64. From 337ca88fde4da0f0fbdf1f24c6db2267e5ac54b9 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sat, 16 Dec 2023 15:52:18 +0800 Subject: [PATCH 1249/1397] net: check vlan filter feature in vlan_vids_add_by_dev() and vlan_vids_del_by_dev() [ Upstream commit 01a564bab4876007ce35f312e16797dfe40e4823 ] I got the below warning trace: WARNING: CPU: 4 PID: 4056 at net/core/dev.c:11066 unregister_netdevice_many_notify CPU: 4 PID: 4056 Comm: ip Not tainted 6.7.0-rc4+ #15 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 RIP: 0010:unregister_netdevice_many_notify+0x9a4/0x9b0 Call Trace: rtnl_dellink rtnetlink_rcv_msg netlink_rcv_skb netlink_unicast netlink_sendmsg __sock_sendmsg ____sys_sendmsg ___sys_sendmsg __sys_sendmsg do_syscall_64 entry_SYSCALL_64_after_hwframe It can be repoduced via: ip netns add ns1 ip netns exec ns1 ip link add bond0 type bond mode 0 ip netns exec ns1 ip link add bond_slave_1 type veth peer veth2 ip netns exec ns1 ip link set bond_slave_1 master bond0 [1] ip netns exec ns1 ethtool -K bond0 rx-vlan-filter off [2] ip netns exec ns1 ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 [3] ip netns exec ns1 ip link add link bond0 name bond0.0 type vlan id 0 [4] ip netns exec ns1 ip link set bond_slave_1 nomaster [5] ip netns exec ns1 ip link del veth2 ip netns del ns1 This is all caused by command [1] turning off the rx-vlan-filter function of bond0. The reason is the same as commit 01f4fd270870 ("bonding: Fix incorrect deletion of ETH_P_8021AD protocol vid from slaves"). Commands [2] [3] add the same vid to slave and master respectively, causing command [4] to empty slave->vlan_info. The following command [5] triggers this problem. To fix this problem, we should add VLAN_FILTER feature checks in vlan_vids_add_by_dev() and vlan_vids_del_by_dev() to prevent incorrect addition or deletion of vlan_vid information. Fixes: 348a1443cc43 ("vlan: introduce functions to do mass addition/deletion of vids by another device") Signed-off-by: Liu Jian Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/8021q/vlan_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 0beb44f2fe1f..f00158234505 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -407,6 +407,8 @@ int vlan_vids_add_by_dev(struct net_device *dev, return 0; list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; err = vlan_vid_add(dev, vid_info->proto, vid_info->vid); if (err) goto unwind; @@ -417,6 +419,8 @@ int vlan_vids_add_by_dev(struct net_device *dev, list_for_each_entry_continue_reverse(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); } @@ -436,8 +440,11 @@ void vlan_vids_del_by_dev(struct net_device *dev, if (!vlan_info) return; - list_for_each_entry(vid_info, &vlan_info->vid_list, list) + list_for_each_entry(vid_info, &vlan_info->vid_list, list) { + if (!vlan_hw_filter_capable(by_dev, vid_info->proto)) + continue; vlan_vid_del(dev, vid_info->proto, vid_info->vid); + } } EXPORT_SYMBOL(vlan_vids_del_by_dev); From 9ff7ae01a0c03d89a435f6ebe046d638411838a8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 11 Dec 2023 15:08:57 +0000 Subject: [PATCH 1250/1397] afs: Fix the dynamic root's d_delete to always delete unused dentries [ Upstream commit 71f8b55bc30e82d6355e07811213d847981a32e2 ] Fix the afs dynamic root's d_delete function to always delete unused dentries rather than only deleting them if they're positive. With things as they stand upstream, negative dentries stemming from failed DNS lookups stick around preventing retries. Fixes: 66c7e1d319a5 ("afs: Split the dynroot stuff out and give it its own ops tables") Signed-off-by: David Howells Tested-by: Markus Suvanto cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Sasha Levin --- fs/afs/dynroot.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 8081d68004d0..cec7d8e5ad0c 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -252,20 +252,9 @@ static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags) return 1; } -/* - * Allow the VFS to enquire as to whether a dentry should be unhashed (mustn't - * sleep) - * - called from dput() when d_count is going to 0. - * - return 1 to request dentry be unhashed, 0 otherwise - */ -static int afs_dynroot_d_delete(const struct dentry *dentry) -{ - return d_really_is_positive(dentry); -} - const struct dentry_operations afs_dynroot_dentry_operations = { .d_revalidate = afs_dynroot_d_revalidate, - .d_delete = afs_dynroot_d_delete, + .d_delete = always_delete_dentry, .d_release = afs_d_release, .d_automount = afs_d_automount, }; From 3c305aa9962d550612dcfd47ecec4948ae69c0f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 11 Dec 2023 15:15:02 +0000 Subject: [PATCH 1251/1397] afs: Fix dynamic root lookup DNS check [ Upstream commit 74cef6872ceaefb5b6c5c60641371ea28702d358 ] In the afs dynamic root directory, the ->lookup() function does a DNS check on the cell being asked for and if the DNS upcall reports an error it will report an error back to userspace (typically ENOENT). However, if a failed DNS upcall returns a new-style result, it will return a valid result, with the status field set appropriately to indicate the type of failure - and in that case, dns_query() doesn't return an error and we let stat() complete with no error - which can cause confusion in userspace as subsequent calls that trigger d_automount then fail with ENOENT. Fix this by checking the status result from a valid dns_query() and returning an error if it indicates a failure. Fixes: bbb4c4323a4d ("dns: Allow the dns resolver to retrieve a server set") Reported-by: Markus Suvanto Closes: https://bugzilla.kernel.org/show_bug.cgi?id=216637 Signed-off-by: David Howells Tested-by: Markus Suvanto cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Sasha Levin --- fs/afs/dynroot.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index cec7d8e5ad0c..10905a53d5b2 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -114,6 +114,7 @@ static int afs_probe_cell_name(struct dentry *dentry) struct afs_net *net = afs_d2net(dentry); const char *name = dentry->d_name.name; size_t len = dentry->d_name.len; + char *result = NULL; int ret; /* Names prefixed with a dot are R/W mounts. */ @@ -131,9 +132,22 @@ static int afs_probe_cell_name(struct dentry *dentry) } ret = dns_query(net->net, "afsdb", name, len, "srv=1", - NULL, NULL, false); - if (ret == -ENODATA || ret == -ENOKEY) + &result, NULL, false); + if (ret == -ENODATA || ret == -ENOKEY || ret == 0) ret = -ENOENT; + if (ret > 0 && ret >= sizeof(struct dns_server_list_v1_header)) { + struct dns_server_list_v1_header *v1 = (void *)result; + + if (v1->hdr.zero == 0 && + v1->hdr.content == DNS_PAYLOAD_IS_SERVER_LIST && + v1->hdr.version == 1 && + (v1->status != DNS_LOOKUP_GOOD && + v1->status != DNS_LOOKUP_GOOD_WITH_BAD)) + return -ENOENT; + + } + + kfree(result); return ret; } From 5c7a24ab04e20a2229997ba45bf9ea275024247b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 17 Dec 2023 16:37:40 +0100 Subject: [PATCH 1252/1397] net: ethernet: mtk_wed: fix possible NULL pointer dereference in mtk_wed_wo_queue_tx_clean() [ Upstream commit 7cb8cd4daacfea646cf8b5925ca2c66c98b18480 ] In order to avoid a NULL pointer dereference, check entry->buf pointer before running skb_free_frag in mtk_wed_wo_queue_tx_clean routine. Fixes: 799684448e3e ("net: ethernet: mtk_wed: introduce wed wo support") Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/3c1262464d215faa8acebfc08869798c81c96f4a.1702827359.git.lorenzo@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_wed_wo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c index 3bd51a3d6650..ae44ad5f8ce8 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c @@ -291,6 +291,9 @@ mtk_wed_wo_queue_tx_clean(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q) for (i = 0; i < q->n_desc; i++) { struct mtk_wed_wo_queue_entry *entry = &q->entry[i]; + if (!entry->buf) + continue; + dma_unmap_single(wo->hw->dev, entry->addr, entry->len, DMA_TO_DEVICE); skb_free_frag(entry->buf); From b577b9aa1340ee7f36441b0740691550abaad5f2 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 18 Dec 2023 20:02:43 -0700 Subject: [PATCH 1253/1397] net/ipv6: Revert remove expired routes with a separated list of routes [ Upstream commit dade3f6a1e4e35a5ae916d5e78b3229ec34c78ec ] This reverts commit 3dec89b14d37ee635e772636dad3f09f78f1ab87. The commit has some race conditions given how expires is managed on a fib6_info in relation to gc start, adding the entry to the gc list and setting the timer value leading to UAF. Revert the commit and try again in a later release. Fixes: 3dec89b14d37 ("net/ipv6: Remove expired routes with a separated list of routes") Cc: Kui-Feng Lee Signed-off-by: David Ahern Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231219030243.25687-1-dsahern@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- include/net/ip6_fib.h | 64 +++++++++---------------------------------- net/ipv6/ip6_fib.c | 55 ++++--------------------------------- net/ipv6/route.c | 6 ++-- 3 files changed, 22 insertions(+), 103 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1ba9f4ddf2f6..9ba6413fd2e3 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -179,9 +179,6 @@ struct fib6_info { refcount_t fib6_ref; unsigned long expires; - - struct hlist_node gc_link; - struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] @@ -250,6 +247,19 @@ static inline bool fib6_requires_src(const struct fib6_info *rt) return rt->fib6_src.plen > 0; } +static inline void fib6_clean_expires(struct fib6_info *f6i) +{ + f6i->fib6_flags &= ~RTF_EXPIRES; + f6i->expires = 0; +} + +static inline void fib6_set_expires(struct fib6_info *f6i, + unsigned long expires) +{ + f6i->expires = expires; + f6i->fib6_flags |= RTF_EXPIRES; +} + static inline bool fib6_check_expired(const struct fib6_info *f6i) { if (f6i->fib6_flags & RTF_EXPIRES) @@ -257,11 +267,6 @@ static inline bool fib6_check_expired(const struct fib6_info *f6i) return false; } -static inline bool fib6_has_expires(const struct fib6_info *f6i) -{ - return f6i->fib6_flags & RTF_EXPIRES; -} - /* Function to safely get fn->fn_sernum for passed in rt * and store result in passed in cookie. * Return true if we can get cookie safely @@ -383,7 +388,6 @@ struct fib6_table { struct inet_peer_base tb6_peers; unsigned int flags; unsigned int fib_seq; - struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -500,48 +504,6 @@ void fib6_gc_cleanup(void); int fib6_init(void); -/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be - * NULL. - */ -static inline void fib6_set_expires_locked(struct fib6_info *f6i, - unsigned long expires) -{ - struct fib6_table *tb6; - - tb6 = f6i->fib6_table; - f6i->expires = expires; - if (tb6 && !fib6_has_expires(f6i)) - hlist_add_head(&f6i->gc_link, &tb6->tb6_gc_hlist); - f6i->fib6_flags |= RTF_EXPIRES; -} - -/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be - * NULL. If fib6_table is NULL, the fib6_info will no be inserted into the - * list of GC candidates until it is inserted into a table. - */ -static inline void fib6_set_expires(struct fib6_info *f6i, - unsigned long expires) -{ - spin_lock_bh(&f6i->fib6_table->tb6_lock); - fib6_set_expires_locked(f6i, expires); - spin_unlock_bh(&f6i->fib6_table->tb6_lock); -} - -static inline void fib6_clean_expires_locked(struct fib6_info *f6i) -{ - if (fib6_has_expires(f6i)) - hlist_del_init(&f6i->gc_link); - f6i->fib6_flags &= ~RTF_EXPIRES; - f6i->expires = 0; -} - -static inline void fib6_clean_expires(struct fib6_info *f6i) -{ - spin_lock_bh(&f6i->fib6_table->tb6_lock); - fib6_clean_expires_locked(f6i); - spin_unlock_bh(&f6i->fib6_table->tb6_lock); -} - struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7772f42ff2b9..4fc2cae0d116 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -160,8 +160,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) INIT_LIST_HEAD(&f6i->fib6_siblings); refcount_set(&f6i->fib6_ref, 1); - INIT_HLIST_NODE(&f6i->gc_link); - return f6i; } @@ -248,7 +246,6 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); - INIT_HLIST_HEAD(&table->tb6_gc_hlist); } return table; @@ -1060,8 +1057,6 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, lockdep_is_held(&table->tb6_lock)); } } - - fib6_clean_expires_locked(rt); } /* @@ -1123,10 +1118,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) - fib6_clean_expires_locked(iter); + fib6_clean_expires(iter); else - fib6_set_expires_locked(iter, - rt->expires); + fib6_set_expires(iter, rt->expires); if (rt->fib6_pmtu) fib6_metric_set(iter, RTAX_MTU, @@ -1485,10 +1479,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); - - if (fib6_has_expires(rt)) - hlist_add_head(&rt->gc_link, &table->tb6_gc_hlist); - fib6_start_gc(info->nl_net, rt); } @@ -2291,8 +2281,9 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) +static int fib6_age(struct fib6_info *rt, void *arg) { + struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; /* @@ -2300,7 +2291,7 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) * Routes are expired even if they are in use. */ - if (fib6_has_expires(rt) && rt->expires) { + if (rt->fib6_flags & RTF_EXPIRES && rt->expires) { if (time_after(now, rt->expires)) { RT6_TRACE("expiring %p\n", rt); return -1; @@ -2317,40 +2308,6 @@ static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args) return 0; } -static void fib6_gc_table(struct net *net, - struct fib6_table *tb6, - struct fib6_gc_args *gc_args) -{ - struct fib6_info *rt; - struct hlist_node *n; - struct nl_info info = { - .nl_net = net, - .skip_notify = false, - }; - - hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link) - if (fib6_age(rt, gc_args) == -1) - fib6_del(rt, &info); -} - -static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args) -{ - struct fib6_table *table; - struct hlist_head *head; - unsigned int h; - - rcu_read_lock(); - for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { - head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, head, tb6_hlist) { - spin_lock_bh(&table->tb6_lock); - fib6_gc_table(net, table, gc_args); - spin_unlock_bh(&table->tb6_lock); - } - } - rcu_read_unlock(); -} - void fib6_run_gc(unsigned long expires, struct net *net, bool force) { struct fib6_gc_args gc_args; @@ -2366,7 +2323,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = 0; - fib6_gc_all(net, &gc_args); + fib6_clean_all(net, fib6_age, &gc_args); now = jiffies; net->ipv6.ip6_rt_last_gc = now; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9c687b357e6a..56525b5b95a2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3763,10 +3763,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, rt->dst_nocount = true; if (cfg->fc_flags & RTF_EXPIRES) - fib6_set_expires_locked(rt, jiffies + - clock_t_to_jiffies(cfg->fc_expires)); + fib6_set_expires(rt, jiffies + + clock_t_to_jiffies(cfg->fc_expires)); else - fib6_clean_expires_locked(rt); + fib6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; From 449f9d843ecea1b451b37d2955d38e1865e93e03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Dec 2023 12:53:31 +0000 Subject: [PATCH 1254/1397] net: check dev->gso_max_size in gso_features_check() [ Upstream commit 24ab059d2ebd62fdccc43794796f6ffbabe49ebc ] Some drivers might misbehave if TSO packets get too big. GVE for instance uses a 16bit field in its TX descriptor, and will do bad things if a packet is bigger than 2^16 bytes. Linux TCP stack honors dev->gso_max_size, but there are other ways for too big packets to reach an ndo_start_xmit() handler : virtio_net, af_packet, GRO... Add a generic check in gso_features_check() and fallback to GSO when needed. gso_max_size was added in the blamed commit. Fixes: 82cc1a7a5687 ("[NET]: Add per-connection option to set max TSO frame size") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20231219125331.4127498-1-edumazet@google.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 9bf90b2a75b6..e480afb50d4c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3500,6 +3500,9 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, if (gso_segs > READ_ONCE(dev->gso_max_segs)) return features & ~NETIF_F_GSO_MASK; + if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size))) + return features & ~NETIF_F_GSO_MASK; + if (!skb_shinfo(skb)->gso_type) { skb_warn_bad_offload(skb); return features & ~NETIF_F_GSO_MASK; From afc360e8a1256acb7579a6f5b6f2c30b85b39301 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 9 Dec 2023 00:41:55 +0000 Subject: [PATCH 1255/1397] keys, dns: Allow key types (eg. DNS) to be reclaimed immediately on expiry [ Upstream commit 39299bdd2546688d92ed9db4948f6219ca1b9542 ] If a key has an expiration time, then when that time passes, the key is left around for a certain amount of time before being collected (5 mins by default) so that EKEYEXPIRED can be returned instead of ENOKEY. This is a problem for DNS keys because we want to redo the DNS lookup immediately at that point. Fix this by allowing key types to be marked such that keys of that type don't have this extra period, but are reclaimed as soon as they expire and turn this on for dns_resolver-type keys. To make this easier to handle, key->expiry is changed to be permanent if TIME64_MAX rather than 0. Furthermore, give such new-style negative DNS results a 1s default expiry if no other expiry time is set rather than allowing it to stick around indefinitely. This shouldn't be zero as ls will follow a failing stat call immediately with a second with AT_SYMLINK_NOFOLLOW added. Fixes: 1a4240f4764a ("DNS: Separate out CIFS DNS Resolver code") Signed-off-by: David Howells Tested-by: Markus Suvanto cc: Wang Lei cc: Jeff Layton cc: Steve French cc: Marc Dionne cc: Jarkko Sakkinen cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org cc: linux-nfs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: keyrings@vger.kernel.org cc: netdev@vger.kernel.org Signed-off-by: Sasha Levin --- include/linux/key-type.h | 1 + net/dns_resolver/dns_key.c | 10 +++++++++- security/keys/gc.c | 31 +++++++++++++++++++++---------- security/keys/internal.h | 11 ++++++++++- security/keys/key.c | 15 +++++---------- security/keys/proc.c | 2 +- 6 files changed, 47 insertions(+), 23 deletions(-) diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 7d985a1dfe4a..5caf3ce82373 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -73,6 +73,7 @@ struct key_type { unsigned int flags; #define KEY_TYPE_NET_DOMAIN 0x00000001 /* Keys of this type have a net namespace domain */ +#define KEY_TYPE_INSTANT_REAP 0x00000002 /* Keys of this type don't have a delay after expiring */ /* vet a description */ int (*vet_description)(const char *description); diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 01e54b46ae0b..2a6d363763a2 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -91,6 +91,7 @@ const struct cred *dns_resolver_cache; static int dns_resolver_preparse(struct key_preparsed_payload *prep) { + const struct dns_server_list_v1_header *v1; const struct dns_payload_header *bin; struct user_key_payload *upayload; unsigned long derrno; @@ -122,6 +123,13 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) return -EINVAL; } + v1 = (const struct dns_server_list_v1_header *)bin; + if ((v1->status != DNS_LOOKUP_GOOD && + v1->status != DNS_LOOKUP_GOOD_WITH_BAD)) { + if (prep->expiry == TIME64_MAX) + prep->expiry = ktime_get_real_seconds() + 1; + } + result_len = datalen; goto store_result; } @@ -314,7 +322,7 @@ static long dns_resolver_read(const struct key *key, struct key_type key_type_dns_resolver = { .name = "dns_resolver", - .flags = KEY_TYPE_NET_DOMAIN, + .flags = KEY_TYPE_NET_DOMAIN | KEY_TYPE_INSTANT_REAP, .preparse = dns_resolver_preparse, .free_preparse = dns_resolver_free_preparse, .instantiate = generic_key_instantiate, diff --git a/security/keys/gc.c b/security/keys/gc.c index 3c90807476eb..eaddaceda14e 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -66,6 +66,19 @@ void key_schedule_gc(time64_t gc_at) } } +/* + * Set the expiration time on a key. + */ +void key_set_expiry(struct key *key, time64_t expiry) +{ + key->expiry = expiry; + if (expiry != TIME64_MAX) { + if (!(key->type->flags & KEY_TYPE_INSTANT_REAP)) + expiry += key_gc_delay; + key_schedule_gc(expiry); + } +} + /* * Schedule a dead links collection run. */ @@ -176,7 +189,6 @@ static void key_garbage_collector(struct work_struct *work) static u8 gc_state; /* Internal persistent state */ #define KEY_GC_REAP_AGAIN 0x01 /* - Need another cycle */ #define KEY_GC_REAPING_LINKS 0x02 /* - We need to reap links */ -#define KEY_GC_SET_TIMER 0x04 /* - We need to restart the timer */ #define KEY_GC_REAPING_DEAD_1 0x10 /* - We need to mark dead keys */ #define KEY_GC_REAPING_DEAD_2 0x20 /* - We need to reap dead key links */ #define KEY_GC_REAPING_DEAD_3 0x40 /* - We need to reap dead keys */ @@ -184,21 +196,17 @@ static void key_garbage_collector(struct work_struct *work) struct rb_node *cursor; struct key *key; - time64_t new_timer, limit; + time64_t new_timer, limit, expiry; kenter("[%lx,%x]", key_gc_flags, gc_state); limit = ktime_get_real_seconds(); - if (limit > key_gc_delay) - limit -= key_gc_delay; - else - limit = key_gc_delay; /* Work out what we're going to be doing in this pass */ gc_state &= KEY_GC_REAPING_DEAD_1 | KEY_GC_REAPING_DEAD_2; gc_state <<= 1; if (test_and_clear_bit(KEY_GC_KEY_EXPIRED, &key_gc_flags)) - gc_state |= KEY_GC_REAPING_LINKS | KEY_GC_SET_TIMER; + gc_state |= KEY_GC_REAPING_LINKS; if (test_and_clear_bit(KEY_GC_REAP_KEYTYPE, &key_gc_flags)) gc_state |= KEY_GC_REAPING_DEAD_1; @@ -233,8 +241,11 @@ static void key_garbage_collector(struct work_struct *work) } } - if (gc_state & KEY_GC_SET_TIMER) { - if (key->expiry > limit && key->expiry < new_timer) { + expiry = key->expiry; + if (expiry != TIME64_MAX) { + if (!(key->type->flags & KEY_TYPE_INSTANT_REAP)) + expiry += key_gc_delay; + if (expiry > limit && expiry < new_timer) { kdebug("will expire %x in %lld", key_serial(key), key->expiry - limit); new_timer = key->expiry; @@ -276,7 +287,7 @@ static void key_garbage_collector(struct work_struct *work) */ kdebug("pass complete"); - if (gc_state & KEY_GC_SET_TIMER && new_timer != (time64_t)TIME64_MAX) { + if (new_timer != TIME64_MAX) { new_timer += key_gc_delay; key_schedule_gc(new_timer); } diff --git a/security/keys/internal.h b/security/keys/internal.h index 3c1e7122076b..ec2ec335b613 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -174,6 +174,7 @@ extern unsigned key_gc_delay; extern void keyring_gc(struct key *keyring, time64_t limit); extern void keyring_restriction_gc(struct key *keyring, struct key_type *dead_type); +void key_set_expiry(struct key *key, time64_t expiry); extern void key_schedule_gc(time64_t gc_at); extern void key_schedule_gc_links(void); extern void key_gc_keytype(struct key_type *ktype); @@ -222,10 +223,18 @@ extern struct key *key_get_instantiation_authkey(key_serial_t target_id); */ static inline bool key_is_dead(const struct key *key, time64_t limit) { + time64_t expiry = key->expiry; + + if (expiry != TIME64_MAX) { + if (!(key->type->flags & KEY_TYPE_INSTANT_REAP)) + expiry += key_gc_delay; + if (expiry <= limit) + return true; + } + return key->flags & ((1 << KEY_FLAG_DEAD) | (1 << KEY_FLAG_INVALIDATED)) || - (key->expiry > 0 && key->expiry <= limit) || key->domain_tag->removed; } diff --git a/security/keys/key.c b/security/keys/key.c index 5c0c7df833f8..5f103b2713c6 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -294,6 +294,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->uid = uid; key->gid = gid; key->perm = perm; + key->expiry = TIME64_MAX; key->restrict_link = restrict_link; key->last_used_at = ktime_get_real_seconds(); @@ -463,10 +464,7 @@ static int __key_instantiate_and_link(struct key *key, if (authkey) key_invalidate(authkey); - if (prep->expiry != TIME64_MAX) { - key->expiry = prep->expiry; - key_schedule_gc(prep->expiry + key_gc_delay); - } + key_set_expiry(key, prep->expiry); } } @@ -606,8 +604,7 @@ int key_reject_and_link(struct key *key, atomic_inc(&key->user->nikeys); mark_key_instantiated(key, -error); notify_key(key, NOTIFY_KEY_INSTANTIATED, -error); - key->expiry = ktime_get_real_seconds() + timeout; - key_schedule_gc(key->expiry + key_gc_delay); + key_set_expiry(key, ktime_get_real_seconds() + timeout); if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) awaken = 1; @@ -722,16 +719,14 @@ struct key_type *key_type_lookup(const char *type) void key_set_timeout(struct key *key, unsigned timeout) { - time64_t expiry = 0; + time64_t expiry = TIME64_MAX; /* make the changes with the locks held to prevent races */ down_write(&key->sem); if (timeout > 0) expiry = ktime_get_real_seconds() + timeout; - - key->expiry = expiry; - key_schedule_gc(key->expiry + key_gc_delay); + key_set_expiry(key, expiry); up_write(&key->sem); } diff --git a/security/keys/proc.c b/security/keys/proc.c index d0cde6685627..4f4e2c1824f1 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -198,7 +198,7 @@ static int proc_keys_show(struct seq_file *m, void *v) /* come up with a suitable timeout value */ expiry = READ_ONCE(key->expiry); - if (expiry == 0) { + if (expiry == TIME64_MAX) { memcpy(xbuf, "perm", 5); } else if (now >= expiry) { memcpy(xbuf, "expd", 5); From 81fc8dceb78b76dfe9d167e0845144f39b65cb1d Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 21 Dec 2023 15:09:31 +0000 Subject: [PATCH 1256/1397] afs: Fix overwriting of result of DNS query [ Upstream commit a9e01ac8c5ff32669119c40dfdc9e80eb0b7d7aa ] In afs_update_cell(), ret is the result of the DNS lookup and the errors are to be handled by a switch - however, the value gets clobbered in between by setting it to -ENOMEM in case afs_alloc_vlserver_list() fails. Fix this by moving the setting of -ENOMEM into the error handling for OOM failure. Further, only do it if we don't have an alternative error to return. Found by Linux Verification Center (linuxtesting.org) with SVACE. Based on a patch from Anastasia Belova [1]. Fixes: d5c32c89b208 ("afs: Fix cell DNS lookup") Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Anastasia Belova cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: lvc-project@linuxtesting.org Link: https://lore.kernel.org/r/20231221085849.1463-1-abelova@astralinux.ru/ [1] Link: https://lore.kernel.org/r/1700862.1703168632@warthog.procyon.org.uk/ # v1 Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/afs/cell.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 988c2ac7cece..926cb1188eba 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -409,10 +409,12 @@ static int afs_update_cell(struct afs_cell *cell) if (ret == -ENOMEM) goto out_wake; - ret = -ENOMEM; vllist = afs_alloc_vlserver_list(0); - if (!vllist) + if (!vllist) { + if (ret >= 0) + ret = -ENOMEM; goto out_wake; + } switch (ret) { case -ENODATA: From c3215484ca1f64b6b8af03847856499e5364e65a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 21 Dec 2023 13:57:31 +0000 Subject: [PATCH 1257/1397] afs: Fix use-after-free due to get/remove race in volume tree [ Upstream commit 9a6b294ab496650e9f270123730df37030911b55 ] When an afs_volume struct is put, its refcount is reduced to 0 before the cell->volume_lock is taken and the volume removed from the cell->volumes tree. Unfortunately, this means that the lookup code can race and see a volume with a zero ref in the tree, resulting in a use-after-free: refcount_t: addition on 0; use-after-free. WARNING: CPU: 3 PID: 130782 at lib/refcount.c:25 refcount_warn_saturate+0x7a/0xda ... RIP: 0010:refcount_warn_saturate+0x7a/0xda ... Call Trace: afs_get_volume+0x3d/0x55 afs_create_volume+0x126/0x1de afs_validate_fc+0xfe/0x130 afs_get_tree+0x20/0x2e5 vfs_get_tree+0x1d/0xc9 do_new_mount+0x13b/0x22e do_mount+0x5d/0x8a __do_sys_mount+0x100/0x12a do_syscall_64+0x3a/0x94 entry_SYSCALL_64_after_hwframe+0x62/0x6a Fix this by: (1) When putting, use a flag to indicate if the volume has been removed from the tree and skip the rb_erase if it has. (2) When looking up, use a conditional ref increment and if it fails because the refcount is 0, replace the node in the tree and set the removal flag. Fixes: 20325960f875 ("afs: Reorganise volume and server trees to be rooted on the cell") Signed-off-by: David Howells Reviewed-by: Jeffrey Altman cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/afs/internal.h | 2 ++ fs/afs/volume.c | 26 +++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5041eae64423..c4bf8439bc9c 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -586,6 +586,7 @@ struct afs_volume { #define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */ #define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */ #define AFS_VOLUME_MAYBE_NO_IBULK 6 /* - T if some servers don't have InlineBulkStatus */ +#define AFS_VOLUME_RM_TREE 7 /* - Set if volume removed from cell->volumes */ #ifdef CONFIG_AFS_FSCACHE struct fscache_volume *cache; /* Caching cookie */ #endif @@ -1513,6 +1514,7 @@ extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *, extern struct afs_volume *afs_create_volume(struct afs_fs_context *); extern int afs_activate_volume(struct afs_volume *); extern void afs_deactivate_volume(struct afs_volume *); +bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason); extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace); extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum afs_volume_trace); extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *); diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 29d483c80281..115c081a8e2c 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -32,8 +32,13 @@ static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell, } else if (p->vid > volume->vid) { pp = &(*pp)->rb_right; } else { - volume = afs_get_volume(p, afs_volume_trace_get_cell_insert); - goto found; + if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) { + volume = p; + goto found; + } + + set_bit(AFS_VOLUME_RM_TREE, &volume->flags); + rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes); } } @@ -56,7 +61,8 @@ static void afs_remove_volume_from_cell(struct afs_volume *volume) afs_volume_trace_remove); write_seqlock(&cell->volume_lock); hlist_del_rcu(&volume->proc_link); - rb_erase(&volume->cell_node, &cell->volumes); + if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags)) + rb_erase(&volume->cell_node, &cell->volumes); write_sequnlock(&cell->volume_lock); } } @@ -231,6 +237,20 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) _leave(" [destroyed]"); } +/* + * Try to get a reference on a volume record. + */ +bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason) +{ + int r; + + if (__refcount_inc_not_zero(&volume->ref, &r)) { + trace_afs_volume(volume->vid, r + 1, reason); + return true; + } + return false; +} + /* * Get a reference on a volume record. */ From 80419c96f8e68ecdd79ea74497c36a61b3b2fc3f Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Mon, 4 Dec 2023 20:18:09 +0530 Subject: [PATCH 1258/1397] drm/i915/hwmon: Fix static analysis tool reported issues [ Upstream commit 768f17fd25e4a98bf5166148629ecf6f647d5efc ] Updated i915 hwmon with fixes for issues reported by static analysis tool. Fixed integer overflow with upcasting. v2: - Added Fixes tag (Badal). - Updated commit message as per review comments (Anshuman). Fixes: 4c2572fe0ae7 ("drm/i915/hwmon: Expose power1_max_interval") Reviewed-by: Badal Nilawar Reviewed-by: Anshuman Gupta Signed-off-by: Karthik Poosa Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20231204144809.1518704-1-karthik.poosa@intel.com (cherry picked from commit ac3420d3d428443a08b923f9118121c170192b62) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/i915_hwmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 975da8e7f2a9..8c3f443c8347 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -175,7 +175,7 @@ hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr, * tau4 = (4 | x) << y * but add 2 when doing the final right shift to account for units */ - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; /* val in hwmon interface units (millisec) */ out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); @@ -211,7 +211,7 @@ hwm_power1_max_interval_store(struct device *dev, r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); x = REG_FIELD_GET(PKG_MAX_WIN_X, r); y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); - tau4 = ((1 << x_w) | x) << y; + tau4 = (u64)((1 << x_w) | x) << y; max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); if (val > max_win) From 6472e3217fe5d01e10ef72cd26650b4f99591947 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 14 Dec 2023 00:05:26 +0200 Subject: [PATCH 1259/1397] drm/i915/mtl: Fix HDMI/DP PLL clock selection [ Upstream commit dbcab554f777390d9bb6a808ed0cd90ee59bb44e ] Select the HDMI specific PLL clock only for HDMI outputs. Fixes: 62618c7f117e ("drm/i915/mtl: C20 PLL programming") Cc: Mika Kahola Cc: Radhakrishna Sripada Reviewed-by: Radhakrishna Sripada Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20231213220526.1828827-1-imre.deak@intel.com (cherry picked from commit 937d02cc79c6828fef28a4d80d8d0ad2f7bf2b62) Signed-off-by: Jani Nikula Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/display/intel_cx0_phy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 80e4ec6ee403..048e581fda16 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -2420,7 +2420,8 @@ static void intel_program_port_clock_ctl(struct intel_encoder *encoder, val |= XELPDP_FORWARD_CLOCK_UNGATE; - if (is_hdmi_frl(crtc_state->port_clock)) + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI) && + is_hdmi_frl(crtc_state->port_clock)) val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_DIV18CLK); else val |= XELPDP_DDI_CLOCK_SELECT(XELPDP_DDI_CLOCK_SELECT_MAXPCLK); From b965c22e1a0b376aef1fa9f4bab970866b8534f8 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 18 Dec 2023 15:56:52 +0100 Subject: [PATCH 1260/1397] ASoC: hdmi-codec: fix missing report for jack initial status [ Upstream commit 025222a9d6d25eee2ad9a1bb5a8b29b34b5ba576 ] This fixes a problem introduced while fixing ELD reporting with no jack set. Most driver using the hdmi-codec will call the 'plugged_cb' callback directly when registered to report the initial state of the HDMI connector. With the commit mentionned, this occurs before jack is ready and the initial report is lost for platforms actually providing a jack for HDMI. Fix this by storing the hdmi connector status regardless of jack being set or not and report the last status when jack gets set. With this, the initial state is reported correctly even if it is disconnected. This was not done initially and is also a fix. Fixes: 15be353d55f9 ("ASoC: hdmi-codec: register hpd callback on component probe") Reported-by: Zhengqiao Xia Closes: https://lore.kernel.org/alsa-devel/CADYyEwTNyY+fR9SgfDa-g6iiDwkU3MUdPVCYexs2_3wbcM8_vg@mail.gmail.com/ Cc: Hsin-Yi Wang Tested-by: Zhengqiao Xia Signed-off-by: Jerome Brunet Link: https://msgid.link/r/20231218145655.134929-1-jbrunet@baylibre.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/hdmi-codec.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index 20da1eaa4f1c..0938671700c6 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -850,8 +850,9 @@ static int hdmi_dai_probe(struct snd_soc_dai *dai) static void hdmi_codec_jack_report(struct hdmi_codec_priv *hcp, unsigned int jack_status) { - if (hcp->jack && jack_status != hcp->jack_status) { - snd_soc_jack_report(hcp->jack, jack_status, SND_JACK_LINEOUT); + if (jack_status != hcp->jack_status) { + if (hcp->jack) + snd_soc_jack_report(hcp->jack, jack_status, SND_JACK_LINEOUT); hcp->jack_status = jack_status; } } @@ -880,6 +881,13 @@ static int hdmi_codec_set_jack(struct snd_soc_component *component, if (hcp->hcd.ops->hook_plugged_cb) { hcp->jack = jack; + + /* + * Report the initial jack status which may have been provided + * by the parent hdmi driver while the hpd hook was registered. + */ + snd_soc_jack_report(jack, hcp->jack_status, SND_JACK_LINEOUT); + return 0; } From fb0f25c8fe6d8ae45bfb24f869ea8c41d35e979a Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Tue, 19 Dec 2023 10:30:57 +0800 Subject: [PATCH 1261/1397] ASoC: fsl_sai: Fix channel swap issue on i.MX8MP [ Upstream commit 8f0f01647550daf9cd8752c1656dcb0136d79ce1 ] When flag mclk_with_tere and mclk_direction_output enabled, The SAI transmitter or receiver will be enabled in very early stage, that if FSL_SAI_xMR is set by previous case, for example previous case is one channel, current case is two channels, then current case started with wrong xMR in the beginning, then channel swap happen. The patch is to clear xMR in hw_free() to avoid such channel swap issue. Fixes: 3e4a82612998 ("ASoC: fsl_sai: MCLK bind with TX/RX enable bit") Signed-off-by: Shengjiu Wang Reviewed-by: Daniel Baluta Link: https://msgid.link/r/1702953057-4499-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl_sai.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 3252eefc4bc0..3d202398c541 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -715,6 +715,9 @@ static int fsl_sai_hw_free(struct snd_pcm_substream *substream, bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; unsigned int ofs = sai->soc_data->reg_offset; + /* Clear xMR to avoid channel swap with mclk_with_tere enabled case */ + regmap_write(sai->regmap, FSL_SAI_xMR(tx), 0); + regmap_update_bits(sai->regmap, FSL_SAI_xCR3(tx, ofs), FSL_SAI_CR3_TRCE_MASK, 0); From 5fcd03fa028ba2d77717de41197fd5baa5bc7c5c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 30 Nov 2023 09:43:24 +0800 Subject: [PATCH 1262/1397] i2c: qcom-geni: fix missing clk_disable_unprepare() and geni_se_resources_off() [ Upstream commit 043465b66506e8c647cdd38a2db1f2ee0f369a1b ] Add missing clk_disable_unprepare() and geni_se_resources_off() in the error path in geni_i2c_probe(). Fixes: 14d02fbadb5d ("i2c: qcom-geni: add desc struct to prepare support for I2C Master Hub variant") Signed-off-by: Yang Yingliang Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-qcom-geni.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 229353e96e09..0a9d389df301 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -857,6 +857,7 @@ static int geni_i2c_probe(struct platform_device *pdev) ret = geni_se_resources_on(&gi2c->se); if (ret) { dev_err(dev, "Error turning on resources %d\n", ret); + clk_disable_unprepare(gi2c->core_clk); return ret; } proto = geni_se_read_proto(&gi2c->se); @@ -876,8 +877,11 @@ static int geni_i2c_probe(struct platform_device *pdev) /* FIFO is disabled, so we can only use GPI DMA */ gi2c->gpi_mode = true; ret = setup_gpi_dma(gi2c); - if (ret) + if (ret) { + geni_se_resources_off(&gi2c->se); + clk_disable_unprepare(gi2c->core_clk); return dev_err_probe(dev, ret, "Failed to setup GPI DMA mode\n"); + } dev_dbg(dev, "Using GPI DMA mode for I2C\n"); } else { @@ -890,6 +894,8 @@ static int geni_i2c_probe(struct platform_device *pdev) if (!tx_depth) { dev_err(dev, "Invalid TX FIFO depth\n"); + geni_se_resources_off(&gi2c->se); + clk_disable_unprepare(gi2c->core_clk); return -EINVAL; } From 63835e3f4e0b3046584945ec9723e515f180ee58 Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Tue, 19 Dec 2023 14:39:42 +0800 Subject: [PATCH 1263/1397] drm/amdgpu: re-create idle bo's PTE during VM state machine reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4a0057afa35872a5f2e65576785844688dd9fa5e ] Idle bo's PTE needs to be re-created when resetting VM state machine. Set idle bo's vm_bo as moved to mark it as invalid. Fixes: 55bf196f60df ("drm/amdgpu: reset VM when an error is detected") Signed-off-by: ZhenGuo Yin Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 89c8e51cd332..9fe1278fd586 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -285,6 +285,7 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { struct amdgpu_bo *bo = vm_bo->bo; + vm_bo->moved = true; if (!bo || bo->tbo.type != ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm_bo->vm->moved); else if (bo->parent) From 8f21b05995b3a8f79b87b6057fe1c254b6bd8879 Mon Sep 17 00:00:00 2001 From: Quan Nguyen Date: Mon, 11 Dec 2023 17:22:16 +0700 Subject: [PATCH 1264/1397] i2c: aspeed: Handle the coalesced stop conditions with the start conditions. [ Upstream commit b4cc1cbba5195a4dd497cf2f8f09e7807977d543 ] Some masters may drive the transfers with low enough latency between the nak/stop phase of the current command and the start/address phase of the following command that the interrupts are coalesced by the time we process them. Handle the stop conditions before processing SLAVE_MATCH to fix the complaints that sometimes occur below. "aspeed-i2c-bus 1e78a040.i2c-bus: irq handled != irq. Expected 0x00000086, but was 0x00000084" Fixes: f9eb91350bb2 ("i2c: aspeed: added slave support for Aspeed I2C driver") Signed-off-by: Quan Nguyen Reviewed-by: Andrew Jeffery Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-aspeed.c | 48 ++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 28e2a5fc4528..5511fd46a65e 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -249,18 +249,46 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) if (!slave) return 0; - command = readl(bus->base + ASPEED_I2C_CMD_REG); + /* + * Handle stop conditions early, prior to SLAVE_MATCH. Some masters may drive + * transfers with low enough latency between the nak/stop phase of the current + * command and the start/address phase of the following command that the + * interrupts are coalesced by the time we process them. + */ + if (irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) { + irq_handled |= ASPEED_I2CD_INTR_NORMAL_STOP; + bus->slave_state = ASPEED_I2C_SLAVE_STOP; + } + + if (irq_status & ASPEED_I2CD_INTR_TX_NAK && + bus->slave_state == ASPEED_I2C_SLAVE_READ_PROCESSED) { + irq_handled |= ASPEED_I2CD_INTR_TX_NAK; + bus->slave_state = ASPEED_I2C_SLAVE_STOP; + } + + /* Propagate any stop conditions to the slave implementation. */ + if (bus->slave_state == ASPEED_I2C_SLAVE_STOP) { + i2c_slave_event(slave, I2C_SLAVE_STOP, &value); + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; + } - /* Slave was requested, restart state machine. */ + /* + * Now that we've dealt with any potentially coalesced stop conditions, + * address any start conditions. + */ if (irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH) { irq_handled |= ASPEED_I2CD_INTR_SLAVE_MATCH; bus->slave_state = ASPEED_I2C_SLAVE_START; } - /* Slave is not currently active, irq was for someone else. */ + /* + * If the slave has been stopped and not started then slave interrupt + * handling is complete. + */ if (bus->slave_state == ASPEED_I2C_SLAVE_INACTIVE) return irq_handled; + command = readl(bus->base + ASPEED_I2C_CMD_REG); dev_dbg(bus->dev, "slave irq status 0x%08x, cmd 0x%08x\n", irq_status, command); @@ -279,17 +307,6 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) irq_handled |= ASPEED_I2CD_INTR_RX_DONE; } - /* Slave was asked to stop. */ - if (irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) { - irq_handled |= ASPEED_I2CD_INTR_NORMAL_STOP; - bus->slave_state = ASPEED_I2C_SLAVE_STOP; - } - if (irq_status & ASPEED_I2CD_INTR_TX_NAK && - bus->slave_state == ASPEED_I2C_SLAVE_READ_PROCESSED) { - irq_handled |= ASPEED_I2CD_INTR_TX_NAK; - bus->slave_state = ASPEED_I2C_SLAVE_STOP; - } - switch (bus->slave_state) { case ASPEED_I2C_SLAVE_READ_REQUESTED: if (unlikely(irq_status & ASPEED_I2CD_INTR_TX_ACK)) @@ -324,8 +341,7 @@ static u32 aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus, u32 irq_status) i2c_slave_event(slave, I2C_SLAVE_WRITE_RECEIVED, &value); break; case ASPEED_I2C_SLAVE_STOP: - i2c_slave_event(slave, I2C_SLAVE_STOP, &value); - bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; + /* Stop event handling is done early. Unreachable. */ break; case ASPEED_I2C_SLAVE_START: /* Slave was just started. Waiting for the next event. */; From 6214f5c966f0ff0a31d1487c89b725fea847c8e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Dec 2023 09:47:01 +0100 Subject: [PATCH 1265/1397] x86/xen: add CPU dependencies for 32-bit build [ Upstream commit 93cd0597649844a0fe7989839a3202735fb3ae67 ] Xen only supports modern CPUs even when running a 32-bit kernel, and it now requires a kernel built for a 64 byte (or larger) cache line: In file included from : In function 'xen_vcpu_setup', inlined from 'xen_vcpu_setup_restore' at arch/x86/xen/enlighten.c:111:3, inlined from 'xen_vcpu_restore' at arch/x86/xen/enlighten.c:141:3: include/linux/compiler_types.h:435:45: error: call to '__compiletime_assert_287' declared with attribute error: BUILD_BUG_ON failed: sizeof(*vcpup) > SMP_CACHE_BYTES arch/x86/xen/enlighten.c:166:9: note: in expansion of macro 'BUILD_BUG_ON' 166 | BUILD_BUG_ON(sizeof(*vcpup) > SMP_CACHE_BYTES); | ^~~~~~~~~~~~ Enforce the dependency with a whitelist of CPU configurations. In normal distro kernels, CONFIG_X86_GENERIC is enabled, and this works fine. When this is not set, still allow Xen to be built on kernels that target a 64-bit capable CPU. Fixes: db2832309a82 ("x86/xen: fix percpu vcpu_info allocation") Signed-off-by: Arnd Bergmann Reviewed-by: Juergen Gross Tested-by: Alyssa Ross Link: https://lore.kernel.org/r/20231204084722.3789473-1-arnd@kernel.org Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- arch/x86/xen/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 9b1ec5d8c99c..a65fc2ae15b4 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -9,6 +9,7 @@ config XEN select PARAVIRT_CLOCK select X86_HV_CALLBACK_VECTOR depends on X86_64 || (X86_32 && X86_PAE) + depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MCORE2 || MATOM || MK8) depends on X86_LOCAL_APIC && X86_TSC help This is the Linux Xen port. Enabling this will allow the From 3ccefdae5817072cb4254450125f17acba9a1510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Fri, 15 Dec 2023 22:34:24 +0100 Subject: [PATCH 1266/1397] pinctrl: at91-pio4: use dedicated lock class for IRQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 14694179e561b5f2f7e56a0f590e2cb49a9cc7ab ] Trying to suspend to RAM on SAMA5D27 EVK leads to the following lockdep warning: ============================================ WARNING: possible recursive locking detected 6.7.0-rc5-wt+ #532 Not tainted -------------------------------------------- sh/92 is trying to acquire lock: c3cf306c (&irq_desc_lock_class){-.-.}-{2:2}, at: __irq_get_desc_lock+0xe8/0x100 but task is already holding lock: c3d7c46c (&irq_desc_lock_class){-.-.}-{2:2}, at: __irq_get_desc_lock+0xe8/0x100 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&irq_desc_lock_class); lock(&irq_desc_lock_class); *** DEADLOCK *** May be due to missing lock nesting notation 6 locks held by sh/92: #0: c3aa0258 (sb_writers#6){.+.+}-{0:0}, at: ksys_write+0xd8/0x178 #1: c4c2df44 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x138/0x284 #2: c32684a0 (kn->active){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x148/0x284 #3: c232b6d4 (system_transition_mutex){+.+.}-{3:3}, at: pm_suspend+0x13c/0x4e8 #4: c387b088 (&dev->mutex){....}-{3:3}, at: __device_suspend+0x1e8/0x91c #5: c3d7c46c (&irq_desc_lock_class){-.-.}-{2:2}, at: __irq_get_desc_lock+0xe8/0x100 stack backtrace: CPU: 0 PID: 92 Comm: sh Not tainted 6.7.0-rc5-wt+ #532 Hardware name: Atmel SAMA5 unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x34/0x48 dump_stack_lvl from __lock_acquire+0x19ec/0x3a0c __lock_acquire from lock_acquire.part.0+0x124/0x2d0 lock_acquire.part.0 from _raw_spin_lock_irqsave+0x5c/0x78 _raw_spin_lock_irqsave from __irq_get_desc_lock+0xe8/0x100 __irq_get_desc_lock from irq_set_irq_wake+0xa8/0x204 irq_set_irq_wake from atmel_gpio_irq_set_wake+0x58/0xb4 atmel_gpio_irq_set_wake from irq_set_irq_wake+0x100/0x204 irq_set_irq_wake from gpio_keys_suspend+0xec/0x2b8 gpio_keys_suspend from dpm_run_callback+0xe4/0x248 dpm_run_callback from __device_suspend+0x234/0x91c __device_suspend from dpm_suspend+0x224/0x43c dpm_suspend from dpm_suspend_start+0x9c/0xa8 dpm_suspend_start from suspend_devices_and_enter+0x1e0/0xa84 suspend_devices_and_enter from pm_suspend+0x460/0x4e8 pm_suspend from state_store+0x78/0xe4 state_store from kernfs_fop_write_iter+0x1a0/0x284 kernfs_fop_write_iter from vfs_write+0x38c/0x6f4 vfs_write from ksys_write+0xd8/0x178 ksys_write from ret_fast_syscall+0x0/0x1c Exception stack(0xc52b3fa8 to 0xc52b3ff0) 3fa0: 00000004 005a0ae8 00000001 005a0ae8 00000004 00000001 3fc0: 00000004 005a0ae8 00000001 00000004 00000004 b6c616c0 00000020 0059d190 3fe0: 00000004 b6c61678 aec5a041 aebf1a26 This warning is raised because pinctrl-at91-pio4 uses chained IRQ. Whenever a wake up source configures an IRQ through irq_set_irq_wake, it will lock the corresponding IRQ desc, and then call irq_set_irq_wake on "parent" IRQ which will do the same on its own IRQ desc, but since those two locks share the same class, lockdep reports this as an issue. Fix lockdep false positive by setting a different class for parent and children IRQ Fixes: 776180848b57 ("pinctrl: introduce driver for Atmel PIO4 controller") Signed-off-by: Alexis Lothoré Link: https://lore.kernel.org/r/20231215-lockdep_warning-v1-1-8137b2510ed5@bootlin.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-at91-pio4.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index 383309e533c3..a27c01fcbb47 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -1068,6 +1068,13 @@ static const struct of_device_id atmel_pctrl_of_match[] = { } }; +/* + * This lock class allows to tell lockdep that parent IRQ and children IRQ do + * not share the same class so it does not raise false positive + */ +static struct lock_class_key atmel_lock_key; +static struct lock_class_key atmel_request_key; + static int atmel_pinctrl_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1214,6 +1221,7 @@ static int atmel_pinctrl_probe(struct platform_device *pdev) irq_set_chip_and_handler(irq, &atmel_gpio_irq_chip, handle_simple_irq); irq_set_chip_data(irq, atmel_pioctrl); + irq_set_lockdep_class(irq, &atmel_lock_key, &atmel_request_key); dev_dbg(dev, "atmel gpio irq domain: hwirq: %d, linux irq: %d\n", i, irq); From 249cbac4454f1c56f2b3f8dfe40f04c3cb82d82f Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Thu, 21 Dec 2023 09:20:36 +0800 Subject: [PATCH 1267/1397] gpiolib: cdev: add gpio_device locking wrapper around gpio_ioctl() [ Upstream commit 1d656bd259edb89dc1d9938ec5c5389867088546 ] While the GPIO cdev gpio_ioctl() call is in progress, the kernel can call gpiochip_remove() which will set gdev->chip to NULL, after which any subsequent access will cause a crash. gpio_ioctl() was overlooked by the previous fix to protect syscalls (bdbbae241a04), so add protection for that. Fixes: bdbbae241a04 ("gpiolib: protect the GPIO device against being dropped while in use by user-space") Fixes: d7c51b47ac11 ("gpio: userspace ABI for reading/writing GPIO lines") Fixes: 3c0d9c635ae2 ("gpiolib: cdev: support GPIO_V2_GET_LINE_IOCTL and GPIO_V2_LINE_GET_VALUES_IOCTL") Fixes: aad955842d1c ("gpiolib: cdev: support GPIO_V2_GET_LINEINFO_IOCTL and GPIO_V2_GET_LINEINFO_WATCH_IOCTL") Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpiolib-cdev.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index e39d344feb28..4f3e66ece7f7 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -2482,10 +2482,7 @@ static int lineinfo_unwatch(struct gpio_chardev_data *cdev, void __user *ip) return 0; } -/* - * gpio_ioctl() - ioctl handler for the GPIO chardev - */ -static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long gpio_ioctl_unlocked(struct file *file, unsigned int cmd, unsigned long arg) { struct gpio_chardev_data *cdev = file->private_data; struct gpio_device *gdev = cdev->gdev; @@ -2522,6 +2519,17 @@ static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } } +/* + * gpio_ioctl() - ioctl handler for the GPIO chardev + */ +static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct gpio_chardev_data *cdev = file->private_data; + + return call_ioctl_locked(file, cmd, arg, cdev->gdev, + gpio_ioctl_unlocked); +} + #ifdef CONFIG_COMPAT static long gpio_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) From fedbc8732fd06d77559c8e92f43e12b69c178d7c Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Tue, 19 Dec 2023 17:48:23 +0100 Subject: [PATCH 1268/1397] nvme-pci: fix sleeping function called from interrupt context [ Upstream commit f6fe0b2d35457c10ec37acc209d19726bdc16dbd ] the nvme_handle_cqe() interrupt handler calls nvme_complete_async_event() but the latter may call nvme_auth_stop() which is a blocking function. Sleeping functions can't be called in interrupt context BUG: sleeping function called from invalid context in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/15 Call Trace: __cancel_work_timer+0x31e/0x460 ? nvme_change_ctrl_state+0xcf/0x3c0 [nvme_core] ? nvme_change_ctrl_state+0xcf/0x3c0 [nvme_core] nvme_complete_async_event+0x365/0x480 [nvme_core] nvme_poll_cq+0x262/0xe50 [nvme] Fix the bug by moving nvme_auth_stop() to fw_act_work (executed by the nvme_wq workqueue) Fixes: f50fff73d620 ("nvme: implement In-Band authentication") Signed-off-by: Maurizio Lombardi Reviewed-by: Jens Axboe Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d5c8b0a08d49..b32e3cff37b1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4100,6 +4100,8 @@ static void nvme_fw_act_work(struct work_struct *work) struct nvme_ctrl, fw_act_work); unsigned long fw_act_timeout; + nvme_auth_stop(ctrl); + if (ctrl->mtfa) fw_act_timeout = jiffies + msecs_to_jiffies(ctrl->mtfa * 100); @@ -4155,7 +4157,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) * firmware activation. */ if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) { - nvme_auth_stop(ctrl); requeue = false; queue_work(nvme_wq, &ctrl->fw_act_work); } From 1d42af06c6d2dc4b4f6bb84b6067989ca5ebf34f Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Wed, 25 Oct 2023 07:58:29 -0700 Subject: [PATCH 1269/1397] interconnect: Treat xlate() returning NULL node as an error [ Upstream commit ad2ab1297d0c80899125a842bb7a078abfe1e6ce ] Currently, if provider->xlate() or provider->xlate_extended() "successfully" return a NULL node, then of_icc_get_from_provider() won't consider that an error and will successfully return the NULL node. This bypasses error handling in of_icc_get_by_index() and leads to NULL dereferences in path_find(). This could be avoided by ensuring provider callbacks always return an error for NULL nodes, but it's better to explicitly protect against this in the common framework. Fixes: 87e3031b6fbd ("interconnect: Allow endpoints translation via DT") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20231025145829.11603-1-quic_mdtipton@quicinc.com Signed-off-by: Georgi Djakov Signed-off-by: Sasha Levin --- drivers/interconnect/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index dfab160ca529..50bac2d79d9b 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -395,6 +395,9 @@ struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec) } mutex_unlock(&icc_lock); + if (!node) + return ERR_PTR(-EINVAL); + if (IS_ERR(node)) return ERR_CAST(node); From 040c16b5d93411041df309e09c5cbfc50775e584 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Mon, 30 Oct 2023 10:02:19 +0800 Subject: [PATCH 1270/1397] iio: imu: inv_mpu6050: fix an error code problem in inv_mpu6050_read_raw [ Upstream commit c3df0e29fb7788c4b3ddf37d5ed87dda2b822943 ] inv_mpu6050_sensor_show() can return -EINVAL or IIO_VAL_INT. Return the true value rather than only return IIO_VAL_INT. Fixes: d5098447147c ("iio: imu: mpu6050: add calibration offset support") Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20231030020218.65728-1-suhui@nfschina.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/imu/inv_mpu6050/inv_mpu_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index 29f906c884bd..a9a5fb266ef1 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -749,13 +749,13 @@ inv_mpu6050_read_raw(struct iio_dev *indio_dev, ret = inv_mpu6050_sensor_show(st, st->reg->gyro_offset, chan->channel2, val); mutex_unlock(&st->lock); - return IIO_VAL_INT; + return ret; case IIO_ACCEL: mutex_lock(&st->lock); ret = inv_mpu6050_sensor_show(st, st->reg->accl_offset, chan->channel2, val); mutex_unlock(&st->lock); - return IIO_VAL_INT; + return ret; default: return -EINVAL; From 5832822a4acb92401b2056f0db112e91242cac35 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 30 Nov 2023 15:04:45 +0100 Subject: [PATCH 1271/1397] interconnect: qcom: sm8250: Enable sync_state [ Upstream commit bfc7db1cb94ad664546d70212699f8cc6c539e8c ] Add the generic icc sync_state callback to ensure interconnect votes are taken into account, instead of being pegged at maximum values. Fixes: b95b668eaaa2 ("interconnect: qcom: icc-rpmh: Add BCMs to commit list in pre_aggregate") Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231130-topic-8250icc_syncstate-v1-1-7ce78ba6e04c@linaro.org Signed-off-by: Georgi Djakov Signed-off-by: Sasha Levin --- drivers/interconnect/qcom/sm8250.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/sm8250.c b/drivers/interconnect/qcom/sm8250.c index 661dc18d99db..2a2f56b99373 100644 --- a/drivers/interconnect/qcom/sm8250.c +++ b/drivers/interconnect/qcom/sm8250.c @@ -1995,6 +1995,7 @@ static struct platform_driver qnoc_driver = { .driver = { .name = "qnoc-sm8250", .of_match_table = qnoc_of_match, + .sync_state = icc_sync_state, }, }; module_platform_driver(qnoc_driver); From 2a83824c6cf9ab81062c5556681d1aa297bd2307 Mon Sep 17 00:00:00 2001 From: Haoran Liu Date: Sun, 3 Dec 2023 19:00:23 +0000 Subject: [PATCH 1272/1397] Input: ipaq-micro-keys - add error handling for devm_kmemdup [ Upstream commit 59b6a747e2d39227ac2325c5e29d6ab3bb070c2a ] Check the return value of i2c_add_adapter. Static analysis revealed that the function did not properly handle potential failures of i2c_add_adapter, which could lead to partial initialization of the I2C adapter and unstable operation. Signed-off-by: Haoran Liu Link: https://lore.kernel.org/r/20231203164653.38983-1-liuhaoran14@163.com Fixes: d7535ffa427b ("Input: driver for microcontroller keys on the iPaq h3xxx") Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/keyboard/ipaq-micro-keys.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/keyboard/ipaq-micro-keys.c b/drivers/input/keyboard/ipaq-micro-keys.c index 7b509bce2b33..1d71dd79ffd2 100644 --- a/drivers/input/keyboard/ipaq-micro-keys.c +++ b/drivers/input/keyboard/ipaq-micro-keys.c @@ -105,6 +105,9 @@ static int micro_key_probe(struct platform_device *pdev) keys->codes = devm_kmemdup(&pdev->dev, micro_keycodes, keys->input->keycodesize * keys->input->keycodemax, GFP_KERNEL); + if (!keys->codes) + return -ENOMEM; + keys->input->keycode = keys->codes; __set_bit(EV_KEY, keys->input->evbit); From e40e75a2e554ec53cd01caff680cc0d3e0dd3dfa Mon Sep 17 00:00:00 2001 From: George Stark Date: Tue, 28 Nov 2023 02:55:58 +0300 Subject: [PATCH 1273/1397] iio: adc: meson: add separate config for axg SoC family [ Upstream commit 59b75dcb0953813676b5030877f3f37cedaed87d ] According to Amlogic custom kernels ADC of axg SoC family has vref_select and requires this setting to work nominally and thus needs a separate config. Fixes: 90c6241860bf ("iio: adc: meson: init voltage control bits") Signed-off-by: George Stark Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231127235558.71995-1-gnstark@salutedevices.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/meson_saradc.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 320e3e7e3d4d..57cfabe80c82 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -1239,6 +1239,20 @@ static const struct meson_sar_adc_param meson_sar_adc_gxl_param = { .cmv_select = 1, }; +static const struct meson_sar_adc_param meson_sar_adc_axg_param = { + .has_bl30_integration = true, + .clock_rate = 1200000, + .bandgap_reg = MESON_SAR_ADC_REG11, + .regmap_config = &meson_sar_adc_regmap_config_gxbb, + .resolution = 12, + .disable_ring_counter = 1, + .has_reg11 = true, + .vref_volatge = 1, + .has_vref_select = true, + .vref_select = VREF_VDDA, + .cmv_select = 1, +}; + static const struct meson_sar_adc_param meson_sar_adc_g12a_param = { .has_bl30_integration = false, .clock_rate = 1200000, @@ -1283,7 +1297,7 @@ static const struct meson_sar_adc_data meson_sar_adc_gxm_data = { }; static const struct meson_sar_adc_data meson_sar_adc_axg_data = { - .param = &meson_sar_adc_gxl_param, + .param = &meson_sar_adc_axg_param, .name = "meson-axg-saradc", }; From 92ba459b8d63a642441a8408d874c3429bbd97ba Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 14 Nov 2022 11:06:26 +0000 Subject: [PATCH 1274/1397] scsi: bnx2fc: Fix skb double free in bnx2fc_rcv() [ Upstream commit 08c94d80b2da481652fb633e79cbc41e9e326a91 ] skb_share_check() already drops the reference to the skb when returning NULL. Using kfree_skb() in the error handling path leads to an skb double free. Fix this by removing the variable tmp_skb, and return directly when skb_share_check() returns NULL. Fixes: 01a4cc4d0cd6 ("bnx2fc: do not add shared skbs to the fcoe_rx_list") Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20221114110626.526643-1-weiyongjun@huaweicloud.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 05ddbb9bb7d8..451a58e0fd96 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -429,7 +429,6 @@ static int bnx2fc_rcv(struct sk_buff *skb, struct net_device *dev, struct fcoe_ctlr *ctlr; struct fcoe_rcv_info *fr; struct fcoe_percpu_s *bg; - struct sk_buff *tmp_skb; interface = container_of(ptype, struct bnx2fc_interface, fcoe_packet_type); @@ -441,11 +440,9 @@ static int bnx2fc_rcv(struct sk_buff *skb, struct net_device *dev, goto err; } - tmp_skb = skb_share_check(skb, GFP_ATOMIC); - if (!tmp_skb) - goto err; - - skb = tmp_skb; + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return -1; if (unlikely(eth_hdr(skb)->h_proto != htons(ETH_P_FCOE))) { printk(KERN_ERR PFX "bnx2fc_rcv: Wrong FC type frame\n"); From a8409bcc324347bcb6abff033fffc3ab8be1c819 Mon Sep 17 00:00:00 2001 From: ChanWoo Lee Date: Fri, 15 Dec 2023 09:38:12 +0900 Subject: [PATCH 1275/1397] scsi: ufs: qcom: Return ufs_qcom_clk_scale_*() errors in ufs_qcom_clk_scale_notify() [ Upstream commit 9264fd61e628ce180a168e6b90bde134dd49ec28 ] In commit 031312dbc695 ("scsi: ufs: ufs-qcom: Remove unnecessary goto statements") the error handling was accidentally changed, resulting in the error of ufs_qcom_clk_scale_*() calls not being returned. This is the case I checked: ufs_qcom_clk_scale_notify -> 'ufs_qcom_clk_scale_up_/down_pre_change' error -> return 0; Make sure those errors are properly returned. Fixes: 031312dbc695 ("scsi: ufs: ufs-qcom: Remove unnecessary goto statements") Reviewed-by: Manivannan Sadhasivam Signed-off-by: ChanWoo Lee Link: https://lore.kernel.org/r/20231215003812.29650-1-cw9316.lee@samsung.com Reviewed-by: Andrew Halaney Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/host/ufs-qcom.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index b1d720031251..fc40726e13c2 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1399,9 +1399,11 @@ static int ufs_qcom_clk_scale_notify(struct ufs_hba *hba, err = ufs_qcom_clk_scale_up_pre_change(hba); else err = ufs_qcom_clk_scale_down_pre_change(hba); - if (err) - ufshcd_uic_hibern8_exit(hba); + if (err) { + ufshcd_uic_hibern8_exit(hba); + return err; + } } else { if (scale_up) err = ufs_qcom_clk_scale_up_post_change(hba); From 1f7213358d846079b88c22ab8b7eb1f5652037c2 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Mon, 18 Dec 2023 07:32:17 -0800 Subject: [PATCH 1276/1397] scsi: ufs: core: Let the sq_lock protect sq_tail_slot access [ Upstream commit 04c116e2bdfc3969f9819d2cebfdf678353c354c ] When accessing sq_tail_slot without protection from sq_lock, a race condition can cause multiple SQEs to be copied to duplicate SQE slots. This can lead to multiple stability issues. Fix this by moving the *dest initialization in ufshcd_send_command() back under protection from the sq_lock. Fixes: 3c85f087faec ("scsi: ufs: mcq: Use pointer arithmetic in ufshcd_send_command()") Signed-off-by: Can Guo Link: https://lore.kernel.org/r/1702913550-20631-1-git-send-email-quic_cang@quicinc.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufshcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 170fbd5715b2..a885cc177cff 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2172,9 +2172,10 @@ void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag, if (is_mcq_enabled(hba)) { int utrd_size = sizeof(struct utp_transfer_req_desc); struct utp_transfer_req_desc *src = lrbp->utr_descriptor_ptr; - struct utp_transfer_req_desc *dest = hwq->sqe_base_addr + hwq->sq_tail_slot; + struct utp_transfer_req_desc *dest; spin_lock(&hwq->sq_lock); + dest = hwq->sqe_base_addr + hwq->sq_tail_slot; memcpy(dest, src, utrd_size); ufshcd_inc_sq_tail(hwq); spin_unlock(&hwq->sq_lock); From 60d6d2704056bd83adec09d8717319d844593e8a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 19 Oct 2023 16:23:56 +0300 Subject: [PATCH 1277/1397] iio: kx022a: Fix acceleration value scaling commit 92bfa4ab1b79be95c4f52d13f5386390f0a513c2 upstream. The IIO ABI mandates acceleration values from accelerometer to be emitted in m/s^2. The KX022A was emitting values in micro m/s^2. Fix driver to report the correct scale values. Signed-off-by: Matti Vaittinen Reported-by: Jagath Jog J Fixes: 7c1d1677b322 ("iio: accel: Support Kionix/ROHM KX022A accelerometer") Tested-by: Jagath Jog J Link: https://lore.kernel.org/r/ZTEt7NqfDHPOkm8j@dc78bmyyyyyyyyyyyyydt-3.rev.dnainternet.fi Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/kionix-kx022a.c | 37 ++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c index 4ea3c6718ed4..971fc60efef0 100644 --- a/drivers/iio/accel/kionix-kx022a.c +++ b/drivers/iio/accel/kionix-kx022a.c @@ -273,17 +273,17 @@ static const unsigned int kx022a_odrs[] = { * (range / 2^bits) * g = (range / 2^bits) * 9.80665 m/s^2 * => KX022A uses 16 bit (HiRes mode - assume the low 8 bits are zeroed * in low-power mode(?) ) - * => +/-2G => 4 / 2^16 * 9,80665 * 10^6 (to scale to micro) - * => +/-2G - 598.550415 - * +/-4G - 1197.10083 - * +/-8G - 2394.20166 - * +/-16G - 4788.40332 + * => +/-2G => 4 / 2^16 * 9,80665 + * => +/-2G - 0.000598550415 + * +/-4G - 0.00119710083 + * +/-8G - 0.00239420166 + * +/-16G - 0.00478840332 */ static const int kx022a_scale_table[][2] = { - { 598, 550415 }, - { 1197, 100830 }, - { 2394, 201660 }, - { 4788, 403320 }, + { 0, 598550 }, + { 0, 1197101 }, + { 0, 2394202 }, + { 0, 4788403 }, }; static int kx022a_read_avail(struct iio_dev *indio_dev, @@ -302,7 +302,7 @@ static int kx022a_read_avail(struct iio_dev *indio_dev, *vals = (const int *)kx022a_scale_table; *length = ARRAY_SIZE(kx022a_scale_table) * ARRAY_SIZE(kx022a_scale_table[0]); - *type = IIO_VAL_INT_PLUS_MICRO; + *type = IIO_VAL_INT_PLUS_NANO; return IIO_AVAIL_LIST; default: return -EINVAL; @@ -366,6 +366,20 @@ static int kx022a_turn_on_unlock(struct kx022a_data *data) return ret; } +static int kx022a_write_raw_get_fmt(struct iio_dev *idev, + struct iio_chan_spec const *chan, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_SCALE: + return IIO_VAL_INT_PLUS_NANO; + case IIO_CHAN_INFO_SAMP_FREQ: + return IIO_VAL_INT_PLUS_MICRO; + default: + return -EINVAL; + } +} + static int kx022a_write_raw(struct iio_dev *idev, struct iio_chan_spec const *chan, int val, int val2, long mask) @@ -510,7 +524,7 @@ static int kx022a_read_raw(struct iio_dev *idev, kx022a_reg2scale(regval, val, val2); - return IIO_VAL_INT_PLUS_MICRO; + return IIO_VAL_INT_PLUS_NANO; } return -EINVAL; @@ -712,6 +726,7 @@ static int kx022a_fifo_flush(struct iio_dev *idev, unsigned int samples) static const struct iio_info kx022a_info = { .read_raw = &kx022a_read_raw, .write_raw = &kx022a_write_raw, + .write_raw_get_fmt = &kx022a_write_raw_get_fmt, .read_avail = &kx022a_read_avail, .validate_trigger = iio_validate_own_trigger, From d79cb9bcc63c2a43bc512dcb98f6764e7bb0a0ec Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Thu, 16 Nov 2023 15:10:26 +0800 Subject: [PATCH 1278/1397] iio: adc: imx93: add four channels for imx93 adc commit 2475ecdb9b6e177b133cf26e64e8d441d37bebde upstream. According to the spec, this ADC totally support 8 channels. i.MX93 contain this ADC with 4 channels connected to pins in the package. i.MX95 contain this ADC with 8 channels connected to pins in the package. Signed-off-by: Haibo Chen Fixes: 7d02296ac8b8 ("iio: adc: add imx93 adc support") Link: https://lore.kernel.org/r/20231116071026.611269-1-haibo.chen@nxp.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/imx93_adc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iio/adc/imx93_adc.c b/drivers/iio/adc/imx93_adc.c index dce9ec91e4a7..512d7b95b08e 100644 --- a/drivers/iio/adc/imx93_adc.c +++ b/drivers/iio/adc/imx93_adc.c @@ -93,6 +93,10 @@ static const struct iio_chan_spec imx93_adc_iio_channels[] = { IMX93_ADC_CHAN(1), IMX93_ADC_CHAN(2), IMX93_ADC_CHAN(3), + IMX93_ADC_CHAN(4), + IMX93_ADC_CHAN(5), + IMX93_ADC_CHAN(6), + IMX93_ADC_CHAN(7), }; static void imx93_adc_power_down(struct imx93_adc *adc) From 706b7aca24f4875a8bf4b9342048d40b1368cadb Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Thu, 26 Oct 2023 17:44:49 +0200 Subject: [PATCH 1279/1397] iio: common: ms_sensors: ms_sensors_i2c: fix humidity conversion time table commit 54cf39ec16335dadbe1ba008d8e5e98dae3e26f8 upstream. The HTU21 offers 4 sampling frequencies: 20, 40, 70 and 120, which are associated to an index that is used to select the right measurement resolution and its corresponding measurement time. The current implementation selects the measurement resolution and the temperature measurement time properly, but it does not select the right humidity measurement time in all cases. In summary, the 40 and 70 humidity measurement times are swapped. The reason for that is probably the unusual coding for the measurement resolution. According to the datasheet, the bits [7,0] of the "user register" are used as follows to select the bit resolution: -------------------------------------------------- | Bit 7 | Bit 0 | RH | Temp | Trh (us) | Tt (us) | -------------------------------------------------- | 0 | 0 | 12 | 14 | 16000 | 50000 | -------------------------------------------------- | 0 | 1 | 8 | 12 | 3000 | 13000 | -------------------------------------------------- | 1 | 0 | 10 | 13 | 5000 | 25000 | -------------------------------------------------- | 1 | 1 | 11 | 11 | 8000 | 7000 | -------------------------------------------------- *This table is available in the official datasheet, page 13/21. I have just appended the times provided in the humidity/temperature tables, pages 3/21, 5/21. Note that always a pair of resolutions is selected. The sampling frequencies [20, 40, 70, 120] are assigned to a linear index [0..3] which is then coded as follows [1]: Index [7,0] -------------- idx 0 0,0 idx 1 1,0 idx 2 0,1 idx 3 1,1 That is done that way because the temperature measurements are being used as the reference for the sampling frequency (the frequencies and the temperature measurement times are correlated), so increasing the index always reduces the temperature measurement time and its resolution. Therefore, the temperature measurement time array is as simple as [50000, 25000, 13000, 7000] On the other hand, the humidity resolution cannot follow the same pattern because of the way it is coded in the "user register", where both resolutions are selected at the same time. The humidity measurement time array is the following: [16000, 3000, 5000, 8000], which defines the following assignments: Index [7,0] Trh ----------------------- idx 0 0,0 16000 -> right, [0,0] selects 12 bits (Trh = 16000) idx 1 1,0 3000 -> wrong! [1,0] selects 10 bits (Trh = 5000) idx 2 0,1 5000 -> wrong! [0,1] selects 8 bits (Trh = 3000) idx 3 1,1 8000 -> right, [1,1] selects 11 bits (Trh = 8000) The times have been ordered as if idx = 1 -> [0,1] and idx = 2 -> [1,0], which is not the case for the reason explained above. So a simple modification is required to obtain the right humidity measurement time array, swapping the values in the positions 1 and 2. The right table should be the following: [16000, 5000, 3000, 8000] Fix the humidity measurement time array with the right idex/value coding. [1] The actual code that makes this coding and assigns it to the current value of the "user register" is the following: config_reg &= 0x7E; config_reg |= ((i & 1) << 7) + ((i & 2) >> 1); Fixes: d574a87cc311 ("Add meas-spec sensors common part") Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20231026-topic-htu21_conversion_time-v1-1-bd257dc44209@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/common/ms_sensors/ms_sensors_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/common/ms_sensors/ms_sensors_i2c.c b/drivers/iio/common/ms_sensors/ms_sensors_i2c.c index 6633b35a94e6..9c9bc77003c7 100644 --- a/drivers/iio/common/ms_sensors/ms_sensors_i2c.c +++ b/drivers/iio/common/ms_sensors/ms_sensors_i2c.c @@ -15,8 +15,8 @@ /* Conversion times in us */ static const u16 ms_sensors_ht_t_conversion_time[] = { 50000, 25000, 13000, 7000 }; -static const u16 ms_sensors_ht_h_conversion_time[] = { 16000, 3000, - 5000, 8000 }; +static const u16 ms_sensors_ht_h_conversion_time[] = { 16000, 5000, + 3000, 8000 }; static const u16 ms_sensors_tp_conversion_time[] = { 500, 1100, 2100, 4100, 8220, 16440 }; From 7a9c971743589b0afa31cd421392fe9e4b0f4af6 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Thu, 2 Nov 2023 13:52:58 +0100 Subject: [PATCH 1280/1397] iio: imu: adis16475: add spi_device_id table commit ee4d79055aeea27f1b8c42233cc0c90d0a8b5355 upstream. This prevents the warning message "SPI driver has no spi_device_id for..." when registering the driver. More importantly, it makes sure that module autoloading works as spi relies on spi: modaliases and not of. While at it, move the of_device_id table to it's natural place. Fixes: fff7352bf7a3c ("iio: imu: Add support for adis16475") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20231102125258.3284830-1-nuno.sa@analog.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/imu/adis16475.c | 117 ++++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 45 deletions(-) diff --git a/drivers/iio/imu/adis16475.c b/drivers/iio/imu/adis16475.c index 17275a53ca2c..0a9951a50210 100644 --- a/drivers/iio/imu/adis16475.c +++ b/drivers/iio/imu/adis16475.c @@ -1244,50 +1244,6 @@ static int adis16475_config_irq_pin(struct adis16475 *st) return 0; } -static const struct of_device_id adis16475_of_match[] = { - { .compatible = "adi,adis16470", - .data = &adis16475_chip_info[ADIS16470] }, - { .compatible = "adi,adis16475-1", - .data = &adis16475_chip_info[ADIS16475_1] }, - { .compatible = "adi,adis16475-2", - .data = &adis16475_chip_info[ADIS16475_2] }, - { .compatible = "adi,adis16475-3", - .data = &adis16475_chip_info[ADIS16475_3] }, - { .compatible = "adi,adis16477-1", - .data = &adis16475_chip_info[ADIS16477_1] }, - { .compatible = "adi,adis16477-2", - .data = &adis16475_chip_info[ADIS16477_2] }, - { .compatible = "adi,adis16477-3", - .data = &adis16475_chip_info[ADIS16477_3] }, - { .compatible = "adi,adis16465-1", - .data = &adis16475_chip_info[ADIS16465_1] }, - { .compatible = "adi,adis16465-2", - .data = &adis16475_chip_info[ADIS16465_2] }, - { .compatible = "adi,adis16465-3", - .data = &adis16475_chip_info[ADIS16465_3] }, - { .compatible = "adi,adis16467-1", - .data = &adis16475_chip_info[ADIS16467_1] }, - { .compatible = "adi,adis16467-2", - .data = &adis16475_chip_info[ADIS16467_2] }, - { .compatible = "adi,adis16467-3", - .data = &adis16475_chip_info[ADIS16467_3] }, - { .compatible = "adi,adis16500", - .data = &adis16475_chip_info[ADIS16500] }, - { .compatible = "adi,adis16505-1", - .data = &adis16475_chip_info[ADIS16505_1] }, - { .compatible = "adi,adis16505-2", - .data = &adis16475_chip_info[ADIS16505_2] }, - { .compatible = "adi,adis16505-3", - .data = &adis16475_chip_info[ADIS16505_3] }, - { .compatible = "adi,adis16507-1", - .data = &adis16475_chip_info[ADIS16507_1] }, - { .compatible = "adi,adis16507-2", - .data = &adis16475_chip_info[ADIS16507_2] }, - { .compatible = "adi,adis16507-3", - .data = &adis16475_chip_info[ADIS16507_3] }, - { }, -}; -MODULE_DEVICE_TABLE(of, adis16475_of_match); static int adis16475_probe(struct spi_device *spi) { @@ -1301,7 +1257,7 @@ static int adis16475_probe(struct spi_device *spi) st = iio_priv(indio_dev); - st->info = device_get_match_data(&spi->dev); + st->info = spi_get_device_match_data(spi); if (!st->info) return -EINVAL; @@ -1341,12 +1297,83 @@ static int adis16475_probe(struct spi_device *spi) return 0; } +static const struct of_device_id adis16475_of_match[] = { + { .compatible = "adi,adis16470", + .data = &adis16475_chip_info[ADIS16470] }, + { .compatible = "adi,adis16475-1", + .data = &adis16475_chip_info[ADIS16475_1] }, + { .compatible = "adi,adis16475-2", + .data = &adis16475_chip_info[ADIS16475_2] }, + { .compatible = "adi,adis16475-3", + .data = &adis16475_chip_info[ADIS16475_3] }, + { .compatible = "adi,adis16477-1", + .data = &adis16475_chip_info[ADIS16477_1] }, + { .compatible = "adi,adis16477-2", + .data = &adis16475_chip_info[ADIS16477_2] }, + { .compatible = "adi,adis16477-3", + .data = &adis16475_chip_info[ADIS16477_3] }, + { .compatible = "adi,adis16465-1", + .data = &adis16475_chip_info[ADIS16465_1] }, + { .compatible = "adi,adis16465-2", + .data = &adis16475_chip_info[ADIS16465_2] }, + { .compatible = "adi,adis16465-3", + .data = &adis16475_chip_info[ADIS16465_3] }, + { .compatible = "adi,adis16467-1", + .data = &adis16475_chip_info[ADIS16467_1] }, + { .compatible = "adi,adis16467-2", + .data = &adis16475_chip_info[ADIS16467_2] }, + { .compatible = "adi,adis16467-3", + .data = &adis16475_chip_info[ADIS16467_3] }, + { .compatible = "adi,adis16500", + .data = &adis16475_chip_info[ADIS16500] }, + { .compatible = "adi,adis16505-1", + .data = &adis16475_chip_info[ADIS16505_1] }, + { .compatible = "adi,adis16505-2", + .data = &adis16475_chip_info[ADIS16505_2] }, + { .compatible = "adi,adis16505-3", + .data = &adis16475_chip_info[ADIS16505_3] }, + { .compatible = "adi,adis16507-1", + .data = &adis16475_chip_info[ADIS16507_1] }, + { .compatible = "adi,adis16507-2", + .data = &adis16475_chip_info[ADIS16507_2] }, + { .compatible = "adi,adis16507-3", + .data = &adis16475_chip_info[ADIS16507_3] }, + { }, +}; +MODULE_DEVICE_TABLE(of, adis16475_of_match); + +static const struct spi_device_id adis16475_ids[] = { + { "adis16470", (kernel_ulong_t)&adis16475_chip_info[ADIS16470] }, + { "adis16475-1", (kernel_ulong_t)&adis16475_chip_info[ADIS16475_1] }, + { "adis16475-2", (kernel_ulong_t)&adis16475_chip_info[ADIS16475_2] }, + { "adis16475-3", (kernel_ulong_t)&adis16475_chip_info[ADIS16475_3] }, + { "adis16477-1", (kernel_ulong_t)&adis16475_chip_info[ADIS16477_1] }, + { "adis16477-2", (kernel_ulong_t)&adis16475_chip_info[ADIS16477_2] }, + { "adis16477-3", (kernel_ulong_t)&adis16475_chip_info[ADIS16477_3] }, + { "adis16465-1", (kernel_ulong_t)&adis16475_chip_info[ADIS16465_1] }, + { "adis16465-2", (kernel_ulong_t)&adis16475_chip_info[ADIS16465_2] }, + { "adis16465-3", (kernel_ulong_t)&adis16475_chip_info[ADIS16465_3] }, + { "adis16467-1", (kernel_ulong_t)&adis16475_chip_info[ADIS16467_1] }, + { "adis16467-2", (kernel_ulong_t)&adis16475_chip_info[ADIS16467_2] }, + { "adis16467-3", (kernel_ulong_t)&adis16475_chip_info[ADIS16467_3] }, + { "adis16500", (kernel_ulong_t)&adis16475_chip_info[ADIS16500] }, + { "adis16505-1", (kernel_ulong_t)&adis16475_chip_info[ADIS16505_1] }, + { "adis16505-2", (kernel_ulong_t)&adis16475_chip_info[ADIS16505_2] }, + { "adis16505-3", (kernel_ulong_t)&adis16475_chip_info[ADIS16505_3] }, + { "adis16507-1", (kernel_ulong_t)&adis16475_chip_info[ADIS16507_1] }, + { "adis16507-2", (kernel_ulong_t)&adis16475_chip_info[ADIS16507_2] }, + { "adis16507-3", (kernel_ulong_t)&adis16475_chip_info[ADIS16507_3] }, + { } +}; +MODULE_DEVICE_TABLE(spi, adis16475_ids); + static struct spi_driver adis16475_driver = { .driver = { .name = "adis16475", .of_match_table = adis16475_of_match, }, .probe = adis16475_probe, + .id_table = adis16475_ids, }; module_spi_driver(adis16475_driver); From 7116fa37b2f20f871c59e5741f7978260feabdf1 Mon Sep 17 00:00:00 2001 From: Wadim Egorov Date: Mon, 25 Sep 2023 15:44:27 +0200 Subject: [PATCH 1281/1397] iio: adc: ti_am335x_adc: Fix return value check of tiadc_request_dma() commit 60576e84c187043cef11f11d015249e71151d35a upstream. Fix wrong handling of a DMA request where the probing only failed if -EPROPE_DEFER was returned. Instead, let us fail if a non -ENODEV value is returned. This makes DMAs explicitly optional. Even if the DMA request is unsuccessfully, the ADC can still work properly. We do also handle the defer probe case by making use of dev_err_probe(). Fixes: f438b9da75eb ("drivers: iio: ti_am335x_adc: add dma support") Signed-off-by: Wadim Egorov Reviewed-by: Bhavya Kapoor Link: https://lore.kernel.org/r/20230925134427.214556-1-w.egorov@phytec.de Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti_am335x_adc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index 8db7a01cb5fb..5f8795986995 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -670,8 +670,10 @@ static int tiadc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, indio_dev); err = tiadc_request_dma(pdev, adc_dev); - if (err && err == -EPROBE_DEFER) + if (err && err != -ENODEV) { + dev_err_probe(&pdev->dev, err, "DMA request failed\n"); goto err_dma; + } return 0; From 55efc549392e21a066a10a2fae93e9446e3e0e59 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Tue, 21 Nov 2023 06:48:39 +0100 Subject: [PATCH 1282/1397] iio: tmag5273: fix temperature offset commit 3b8157ec4573e304a29b7bced627e144dbc3dfdb upstream. The current offset has the scale already applied to it. The ABI documentation defines the offset parameter as "offset to be added to [Y]_raw prior to scaling by [Y]_scale in order to obtain value in the units as specified in [Y]_raw documentation" The right value is obtained at 0 degrees Celsius by the formula provided in the datasheet: T = Tsens_t0 + (Tadc_t - Tadc_t0) / Tadc_res where: T = 0 degrees Celsius Tsens_t0 (reference temperature) = 25 degrees Celsius Tadc_t0 (16-bit format for Tsens_t0) = 17508 Tadc_res = 60.1 LSB/degree Celsius The resulting offset is 16005.5, which has been truncated to 16005 to provide an integer value with a precision loss smaller than the 1-LSB measurement precision. Fix the offset to apply its value prior to scaling. Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/9879beec-05fc-4fc6-af62-d771e238954e@wolfvision.net Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/magnetometer/tmag5273.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/magnetometer/tmag5273.c b/drivers/iio/magnetometer/tmag5273.c index c5e5c4ad681e..e8c4ca142d21 100644 --- a/drivers/iio/magnetometer/tmag5273.c +++ b/drivers/iio/magnetometer/tmag5273.c @@ -356,7 +356,7 @@ static int tmag5273_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_OFFSET: switch (chan->type) { case IIO_TEMP: - *val = -266314; + *val = -16005; return IIO_VAL_INT; default: return -EINVAL; From ae0faa924d07392a3c6b085df509080a3ada0e1a Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 31 Oct 2023 16:05:19 -0500 Subject: [PATCH 1283/1397] iio: triggered-buffer: prevent possible freeing of wrong buffer commit bce61476dc82f114e24e9c2e11fb064781ec563c upstream. Commit ee708e6baacd ("iio: buffer: introduce support for attaching more IIO buffers") introduced support for multiple buffers per indio_dev but left indio_dev->buffer for a few legacy use cases. In the case of the triggered buffer, iio_triggered_buffer_cleanup() still assumes that indio_dev->buffer points to the buffer allocated by iio_triggered_buffer_setup_ext(). However, since iio_triggered_buffer_setup_ext() now calls iio_device_attach_buffer() to attach the buffer, indio_dev->buffer will only point to the buffer allocated by iio_device_attach_buffer() if it the first buffer attached. This adds a check to make sure that no other buffer has been attached yet to ensure that indio_dev->buffer will be assigned when iio_device_attach_buffer() is called. As per discussion in the review thread, we may want to deal with multiple triggers per device, but this is a fix for the issue in the meantime and any such support would be unlikely to be suitable for a backport. Fixes: ee708e6baacd ("iio: buffer: introduce support for attaching more IIO buffers") Signed-off-by: David Lechner Acked-by: Nuno Sa Link: https://lore.kernel.org/r/20231031210521.1661552-1-dlechner@baylibre.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/buffer/industrialio-triggered-buffer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iio/buffer/industrialio-triggered-buffer.c b/drivers/iio/buffer/industrialio-triggered-buffer.c index c7671b1f5ead..c06515987e7a 100644 --- a/drivers/iio/buffer/industrialio-triggered-buffer.c +++ b/drivers/iio/buffer/industrialio-triggered-buffer.c @@ -46,6 +46,16 @@ int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev, struct iio_buffer *buffer; int ret; + /* + * iio_triggered_buffer_cleanup() assumes that the buffer allocated here + * is assigned to indio_dev->buffer but this is only the case if this + * function is the first caller to iio_device_attach_buffer(). If + * indio_dev->buffer is already set then we can't proceed otherwise the + * cleanup function will try to free a buffer that was not allocated here. + */ + if (indio_dev->buffer) + return -EADDRINUSE; + buffer = iio_kfifo_allocate(); if (!buffer) { ret = -ENOMEM; From e8e214d0bf70442083722f33ea3ca29c6b33238d Mon Sep 17 00:00:00 2001 From: Jeremie Knuesel Date: Sun, 17 Dec 2023 12:22:43 +0100 Subject: [PATCH 1284/1397] ALSA: usb-audio: Increase delay in MOTU M quirk commit 48d6b91798a6694fdd6edb62799754b9d3fe0792 upstream. Increase the quirk delay from 2 seconds to 4 seconds. This reflects a change in the Windows driver in which the delay was increased to about 3.7 seconds. The larger delay fixes an issue where the device fails to work unless it was powered up early during boot. Also clarify in the quirk comment that the quirk is only applied to older devices (USB ID 07fd:0008). Signed-off-by: Jeremie Knuesel Suggested-by: Alexander Tsoy Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=211975 Link: https://lore.kernel.org/r/20231217112243.33409-1-knuesel@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ab2b938502eb..07cc6a201579 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1387,7 +1387,7 @@ static int snd_usb_motu_microbookii_boot_quirk(struct usb_device *dev) static int snd_usb_motu_m_series_boot_quirk(struct usb_device *dev) { - msleep(2000); + msleep(4000); return 0; } @@ -1630,7 +1630,7 @@ int snd_usb_apply_boot_quirk_once(struct usb_device *dev, unsigned int id) { switch (id) { - case USB_ID(0x07fd, 0x0008): /* MOTU M Series */ + case USB_ID(0x07fd, 0x0008): /* MOTU M Series, 1st hardware version */ return snd_usb_motu_m_series_boot_quirk(dev); } From 1937e40875c8cb34dd59186ecd6de79146bc0f73 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 12 Dec 2023 15:50:35 +0200 Subject: [PATCH 1285/1397] ARM: dts: Fix occasional boot hang for am3 usb commit 9b6a51aab5f5f9f71d2fa16e8b4d530e1643dfcb upstream. With subtle timings changes, we can now sometimes get an external abort on non-linefetch error booting am3 devices at sysc_reset(). This is because of a missing reset delay needed for the usb target module. Looks like we never enabled the delay earlier for am3, although a similar issue was seen earlier with a similar usb setup for dm814x as described in commit ebf244148092 ("ARM: OMAP2+: Use srst_udelay for USB on dm814x"). Cc: stable@vger.kernel.org Fixes: 0782e8572ce4 ("ARM: dts: Probe am335x musb with ti-sysc") Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/ti/omap/am33xx.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/ti/omap/am33xx.dtsi b/arch/arm/boot/dts/ti/omap/am33xx.dtsi index 1a2cd5baf402..5b9e01a8aa5d 100644 --- a/arch/arm/boot/dts/ti/omap/am33xx.dtsi +++ b/arch/arm/boot/dts/ti/omap/am33xx.dtsi @@ -359,6 +359,7 @@ , , ; + ti,sysc-delay-us = <2>; clocks = <&l3s_clkctrl AM3_L3S_USB_OTG_HS_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; From af60d63b86a40b0ebc3e502d7b739d4122918264 Mon Sep 17 00:00:00 2001 From: Tasos Sahanidis Date: Thu, 7 Dec 2023 15:44:41 +0200 Subject: [PATCH 1286/1397] usb-storage: Add quirk for incorrect WP on Kingston DT Ultimate 3.0 G3 commit 772685c14743ad565bb271041ad3c262298cd6fc upstream. This flash drive reports write protect during the first mode sense. In the past this was not an issue as the kernel called revalidate twice, thus asking the device for its write protect status twice, with write protect being disabled in the second mode sense. However, since commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") that is no longer the case, thus the device shows up read only. [490891.289495] sd 12:0:0:0: [sdl] Write Protect is on [490891.289497] sd 12:0:0:0: [sdl] Mode Sense: 2b 00 80 08 This does not appear to be a timing issue, as enabling the usbcore quirk USB_QUIRK_DELAY_INIT has no effect on write protect. Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") Cc: stable Signed-off-by: Tasos Sahanidis Acked-by: Alan Stern Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20231207134441.298131-1-tasos@tasossah.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 20dcbccb290b..fd68204374f2 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1305,6 +1305,17 @@ UNUSUAL_DEV( 0x090c, 0x6000, 0x0100, 0x0100, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_INITIAL_READ10 ), +/* + * Patch by Tasos Sahanidis + * This flash drive always shows up with write protect enabled + * during the first mode sense. + */ +UNUSUAL_DEV(0x0951, 0x1697, 0x0100, 0x0100, + "Kingston", + "DT Ultimate G3", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_WP_DETECT), + /* * This Pentax still camera is not conformant * to the USB storage specification: - From e4006c5a5c0d0ea4fbb31f0cc8b9ef5580a21217 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 8 Dec 2023 08:50:04 +0100 Subject: [PATCH 1287/1397] wifi: mt76: fix crash with WED rx support enabled commit cd607f2cbbbec90682b2f6d6b85e1525d0f43b19 upstream. If WED rx is enabled, rx buffers are added to a buffer pool that can be filled from multiple page pools. Because buffers freed from rx poll are not guaranteed to belong to the processed queue's page pool, lockless caching must not be used in this case. Cc: stable@vger.kernel.org Fixes: 2f5c3c77fc9b ("wifi: mt76: switch to page_pool allocator") Signed-off-by: Felix Fietkau Acked-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20231208075004.69843-1-nbd@nbd.name Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mediatek/mt76/dma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 6ca7b494c2c2..cd0486597064 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -776,7 +776,7 @@ mt76_dma_rx_reset(struct mt76_dev *dev, enum mt76_rxq_id qid) static void mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, - int len, bool more, u32 info) + int len, bool more, u32 info, bool allow_direct) { struct sk_buff *skb = q->rx_head; struct skb_shared_info *shinfo = skb_shinfo(skb); @@ -788,7 +788,7 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, skb_add_rx_frag(skb, nr_frags, page, offset, len, q->buf_size); } else { - mt76_put_page_pool_buf(data, true); + mt76_put_page_pool_buf(data, allow_direct); } if (more) @@ -808,6 +808,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) struct sk_buff *skb; unsigned char *data; bool check_ddone = false; + bool allow_direct = !mt76_queue_is_wed_rx(q); bool more; if (IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED) && @@ -848,7 +849,8 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) } if (q->rx_head) { - mt76_add_fragment(dev, q, data, len, more, info); + mt76_add_fragment(dev, q, data, len, more, info, + allow_direct); continue; } @@ -877,7 +879,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget) continue; free_frag: - mt76_put_page_pool_buf(data, true); + mt76_put_page_pool_buf(data, allow_direct); } mt76_dma_rx_fill(dev, q, true); From 018f336f79fd7660a553cb1020760850047fc15f Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 7 Dec 2023 21:20:50 +0800 Subject: [PATCH 1288/1397] wifi: cfg80211: Add my certificate commit fb768d3b13ffa325b7e84480d488ac799c9d2cd7 upstream. As announced [1][2], I have taken over maintainership of the wireless-regdb project. Add my certificate so that newer releases are valid to the kernel. Seth's certificate should be kept around for awhile, at least until a few new releases by me happen. This should also be applied to stable trees so that stable kernels can utilize newly released database binaries. [1] https://lore.kernel.org/linux-wireless/CAGb2v657baNMPKU3QADijx7hZa=GUcSv2LEDdn6N=QQaFX8r-g@mail.gmail.com/ [2] https://lore.kernel.org/linux-wireless/ZWmRR5ul7EDfxCan@wens.tw/ Cc: stable@vger.kernel.org Signed-off-by: Chen-Yu Tsai Acked-by: Seth Forshee Link: https://msgid.link/ZXHGsqs34qZyzZng@wens.tw Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/certs/wens.hex | 87 +++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 net/wireless/certs/wens.hex diff --git a/net/wireless/certs/wens.hex b/net/wireless/certs/wens.hex new file mode 100644 index 000000000000..ccd5b5dc3360 --- /dev/null +++ b/net/wireless/certs/wens.hex @@ -0,0 +1,87 @@ +/* Chen-Yu Tsai's regdb certificate */ +0x30, 0x82, 0x02, 0xa7, 0x30, 0x82, 0x01, 0x8f, +0x02, 0x14, 0x61, 0xc0, 0x38, 0x65, 0x1a, 0xab, +0xdc, 0xf9, 0x4b, 0xd0, 0xac, 0x7f, 0xf0, 0x6c, +0x72, 0x48, 0xdb, 0x18, 0xc6, 0x00, 0x30, 0x0d, +0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, +0x01, 0x01, 0x0b, 0x05, 0x00, 0x30, 0x0f, 0x31, +0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x03, +0x0c, 0x04, 0x77, 0x65, 0x6e, 0x73, 0x30, 0x20, +0x17, 0x0d, 0x32, 0x33, 0x31, 0x32, 0x30, 0x31, +0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, 0x18, +0x0f, 0x32, 0x31, 0x32, 0x33, 0x31, 0x31, 0x30, +0x37, 0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, +0x30, 0x0f, 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03, +0x55, 0x04, 0x03, 0x0c, 0x04, 0x77, 0x65, 0x6e, +0x73, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0d, 0x06, +0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, +0x01, 0x01, 0x05, 0x00, 0x03, 0x82, 0x01, 0x0f, +0x00, 0x30, 0x82, 0x01, 0x0a, 0x02, 0x82, 0x01, +0x01, 0x00, 0xa9, 0x7a, 0x2c, 0x78, 0x4d, 0xa7, +0x19, 0x2d, 0x32, 0x52, 0xa0, 0x2e, 0x6c, 0xef, +0x88, 0x7f, 0x15, 0xc5, 0xb6, 0x69, 0x54, 0x16, +0x43, 0x14, 0x79, 0x53, 0xb7, 0xae, 0x88, 0xfe, +0xc0, 0xb7, 0x5d, 0x47, 0x8e, 0x1a, 0xe1, 0xef, +0xb3, 0x90, 0x86, 0xda, 0xd3, 0x64, 0x81, 0x1f, +0xce, 0x5d, 0x9e, 0x4b, 0x6e, 0x58, 0x02, 0x3e, +0xb2, 0x6f, 0x5e, 0x42, 0x47, 0x41, 0xf4, 0x2c, +0xb8, 0xa8, 0xd4, 0xaa, 0xc0, 0x0e, 0xe6, 0x48, +0xf0, 0xa8, 0xce, 0xcb, 0x08, 0xae, 0x37, 0xaf, +0xf6, 0x40, 0x39, 0xcb, 0x55, 0x6f, 0x5b, 0x4f, +0x85, 0x34, 0xe6, 0x69, 0x10, 0x50, 0x72, 0x5e, +0x4e, 0x9d, 0x4c, 0xba, 0x38, 0x36, 0x0d, 0xce, +0x73, 0x38, 0xd7, 0x27, 0x02, 0x2a, 0x79, 0x03, +0xe1, 0xac, 0xcf, 0xb0, 0x27, 0x85, 0x86, 0x93, +0x17, 0xab, 0xec, 0x42, 0x77, 0x37, 0x65, 0x8a, +0x44, 0xcb, 0xd6, 0x42, 0x93, 0x92, 0x13, 0xe3, +0x39, 0x45, 0xc5, 0x6e, 0x00, 0x4a, 0x7f, 0xcb, +0x42, 0x17, 0x2b, 0x25, 0x8c, 0xb8, 0x17, 0x3b, +0x15, 0x36, 0x59, 0xde, 0x42, 0xce, 0x21, 0xe6, +0xb6, 0xc7, 0x6e, 0x5e, 0x26, 0x1f, 0xf7, 0x8a, +0x57, 0x9e, 0xa5, 0x96, 0x72, 0xb7, 0x02, 0x32, +0xeb, 0x07, 0x2b, 0x73, 0xe2, 0x4f, 0x66, 0x58, +0x9a, 0xeb, 0x0f, 0x07, 0xb6, 0xab, 0x50, 0x8b, +0xc3, 0x8f, 0x17, 0xfa, 0x0a, 0x99, 0xc2, 0x16, +0x25, 0xbf, 0x2d, 0x6b, 0x1a, 0xaa, 0xe6, 0x3e, +0x5f, 0xeb, 0x6d, 0x9b, 0x5d, 0x4d, 0x42, 0x83, +0x2d, 0x39, 0xb8, 0xc9, 0xac, 0xdb, 0x3a, 0x91, +0x50, 0xdf, 0xbb, 0xb1, 0x76, 0x6d, 0x15, 0x73, +0xfd, 0xc6, 0xe6, 0x6b, 0x71, 0x9e, 0x67, 0x36, +0x22, 0x83, 0x79, 0xb1, 0xd6, 0xb8, 0x84, 0x52, +0xaf, 0x96, 0x5b, 0xc3, 0x63, 0x02, 0x4e, 0x78, +0x70, 0x57, 0x02, 0x03, 0x01, 0x00, 0x01, 0x30, +0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, +0x0d, 0x01, 0x01, 0x0b, 0x05, 0x00, 0x03, 0x82, +0x01, 0x01, 0x00, 0x24, 0x28, 0xee, 0x22, 0x74, +0x7f, 0x7c, 0xfa, 0x6c, 0x1f, 0xb3, 0x18, 0xd1, +0xc2, 0x3d, 0x7d, 0x29, 0x42, 0x88, 0xad, 0x82, +0xa5, 0xb1, 0x8a, 0x05, 0xd0, 0xec, 0x5c, 0x91, +0x20, 0xf6, 0x82, 0xfd, 0xd5, 0x67, 0x60, 0x5f, +0x31, 0xf5, 0xbd, 0x88, 0x91, 0x70, 0xbd, 0xb8, +0xb9, 0x8c, 0x88, 0xfe, 0x53, 0xc9, 0x54, 0x9b, +0x43, 0xc4, 0x7a, 0x43, 0x74, 0x6b, 0xdd, 0xb0, +0xb1, 0x3b, 0x33, 0x45, 0x46, 0x78, 0xa3, 0x1c, +0xef, 0x54, 0x68, 0xf7, 0x85, 0x9c, 0xe4, 0x51, +0x6f, 0x06, 0xaf, 0x81, 0xdb, 0x2a, 0x7b, 0x7b, +0x6f, 0xa8, 0x9c, 0x67, 0xd8, 0xcb, 0xc9, 0x91, +0x40, 0x00, 0xae, 0xd9, 0xa1, 0x9f, 0xdd, 0xa6, +0x43, 0x0e, 0x28, 0x7b, 0xaa, 0x1b, 0xe9, 0x84, +0xdb, 0x76, 0x64, 0x42, 0x70, 0xc9, 0xc0, 0xeb, +0xae, 0x84, 0x11, 0x16, 0x68, 0x4e, 0x84, 0x9e, +0x7e, 0x92, 0x36, 0xee, 0x1c, 0x3b, 0x08, 0x63, +0xeb, 0x79, 0x84, 0x15, 0x08, 0x9d, 0xaf, 0xc8, +0x9a, 0xc7, 0x34, 0xd3, 0x94, 0x4b, 0xd1, 0x28, +0x97, 0xbe, 0xd1, 0x45, 0x75, 0xdc, 0x35, 0x62, +0xac, 0x1d, 0x1f, 0xb7, 0xb7, 0x15, 0x87, 0xc8, +0x98, 0xc0, 0x24, 0x31, 0x56, 0x8d, 0xed, 0xdb, +0x06, 0xc6, 0x46, 0xbf, 0x4b, 0x6d, 0xa6, 0xd5, +0xab, 0xcc, 0x60, 0xfc, 0xe5, 0x37, 0xb6, 0x53, +0x7d, 0x58, 0x95, 0xa9, 0x56, 0xc7, 0xf7, 0xee, +0xc3, 0xa0, 0x76, 0xf7, 0x65, 0x4d, 0x53, 0xfa, +0xff, 0x5f, 0x76, 0x33, 0x5a, 0x08, 0xfa, 0x86, +0x92, 0x5a, 0x13, 0xfa, 0x1a, 0xfc, 0xf2, 0x1b, +0x8c, 0x7f, 0x42, 0x6d, 0xb7, 0x7e, 0xb7, 0xb4, +0xf0, 0xc7, 0x83, 0xbb, 0xa2, 0x81, 0x03, 0x2d, +0xd4, 0x2a, 0x63, 0x3f, 0xf7, 0x31, 0x2e, 0x40, +0x33, 0x5c, 0x46, 0xbc, 0x9b, 0xc1, 0x05, 0xa5, +0x45, 0x4e, 0xc3 From 4ccca0017c4e4fafb2378eba73407e722c6a0c42 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Dec 2023 09:08:16 +0100 Subject: [PATCH 1289/1397] wifi: cfg80211: fix certs build to not depend on file order commit 3c2a8ebe3fe66a5f77d4c164a0bea8e2ff37b455 upstream. The file for the new certificate (Chen-Yu Tsai's) didn't end with a comma, so depending on the file order in the build rule, we'd end up with invalid C when concatenating the (now two) certificates. Fix that. Cc: stable@vger.kernel.org Reported-by: Biju Das Reported-by: Naresh Kamboju Fixes: fb768d3b13ff ("wifi: cfg80211: Add my certificate") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/certs/wens.hex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/certs/wens.hex b/net/wireless/certs/wens.hex index ccd5b5dc3360..0d50369bede9 100644 --- a/net/wireless/certs/wens.hex +++ b/net/wireless/certs/wens.hex @@ -84,4 +84,4 @@ 0xf0, 0xc7, 0x83, 0xbb, 0xa2, 0x81, 0x03, 0x2d, 0xd4, 0x2a, 0x63, 0x3f, 0xf7, 0x31, 0x2e, 0x40, 0x33, 0x5c, 0x46, 0xbc, 0x9b, 0xc1, 0x05, 0xa5, -0x45, 0x4e, 0xc3 +0x45, 0x4e, 0xc3, From 66c131518213bacd3d6044421cd0a1504bedf1fb Mon Sep 17 00:00:00 2001 From: Mark Glover Date: Wed, 20 Dec 2023 13:57:40 +0000 Subject: [PATCH 1290/1397] USB: serial: ftdi_sio: update Actisense PIDs constant names commit 513d88a88e0203188a38f4647dd08170aebd85df upstream. Update the constant names for unused USB PIDs (product identifiers) to reflect the new products now using the PIDs. Signed-off-by: Mark Glover Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 6 +++--- drivers/usb/serial/ftdi_sio_ids.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 1bf23611be12..13a56783830d 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1033,9 +1033,9 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, ACTISENSE_USG_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_NGT_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_NGW_PID) }, - { USB_DEVICE(FTDI_VID, ACTISENSE_D9AC_PID) }, - { USB_DEVICE(FTDI_VID, ACTISENSE_D9AD_PID) }, - { USB_DEVICE(FTDI_VID, ACTISENSE_D9AE_PID) }, + { USB_DEVICE(FTDI_VID, ACTISENSE_UID_PID) }, + { USB_DEVICE(FTDI_VID, ACTISENSE_USA_PID) }, + { USB_DEVICE(FTDI_VID, ACTISENSE_NGX_PID) }, { USB_DEVICE(FTDI_VID, ACTISENSE_D9AF_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEAGAUGE_PID) }, { USB_DEVICE(FTDI_VID, CHETCO_SEASWITCH_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index e2099445db70..21a2b5a25fc0 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1568,9 +1568,9 @@ #define ACTISENSE_USG_PID 0xD9A9 /* USG USB Serial Adapter */ #define ACTISENSE_NGT_PID 0xD9AA /* NGT NMEA2000 Interface */ #define ACTISENSE_NGW_PID 0xD9AB /* NGW NMEA2000 Gateway */ -#define ACTISENSE_D9AC_PID 0xD9AC /* Actisense Reserved */ -#define ACTISENSE_D9AD_PID 0xD9AD /* Actisense Reserved */ -#define ACTISENSE_D9AE_PID 0xD9AE /* Actisense Reserved */ +#define ACTISENSE_UID_PID 0xD9AC /* USB Isolating Device */ +#define ACTISENSE_USA_PID 0xD9AD /* USB to Serial Adapter */ +#define ACTISENSE_NGX_PID 0xD9AE /* NGX NMEA2000 Gateway */ #define ACTISENSE_D9AF_PID 0xD9AF /* Actisense Reserved */ #define CHETCO_SEAGAUGE_PID 0xA548 /* SeaGauge USB Adapter */ #define CHETCO_SEASWITCH_PID 0xA549 /* SeaSwitch USB Adapter */ From 7dbe89b73f7fed986448beeab9e535150fa3f099 Mon Sep 17 00:00:00 2001 From: Alper Ak Date: Tue, 8 Aug 2023 13:51:58 +0300 Subject: [PATCH 1291/1397] USB: serial: option: add Quectel EG912Y module support commit 6d79d9434c69bb8ffa8a631050eb0ad6b83d3e90 upstream. Add Quectel EG912Y "DIAG, AT, MODEM" 0x6001: ECM / RNDIS + DIAG + AT + MODEM T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=6001 Rev= 3.18 S: Manufacturer=Android S: Product=Android S: SerialNumber=0000 C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=06 Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0c(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0b(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=88(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0a(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Alper Ak Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 4dffcfefd62d..bfa55a7db9c4 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -272,6 +272,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 #define QUECTEL_PRODUCT_EC200U 0x0901 +#define QUECTEL_PRODUCT_EG912Y 0x6001 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 #define QUECTEL_PRODUCT_EC200A 0x6005 #define QUECTEL_PRODUCT_EM061K_LWW 0x6008 @@ -1244,6 +1245,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG912Y, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, From 3cb3868ec7bd0f03b4530fc8fca2c2ebb88e9b7d Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Fri, 1 Dec 2023 10:09:50 +0800 Subject: [PATCH 1292/1397] USB: serial: option: add Foxconn T99W265 with new baseline commit 13fde9ac23ca8c6d1ac13cc9eefe1f1ac3ee30a4 upstream. This ID was added based on latest SDX12 code base line, and we made some changes with previous 0489:e0db. Test evidence as below: T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 2 P: Vendor=0489 ProdID=e0da Rev=05.04 S: Manufacturer=Qualcomm S: Product=Qualcomm Snapdragon X12 S: SerialNumber=2bda65fb C: #Ifs= 6 Cfg#= 2 Atr=a0 MxPwr=896mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option I: If#=0x3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option I: If#=0x5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) 0&1: MBIM, 2: Modem, 3:GNSS, 4:Diag, 5:ADB Signed-off-by: Slark Xiao Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bfa55a7db9c4..9fddd3b58baa 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2244,6 +2244,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(0x0489, 0xe0b5), /* Foxconn T77W968 ESIM */ .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0da, 0xff), /* Foxconn T99W265 MBIM variant */ + .driver_info = RSVD(3) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0db, 0xff), /* Foxconn T99W265 MBIM */ .driver_info = RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(0x0489, 0xe0ee, 0xff), /* Foxconn T99W368 MBIM */ From 27149e82d7cbe41b8a01e20528316c40126c2773 Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Tue, 12 Dec 2023 18:15:38 +0100 Subject: [PATCH 1293/1397] USB: serial: option: add Quectel RM500Q R13 firmware support commit 06f22cd6635bdae7d73566fca9879b2026a08e00 upstream. Add support for Quectel RM500Q R13 firmware which uses Prot=40 for the NMEA port: T: Bus=02 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 8 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2c7c ProdID=0800 Rev= 4.14 S: Manufacturer=Quectel S: Product=RM500Q-AE S: SerialNumber=xxxxxxxx C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: Reinhard Speyerer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 9fddd3b58baa..72390dbf0769 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1233,6 +1233,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, 0x0700, 0xff), /* BG95 */ .driver_info = RSVD(3) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), .driver_info = ZLP }, From ff02d91704fad1a58b145ff4625066f5726aa0ac Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Fri, 15 Dec 2023 00:33:27 +0100 Subject: [PATCH 1294/1397] ALSA: hda/tas2781: select program 0, conf 0 by default commit ec1de5c214eb5a892fdb7c450748249d5e2840f5 upstream. Currently, cur_prog/cur_conf remains at the default value (-1), while program 0 has been loaded into the amplifiers. In the playback hook, tasdevice_tuning_switch tries to restore the cur_prog/cur_conf. In the runtime_resume/system_resume, tasdevice_prmg_load tries to load the cur_prog as well. Set cur_prog and cur_conf to 0 if available in the firmware. Fixes: 5be27f1e3ec9 ("ALSA: hda/tas2781: Add tas2781 HDA driver") CC: stable@vger.kernel.org Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/038add0bdca1f979cc7abcce8f24cbcd3544084b.1702596646.git.soyer@irl.hu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/tas2781_hda_i2c.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 63a90c7e8976..2fb1a7037c82 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -543,6 +543,10 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) tas_priv->fw_state = TASDEVICE_DSP_FW_ALL_OK; tasdevice_prmg_load(tas_priv, 0); + if (tas_priv->fmw->nr_programs > 0) + tas_priv->cur_prog = 0; + if (tas_priv->fmw->nr_configurations > 0) + tas_priv->cur_conf = 0; /* If calibrated data occurs error, dsp will still works with default * calibrated data inside algo. From 23c2e6c093273a5c1af6a51c3d58f67a297a4d5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Villeret?= Date: Thu, 14 Dec 2023 21:36:32 +0100 Subject: [PATCH 1295/1397] ALSA: hda/realtek: Add quirk for ASUS ROG GV302XA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 02a460adfc4920d4da775fb59ab3e54036daef22 upstream. Asus ROG Flowx13 (GV302XA) seems require same patch as others asus products Signed-off-by: Clément Villeret Cc: Link: https://lore.kernel.org/r/0a27bf4b-3056-49ac-9651-ebd7f3e36328@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2590879f0a84..343732c062c7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9864,6 +9864,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1483, "ASUS GU603V", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1493, "ASUS GV601V", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), + SND_PCI_QUIRK(0x1043, 0x1533, "ASUS GV302XA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1573, "ASUS GZ301V", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZV", ALC285_FIXUP_ASUS_HEADSET_MIC), From 9c7560192619eb9c7f3204ff15e03dce0a73d15d Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Thu, 14 Dec 2023 23:04:44 +0100 Subject: [PATCH 1296/1397] ASoC: tas2781: check the validity of prm_no/cfg_no commit f32c80d34249e1cfb2e647ab3c8ef38a460c787f upstream. Add additional checks for program/config numbers to avoid loading from invalid addresses. If prm_no/cfg_no is negative, skip uploading program/config. The tas2781-hda driver caused a NULL pointer dereference after loading module, and before first runtime_suspend. the state was: tas_priv->cur_conf = -1; tas_priv->tasdevice[i].cur_conf = 0; program = &(tas_fmw->programs[-1]); BUG: kernel NULL pointer dereference, address: 0000000000000010 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? vprintk_emit+0x175/0x2b0 ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? tasdevice_load_block_kernel+0x21/0x310 [snd_soc_tas2781_fmwlib] tasdevice_select_tuningprm_cfg+0x268/0x3a0 [snd_soc_tas2781_fmwlib] tasdevice_tuning_switch+0x69/0x710 [snd_soc_tas2781_fmwlib] tas2781_hda_playback_hook+0xd4/0x110 [snd_hda_scodec_tas2781_i2c] Fixes: 915f5eadebd2 ("ASoC: tas2781: firmware lib") CC: Signed-off-by: Gergo Koteles Link: https://msgid.link/r/523780155bfdca9bc0acd39efc79ed039454818d.1702591356.git.soyer@irl.hu Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/tas2781-fmwlib.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c index eb55abae0d7b..1dfac9b2fca2 100644 --- a/sound/soc/codecs/tas2781-fmwlib.c +++ b/sound/soc/codecs/tas2781-fmwlib.c @@ -2219,11 +2219,11 @@ int tasdevice_select_tuningprm_cfg(void *context, int prm_no, goto out; } - conf = &(tas_fmw->configs[cfg_no]); for (i = 0, prog_status = 0; i < tas_priv->ndev; i++) { if (cfg_info[rca_conf_no]->active_dev & (1 << i)) { - if (tas_priv->tasdevice[i].cur_prog != prm_no - || tas_priv->force_fwload_status) { + if (prm_no >= 0 + && (tas_priv->tasdevice[i].cur_prog != prm_no + || tas_priv->force_fwload_status)) { tas_priv->tasdevice[i].cur_conf = -1; tas_priv->tasdevice[i].is_loading = true; prog_status++; @@ -2258,7 +2258,8 @@ int tasdevice_select_tuningprm_cfg(void *context, int prm_no, } for (i = 0, status = 0; i < tas_priv->ndev; i++) { - if (tas_priv->tasdevice[i].cur_conf != cfg_no + if (cfg_no >= 0 + && tas_priv->tasdevice[i].cur_conf != cfg_no && (cfg_info[rca_conf_no]->active_dev & (1 << i)) && (tas_priv->tasdevice[i].is_loaderr == false)) { status++; @@ -2268,6 +2269,7 @@ int tasdevice_select_tuningprm_cfg(void *context, int prm_no, } if (status) { + conf = &(tas_fmw->configs[cfg_no]); status = 0; tasdevice_load_data(tas_priv, &(conf->dev_data)); for (i = 0; i < tas_priv->ndev; i++) { @@ -2311,7 +2313,7 @@ int tasdevice_prmg_load(void *context, int prm_no) } for (i = 0, prog_status = 0; i < tas_priv->ndev; i++) { - if (tas_priv->tasdevice[i].cur_prog != prm_no) { + if (prm_no >= 0 && tas_priv->tasdevice[i].cur_prog != prm_no) { tas_priv->tasdevice[i].cur_conf = -1; tas_priv->tasdevice[i].is_loading = true; prog_status++; @@ -2356,7 +2358,7 @@ int tasdevice_prmg_calibdata_load(void *context, int prm_no) } for (i = 0, prog_status = 0; i < tas_priv->ndev; i++) { - if (tas_priv->tasdevice[i].cur_prog != prm_no) { + if (prm_no >= 0 && tas_priv->tasdevice[i].cur_prog != prm_no) { tas_priv->tasdevice[i].cur_conf = -1; tas_priv->tasdevice[i].is_loading = true; prog_status++; From 90d6a3974771395cebc8d97e8ec69d5444f65d5d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 20 Nov 2023 10:04:39 -0500 Subject: [PATCH 1297/1397] Bluetooth: hci_event: Fix not checking if HCI_OP_INQUIRY has been sent commit 99e67d46e5ff3c7c901af6009edec72d3d363be8 upstream. Before setting HCI_INQUIRY bit check if HCI_OP_INQUIRY was really sent otherwise the controller maybe be generating invalid events or, more likely, it is a result of fuzzing tools attempting to test the right behavior of the stack when unexpected events are generated. Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=218151 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 3661f8cdbab7..152a1834838d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2305,7 +2305,8 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) return; } - set_bit(HCI_INQUIRY, &hdev->flags); + if (hci_sent_cmd_data(hdev, HCI_OP_INQUIRY)) + set_bit(HCI_INQUIRY, &hdev->flags); } static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) From 1d576c3a5af850bf11fbd103f9ba11aa6d6061fb Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sat, 9 Dec 2023 05:55:18 -0500 Subject: [PATCH 1298/1397] Bluetooth: af_bluetooth: Fix Use-After-Free in bt_sock_recvmsg commit 2e07e8348ea454615e268222ae3fc240421be768 upstream. This can cause a race with bt_sock_ioctl() because bt_sock_recvmsg() gets the skb from sk->sk_receive_queue and then frees it without holding lock_sock. A use-after-free for a skb occurs with the following flow. ``` bt_sock_recvmsg() -> skb_recv_datagram() -> skb_free_datagram() bt_sock_ioctl() -> skb_peek() ``` Add lock_sock to bt_sock_recvmsg() to fix this issue. Cc: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/af_bluetooth.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 336a76165454..b93464ac3517 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -309,11 +309,14 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; + lock_sock(sk); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) - return 0; + err = 0; + release_sock(sk); return err; } @@ -343,6 +346,8 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); + release_sock(sk); + if (flags & MSG_TRUNC) copied = skblen; From 0974347ac10dd17b9278482d83c69605a178bb54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Fri, 8 Dec 2023 18:41:50 +0100 Subject: [PATCH 1299/1397] Bluetooth: L2CAP: Send reject on command corrupted request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 78b99eb1faa7371bf9c534690f26a71b6996622d upstream. L2CAP/COS/CED/BI-02-C PTS test send a malformed L2CAP signaling packet with 2 commands in it (a connection request and an unknown command) and expect to get a connection response packet and a command reject packet. The second is currently not sent. Cc: stable@vger.kernel.org Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 17ca13e8c044..baeebee41cd9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6492,6 +6492,14 @@ static inline void l2cap_le_sig_channel(struct l2cap_conn *conn, kfree_skb(skb); } +static inline void l2cap_sig_send_rej(struct l2cap_conn *conn, u16 ident) +{ + struct l2cap_cmd_rej_unk rej; + + rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); + l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); +} + static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { @@ -6517,23 +6525,24 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (len > skb->len || !cmd->ident) { BT_DBG("corrupted command"); + l2cap_sig_send_rej(conn, cmd->ident); break; } err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data); if (err) { - struct l2cap_cmd_rej_unk rej; - BT_ERR("Wrong link type (%d)", err); - - rej.reason = cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); - l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, - sizeof(rej), &rej); + l2cap_sig_send_rej(conn, cmd->ident); } skb_pull(skb, len); } + if (skb->len > 0) { + BT_DBG("corrupted command"); + l2cap_sig_send_rej(conn, 0); + } + drop: kfree_skb(skb); } From 865f1f43262cff2cabb07ac7c3b5ed89e283a0eb Mon Sep 17 00:00:00 2001 From: Xiao Yao Date: Tue, 12 Dec 2023 00:27:18 +0800 Subject: [PATCH 1300/1397] Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE commit 59b047bc98084f8af2c41483e4d68a5adf2fa7f7 upstream. If two Bluetooth devices both support BR/EDR and BLE, and also support Secure Connections, then they only need to pair once. The LTK generated during the LE pairing process may be converted into a BR/EDR link key for BR/EDR transport, and conversely, a link key generated during the BR/EDR SSP pairing process can be converted into an LTK for LE transport. Hence, the link type of the link key and LTK is not fixed, they can be either an LE LINK or an ACL LINK. Currently, in the mgmt_new_irk/ltk/crsk/link_key functions, the link type is fixed, which could lead to incorrect address types being reported to the application layer. Therefore, it is necessary to add link_type/addr_type to the smp_irk/ltk/crsk and link_key, to ensure the generation of the correct address type. SMP over BREDR: Before Fix: > ACL Data RX: Handle 11 flags 0x02 dlen 12 BR/EDR SMP: Identity Address Information (0x09) len 7 Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Identity Resolving Key (0x0018) plen 30 Random address: 00:00:00:00:00:00 (Non-Resolvable) LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Long Term Key (0x000a) plen 37 LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated key from P-256 (0x03) After Fix: > ACL Data RX: Handle 11 flags 0x02 dlen 12 BR/EDR SMP: Identity Address Information (0x09) len 7 Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Identity Resolving Key (0x0018) plen 30 Random address: 00:00:00:00:00:00 (Non-Resolvable) BR/EDR Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Long Term Key (0x000a) plen 37 BR/EDR Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated key from P-256 (0x03) SMP over LE: Before Fix: @ MGMT Event: New Identity Resolving Key (0x0018) plen 30 Random address: 5F:5C:07:37:47:D5 (Resolvable) LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Long Term Key (0x000a) plen 37 LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated key from P-256 (0x03) @ MGMT Event: New Link Key (0x0009) plen 26 BR/EDR Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated Combination key from P-256 (0x08) After Fix: @ MGMT Event: New Identity Resolving Key (0x0018) plen 30 Random address: 5E:03:1C:00:38:21 (Resolvable) LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) @ MGMT Event: New Long Term Key (0x000a) plen 37 LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated key from P-256 (0x03) @ MGMT Event: New Link Key (0x0009) plen 26 Store hint: Yes (0x01) LE Address: F8:7D:76:F2:12:F3 (OUI F8-7D-76) Key type: Authenticated Combination key from P-256 (0x08) Cc: stable@vger.kernel.org Signed-off-by: Xiao Yao Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci_core.h | 5 +++++ net/bluetooth/mgmt.c | 25 ++++++++++++++++++------- net/bluetooth/smp.c | 7 +++++++ 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 22ce39a2aa7b..7a4be33bd07a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -189,6 +189,7 @@ struct blocked_key { struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; + u8 link_type; u8 type; u8 val[16]; }; @@ -198,6 +199,7 @@ struct smp_ltk { struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; + u8 link_type; u8 authenticated; u8 type; u8 enc_size; @@ -212,6 +214,7 @@ struct smp_irk { bdaddr_t rpa; bdaddr_t bdaddr; u8 addr_type; + u8 link_type; u8 val[16]; }; @@ -219,6 +222,8 @@ struct link_key { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; + u8 bdaddr_type; + u8 link_type; u8 type; u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ba2e00646e8e..9dd815b6603f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2897,7 +2897,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, for (i = 0; i < key_count; i++) { struct mgmt_link_key_info *key = &cp->keys[i]; - if (key->addr.type != BDADDR_BREDR || key->type > 0x08) + /* Considering SMP over BREDR/LE, there is no need to check addr_type */ + if (key->type > 0x08) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); @@ -7130,6 +7131,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, for (i = 0; i < irk_count; i++) { struct mgmt_irk_info *irk = &cp->irks[i]; + u8 addr_type = le_addr_type(irk->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, @@ -7139,8 +7141,12 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, continue; } + /* When using SMP over BR/EDR, the addr type should be set to BREDR */ + if (irk->addr.type == BDADDR_BREDR) + addr_type = BDADDR_BREDR; + hci_add_irk(hdev, &irk->addr.bdaddr, - le_addr_type(irk->addr.type), irk->val, + addr_type, irk->val, BDADDR_ANY); } @@ -7221,6 +7227,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; u8 type, authenticated; + u8 addr_type = le_addr_type(key->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LTK, @@ -7255,8 +7262,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, continue; } + /* When using SMP over BR/EDR, the addr type should be set to BREDR */ + if (key->addr.type == BDADDR_BREDR) + addr_type = BDADDR_BREDR; + hci_add_ltk(hdev, &key->addr.bdaddr, - le_addr_type(key->addr.type), type, authenticated, + addr_type, type, authenticated, key->val, key->enc_size, key->ediv, key->rand); } @@ -9523,7 +9534,7 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = BDADDR_BREDR; + ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); ev.key.type = key->type; memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE); ev.key.pin_len = key->pin_len; @@ -9574,7 +9585,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); ev.key.type = mgmt_ltk_type(key); ev.key.enc_size = key->enc_size; ev.key.ediv = key->ediv; @@ -9603,7 +9614,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent) bacpy(&ev.rpa, &irk->rpa); bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr); - ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type); + ev.irk.addr.type = link_to_bdaddr(irk->link_type, irk->addr_type); memcpy(ev.irk.val, irk->val, sizeof(irk->val)); mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL); @@ -9632,7 +9643,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr); - ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(csrk->link_type, csrk->bdaddr_type); ev.key.type = csrk->type; memcpy(ev.key.val, csrk->val, sizeof(csrk->val)); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index f1a9fc0012f0..37f95ea8c7db 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1060,6 +1060,7 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->remote_irk) { + smp->remote_irk->link_type = hcon->type; mgmt_new_irk(hdev, smp->remote_irk, persistent); /* Now that user space can be considered to know the @@ -1079,24 +1080,28 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->csrk) { + smp->csrk->link_type = hcon->type; smp->csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->csrk, persistent); } if (smp->responder_csrk) { + smp->responder_csrk->link_type = hcon->type; smp->responder_csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->responder_csrk, persistent); } if (smp->ltk) { + smp->ltk->link_type = hcon->type; smp->ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->ltk, persistent); } if (smp->responder_ltk) { + smp->responder_ltk->link_type = hcon->type; smp->responder_ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->responder_ltk, persistent); @@ -1116,6 +1121,8 @@ static void smp_notify_keys(struct l2cap_conn *conn) key = hci_add_link_key(hdev, smp->conn->hcon, &hcon->dst, smp->link_key, type, 0, &persistent); if (key) { + key->link_type = hcon->type; + key->bdaddr_type = hcon->dst_type; mgmt_new_link_key(hdev, key, persistent); /* Don't keep debug keys around if the relevant From 34c032a72f97e05cd1388aaeb1b3b255ac3c9581 Mon Sep 17 00:00:00 2001 From: Alex Lu Date: Tue, 12 Dec 2023 10:30:34 +0800 Subject: [PATCH 1301/1397] Bluetooth: Add more enc key size check commit 04a342cc49a8522e99c9b3346371c329d841dcd2 upstream. When we are slave role and receives l2cap conn req when encryption has started, we should check the enc key size to avoid KNOB attack or BLUFFS attack. From SIG recommendation, implementations are advised to reject service-level connections on an encrypted baseband link with key strengths below 7 octets. A simple and clear way to achieve this is to place the enc key size check in hci_cc_read_enc_key_size() The btmon log below shows the case that lacks enc key size check. > HCI Event: Connect Request (0x04) plen 10 Address: BB:22:33:44:55:99 (OUI BB-22-33) Class: 0x480104 Major class: Computer (desktop, notebook, PDA, organizers) Minor class: Desktop workstation Capturing (Scanner, Microphone) Telephony (Cordless telephony, Modem, Headset) Link type: ACL (0x01) < HCI Command: Accept Connection Request (0x01|0x0009) plen 7 Address: BB:22:33:44:55:99 (OUI BB-22-33) Role: Peripheral (0x01) > HCI Event: Command Status (0x0f) plen 4 Accept Connection Request (0x01|0x0009) ncmd 2 Status: Success (0x00) > HCI Event: Connect Complete (0x03) plen 11 Status: Success (0x00) Handle: 1 Address: BB:22:33:44:55:99 (OUI BB-22-33) Link type: ACL (0x01) Encryption: Disabled (0x00) ... > HCI Event: Encryption Change (0x08) plen 4 Status: Success (0x00) Handle: 1 Address: BB:22:33:44:55:99 (OUI BB-22-33) Encryption: Enabled with E0 (0x01) < HCI Command: Read Encryption Key Size (0x05|0x0008) plen 2 Handle: 1 Address: BB:22:33:44:55:99 (OUI BB-22-33) > HCI Event: Command Complete (0x0e) plen 7 Read Encryption Key Size (0x05|0x0008) ncmd 2 Status: Success (0x00) Handle: 1 Address: BB:22:33:44:55:99 (OUI BB-22-33) Key size: 6 // We should check the enc key size ... > ACL Data RX: Handle 1 flags 0x02 dlen 12 L2CAP: Connection Request (0x02) ident 3 len 4 PSM: 25 (0x0019) Source CID: 64 < ACL Data TX: Handle 1 flags 0x00 dlen 16 L2CAP: Connection Response (0x03) ident 3 len 8 Destination CID: 64 Source CID: 64 Result: Connection pending (0x0001) Status: Authorization pending (0x0002) > HCI Event: Number of Completed Packets (0x13) plen 5 Num handles: 1 Handle: 1 Address: BB:22:33:44:55:99 (OUI BB-22-33) Count: 1 #35: len 16 (25 Kb/s) Latency: 5 msec (2-7 msec ~4 msec) < ACL Data TX: Handle 1 flags 0x00 dlen 16 L2CAP: Connection Response (0x03) ident 3 len 8 Destination CID: 64 Source CID: 64 Result: Connection successful (0x0000) Status: No further information available (0x0000) Cc: stable@vger.kernel.org Signed-off-by: Alex Lu Signed-off-by: Max Chou Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_event.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 152a1834838d..f7ebbbd30218 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -750,9 +750,23 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data, } else { conn->enc_key_size = rp->key_size; status = 0; + + if (conn->enc_key_size < hdev->min_enc_key_size) { + /* As slave role, the conn->state has been set to + * BT_CONNECTED and l2cap conn req might not be received + * yet, at this moment the l2cap layer almost does + * nothing with the non-zero status. + * So we also clear encrypt related bits, and then the + * handler of l2cap conn req will get the right secure + * state at a later time. + */ + status = HCI_ERROR_AUTH_FAILURE; + clear_bit(HCI_CONN_ENCRYPT, &conn->flags); + clear_bit(HCI_CONN_AES_CCM, &conn->flags); + } } - hci_encrypt_cfm(conn, 0); + hci_encrypt_cfm(conn, status); done: hci_dev_unlock(hdev); From 847f8f529a13bd880aa271a76aa1c19408d03fa5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 8 Dec 2023 13:36:02 +0100 Subject: [PATCH 1302/1397] usb: typec: ucsi: fix gpio-based orientation detection commit c994cb596bf7ef5928f06331c76f46e071b16f09 upstream. Fix the recently added connector sanity check which was off by one and prevented orientation notifications from being handled correctly for the second port when using GPIOs to determine orientation. Fixes: c6165ed2f425 ("usb: ucsi: glink: use the connector orientation GPIO to provide switch events") Cc: stable Cc: Neil Armstrong Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231208123603.29957-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_glink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c index db6e248f8208..4853141cd10c 100644 --- a/drivers/usb/typec/ucsi/ucsi_glink.c +++ b/drivers/usb/typec/ucsi/ucsi_glink.c @@ -228,7 +228,7 @@ static void pmic_glink_ucsi_notify(struct work_struct *work) con_num = UCSI_CCI_CONNECTOR(cci); if (con_num) { - if (con_num < PMIC_GLINK_MAX_PORTS && + if (con_num <= PMIC_GLINK_MAX_PORTS && ucsi->port_orientation[con_num - 1]) { int orientation = gpiod_get_value(ucsi->port_orientation[con_num - 1]); From ebd7bc419aebf1fbe1610be24722fd79f8a20ad1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Dec 2023 16:22:43 +0300 Subject: [PATCH 1303/1397] usb: fotg210-hcd: delete an incorrect bounds test commit 7fbcd195e2b8cc952e4aeaeb50867b798040314c upstream. Here "temp" is the number of characters that we have written and "size" is the size of the buffer. The intent was clearly to say that if we have written to the end of the buffer then stop. However, for that to work the comparison should have been done on the original "size" value instead of the "size -= temp" value. Not only will that not trigger when we want to, but there is a small chance that it will trigger incorrectly before we want it to and we break from the loop slightly earlier than intended. This code was recently changed from using snprintf() to scnprintf(). With snprintf() we likely would have continued looping and passed a negative size parameter to snprintf(). This would have triggered an annoying WARN(). Now that we have converted to scnprintf() "size" will never drop below 1 and there is no real need for this test. We could change the condition to "if (temp <= 1) goto done;" but just deleting the test is cleanest. Fixes: 7d50195f6c50 ("usb: host: Faraday fotg210-hcd driver") Cc: stable Signed-off-by: Dan Carpenter Reviewed-by: Linus Walleij Reviewed-by: Lee Jones Link: https://lore.kernel.org/r/ZXmwIwHe35wGfgzu@suswa Signed-off-by: Greg Kroah-Hartman --- drivers/usb/fotg210/fotg210-hcd.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/usb/fotg210/fotg210-hcd.c b/drivers/usb/fotg210/fotg210-hcd.c index 929106c16b29..7bf810a0c98a 100644 --- a/drivers/usb/fotg210/fotg210-hcd.c +++ b/drivers/usb/fotg210/fotg210-hcd.c @@ -428,8 +428,6 @@ static void qh_lines(struct fotg210_hcd *fotg210, struct fotg210_qh *qh, temp = size; size -= temp; next += temp; - if (temp == size) - goto done; } temp = snprintf(next, size, "\n"); @@ -439,7 +437,6 @@ static void qh_lines(struct fotg210_hcd *fotg210, struct fotg210_qh *qh, size -= temp; next += temp; -done: *sizep = size; *nextp = next; } From d4ab5cfa098e8789e8db6bd7fbd82456b99d07d3 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Thu, 7 Dec 2023 18:50:07 +0100 Subject: [PATCH 1304/1397] net: usb: ax88179_178a: avoid failed operations when device is disconnected commit aef05e349bfd81c95adb4489639413fadbb74a83 upstream. When the device is disconnected we get the following messages showing failed operations: Nov 28 20:22:11 localhost kernel: usb 2-3: USB disconnect, device number 2 Nov 28 20:22:11 localhost kernel: ax88179_178a 2-3:1.0 enp2s0u3: unregister 'ax88179_178a' usb-0000:02:00.0-3, ASIX AX88179 USB 3.0 Gigabit Ethernet Nov 28 20:22:11 localhost kernel: ax88179_178a 2-3:1.0 enp2s0u3: Failed to read reg index 0x0002: -19 Nov 28 20:22:11 localhost kernel: ax88179_178a 2-3:1.0 enp2s0u3: Failed to write reg index 0x0002: -19 Nov 28 20:22:11 localhost kernel: ax88179_178a 2-3:1.0 enp2s0u3 (unregistered): Failed to write reg index 0x0002: -19 Nov 28 20:22:11 localhost kernel: ax88179_178a 2-3:1.0 enp2s0u3 (unregistered): Failed to write reg index 0x0001: -19 Nov 28 20:22:11 localhost kernel: ax88179_178a 2-3:1.0 enp2s0u3 (unregistered): Failed to write reg index 0x0002: -19 The reason is that although the device is detached, normal stop and unbind operations are commanded from the driver. These operations are not necessary in this situation, so avoid these logs when the device is detached if the result of the operation is -ENODEV and if the new flag informing about the disconnecting status is enabled. cc: Fixes: e2ca90c276e1f ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Signed-off-by: Jose Ignacio Tornos Martinez Acked-by: Alan Stern Link: https://lore.kernel.org/r/20231207175007.263907-1-jtornosm@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/ax88179_178a.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 4ea0e155bb0d..5a1bf42ce156 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -173,6 +173,7 @@ struct ax88179_data { u8 in_pm; u32 wol_supported; u32 wolopts; + u8 disconnecting; }; struct ax88179_int_data { @@ -208,6 +209,7 @@ static int __ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); + struct ax88179_data *ax179_data = dev->driver_priv; BUG_ON(!dev); @@ -219,7 +221,7 @@ static int __ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); - if (unlikely(ret < 0)) + if (unlikely((ret < 0) && !(ret == -ENODEV && ax179_data->disconnecting))) netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n", index, ret); @@ -231,6 +233,7 @@ static int __ax88179_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); + struct ax88179_data *ax179_data = dev->driver_priv; BUG_ON(!dev); @@ -242,7 +245,7 @@ static int __ax88179_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, ret = fn(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); - if (unlikely(ret < 0)) + if (unlikely((ret < 0) && !(ret == -ENODEV && ax179_data->disconnecting))) netdev_warn(dev->net, "Failed to write reg index 0x%04x: %d\n", index, ret); @@ -492,6 +495,20 @@ static int ax88179_resume(struct usb_interface *intf) return usbnet_resume(intf); } +static void ax88179_disconnect(struct usb_interface *intf) +{ + struct usbnet *dev = usb_get_intfdata(intf); + struct ax88179_data *ax179_data; + + if (!dev) + return; + + ax179_data = dev->driver_priv; + ax179_data->disconnecting = 1; + + usbnet_disconnect(intf); +} + static void ax88179_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) { @@ -1906,7 +1923,7 @@ static struct usb_driver ax88179_178a_driver = { .suspend = ax88179_suspend, .resume = ax88179_resume, .reset_resume = ax88179_resume, - .disconnect = usbnet_disconnect, + .disconnect = ax88179_disconnect, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, }; From cac200353b71dab2a292c389520164bf6093a1ce Mon Sep 17 00:00:00 2001 From: Christoffer Sandberg Date: Fri, 22 Dec 2023 23:25:38 -0800 Subject: [PATCH 1305/1397] Input: soc_button_array - add mapping for airplane mode button commit ea3715941a9b7d816a1e9096ac0577900af2a69e upstream. This add a mapping for the airplane mode button on the TUXEDO Pulse Gen3. While it is physically a key it behaves more like a switch, sending a key down on first press and a key up on 2nd press. Therefor the switch event is used here. Besides this behaviour it uses the HID usage-id 0xc6 (Wireless Radio Button) and not 0xc8 (Wireless Radio Slider Switch), but since neither 0xc6 nor 0xc8 are currently implemented at all in soc_button_array this not to standard behaviour is not put behind a quirk for the moment. Signed-off-by: Christoffer Sandberg Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20231215171718.80229-1-wse@tuxedocomputers.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/misc/soc_button_array.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index e79f5497948b..9116f4248fd0 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -299,6 +299,11 @@ static int soc_button_parse_btn_desc(struct device *dev, info->name = "power"; info->event_code = KEY_POWER; info->wakeup = true; + } else if (upage == 0x01 && usage == 0xc6) { + info->name = "airplane mode switch"; + info->event_type = EV_SW; + info->event_code = SW_RFKILL_ALL; + info->active_low = false; } else if (upage == 0x01 && usage == 0xca) { info->name = "rotation lock switch"; info->event_type = EV_SW; From 506ef81c993c7c19b7281d096aa3fba60e7f202e Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 6 Dec 2023 23:09:13 +0300 Subject: [PATCH 1306/1397] net: 9p: avoid freeing uninit memory in p9pdu_vreadf commit ff49bf1867578f23a5ffdd38f927f6e1e16796c4 upstream. If some of p9pdu_readf() calls inside case 'T' in p9pdu_vreadf() fails, the error path is not handled properly. *wnames or members of *wnames array may be left uninitialized and invalidly freed. Initialize *wnames to NULL in beginning of case 'T'. Initialize the first *wnames array element to NULL and nullify the failing *wnames element so that the error path freeing loop stops on the first NULL element and doesn't proceed further. Found by Linux Verification Center (linuxtesting.org). Fixes: ace51c4dd2f9 ("9p: add new protocol support code") Signed-off-by: Fedor Pchelkin Message-ID: <20231206200913.16135-1-pchelkin@ispras.ru> Cc: stable@vger.kernel.org Reviewed-by: Simon Horman Reviewed-by: Christian Schoenebeck Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- net/9p/protocol.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 4e3a2a1ffcb3..0e6603b1ec90 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -394,6 +394,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, uint16_t *nwname = va_arg(ap, uint16_t *); char ***wnames = va_arg(ap, char ***); + *wnames = NULL; + errcode = p9pdu_readf(pdu, proto_version, "w", nwname); if (!errcode) { @@ -403,6 +405,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, GFP_NOFS); if (!*wnames) errcode = -ENOMEM; + else + (*wnames)[0] = NULL; } if (!errcode) { @@ -414,8 +418,10 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, proto_version, "s", &(*wnames)[i]); - if (errcode) + if (errcode) { + (*wnames)[i] = NULL; break; + } } } @@ -423,11 +429,14 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, if (*wnames) { int i; - for (i = 0; i < *nwname; i++) + for (i = 0; i < *nwname; i++) { + if (!(*wnames)[i]) + break; kfree((*wnames)[i]); + } + kfree(*wnames); + *wnames = NULL; } - kfree(*wnames); - *wnames = NULL; } } break; From d2821864c744373ead89204c22dca781f5b60fad Mon Sep 17 00:00:00 2001 From: Rouven Czerwinski Date: Thu, 7 Dec 2023 08:58:36 +0100 Subject: [PATCH 1307/1397] net: rfkill: gpio: set GPIO direction commit 23484d817082c3005252d8edfc8292c8a1006b5b upstream. Fix the undefined usage of the GPIO consumer API after retrieving the GPIO description with GPIO_ASIS. The API documentation mentions that GPIO_ASIS won't set a GPIO direction and requires the user to set a direction before using the GPIO. This can be confirmed on i.MX6 hardware, where rfkill-gpio is no longer able to enabled/disable a device, presumably because the GPIO controller was never configured for the output direction. Fixes: b2f750c3a80b ("net: rfkill: gpio: prevent value glitch during probe") Cc: stable@vger.kernel.org Signed-off-by: Rouven Czerwinski Link: https://msgid.link/20231207075835.3091694-1-r.czerwinski@pengutronix.de Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/rfkill/rfkill-gpio.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 5a81505fba9a..4e32d659524e 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -126,6 +126,14 @@ static int rfkill_gpio_probe(struct platform_device *pdev) return -EINVAL; } + ret = gpiod_direction_output(rfkill->reset_gpio, true); + if (ret) + return ret; + + ret = gpiod_direction_output(rfkill->shutdown_gpio, true); + if (ret) + return ret; + rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev, rfkill->type, &rfkill_gpio_ops, rfkill); From 30302b41ffdcd194bef27fb3b1a9f2ca53dedb27 Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Thu, 14 Dec 2023 19:11:12 +0100 Subject: [PATCH 1308/1397] net: ks8851: Fix TX stall caused by TX buffer overrun commit 3dc5d44545453de1de9c53cc529cc960a85933da upstream. There is a bug in the ks8851 Ethernet driver that more data is written to the hardware TX buffer than actually available. This is caused by wrong accounting of the free TX buffer space. The driver maintains a tx_space variable that represents the TX buffer space that is deemed to be free. The ks8851_start_xmit_spi() function adds an SKB to a queue if tx_space is large enough and reduces tx_space by the amount of buffer space it will later need in the TX buffer and then schedules a work item. If there is not enough space then the TX queue is stopped. The worker function ks8851_tx_work() dequeues all the SKBs and writes the data into the hardware TX buffer. The last packet will trigger an interrupt after it was send. Here it is assumed that all data fits into the TX buffer. In the interrupt routine (which runs asynchronously because it is a threaded interrupt) tx_space is updated with the current value from the hardware. Also the TX queue is woken up again. Now it could happen that after data was sent to the hardware and before handling the TX interrupt new data is queued in ks8851_start_xmit_spi() when the TX buffer space had still some space left. When the interrupt is actually handled tx_space is updated from the hardware but now we already have new SKBs queued that have not been written to the hardware TX buffer yet. Since tx_space has been overwritten by the value from the hardware the space is not accounted for. Now we have more data queued then buffer space available in the hardware and ks8851_tx_work() will potentially overrun the hardware TX buffer. In many cases it will still work because often the buffer is written out fast enough so that no overrun occurs but for example if the peer throttles us via flow control then an overrun may happen. This can be fixed in different ways. The most simple way would be to set tx_space to 0 before writing data to the hardware TX buffer preventing the queuing of more SKBs until the TX interrupt has been handled. I have chosen a slightly more efficient (and still rather simple) way and track the amount of data that is already queued and not yet written to the hardware. When new SKBs are to be queued the already queued amount of data is honoured when checking free TX buffer space. I tested this with a setup of two linked KS8851 running iperf3 between the two in bidirectional mode. Before the fix I got a stall after some minutes. With the fix I saw now issues anymore after hours. Fixes: 3ba81f3ece3c ("net: Micrel KS8851 SPI network driver") Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Ben Dooks Cc: Tristram Ha Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Ronald Wahl Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231214181112.76052-1-rwahl@gmx.de Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/micrel/ks8851.h | 3 ++ drivers/net/ethernet/micrel/ks8851_common.c | 20 +++++----- drivers/net/ethernet/micrel/ks8851_spi.c | 42 +++++++++++++-------- 3 files changed, 40 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index fecd43754cea..e5ec0a363aff 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -350,6 +350,8 @@ union ks8851_tx_hdr { * @rxd: Space for receiving SPI data, in DMA-able space. * @txd: Space for transmitting SPI data, in DMA-able space. * @msg_enable: The message flags controlling driver output (see ethtool). + * @tx_space: Free space in the hardware TX buffer (cached copy of KS_TXMIR). + * @queued_len: Space required in hardware TX buffer for queued packets in txq. * @fid: Incrementing frame id tag. * @rc_ier: Cached copy of KS_IER. * @rc_ccr: Cached copy of KS_CCR. @@ -399,6 +401,7 @@ struct ks8851_net { struct work_struct rxctrl_work; struct sk_buff_head txq; + unsigned int queued_len; struct eeprom_93cx6 eeprom; struct regulator *vdd_reg; diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index cfbc900d4aeb..0bf13b38b8f5 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -362,16 +362,18 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) handled |= IRQ_RXPSI; if (status & IRQ_TXI) { - handled |= IRQ_TXI; + unsigned short tx_space = ks8851_rdreg16(ks, KS_TXMIR); - /* no lock here, tx queue should have been stopped */ + netif_dbg(ks, intr, ks->netdev, + "%s: txspace %d\n", __func__, tx_space); - /* update our idea of how much tx space is available to the - * system */ - ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR); + spin_lock(&ks->statelock); + ks->tx_space = tx_space; + if (netif_queue_stopped(ks->netdev)) + netif_wake_queue(ks->netdev); + spin_unlock(&ks->statelock); - netif_dbg(ks, intr, ks->netdev, - "%s: txspace %d\n", __func__, ks->tx_space); + handled |= IRQ_TXI; } if (status & IRQ_RXI) @@ -414,9 +416,6 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) if (status & IRQ_LCI) mii_check_link(&ks->mii); - if (status & IRQ_TXI) - netif_wake_queue(ks->netdev); - return IRQ_HANDLED; } @@ -500,6 +499,7 @@ static int ks8851_net_open(struct net_device *dev) ks8851_wrreg16(ks, KS_ISR, ks->rc_ier); ks8851_wrreg16(ks, KS_IER, ks->rc_ier); + ks->queued_len = 0; netif_start_queue(ks->netdev); netif_dbg(ks, ifup, ks->netdev, "network device up\n"); diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 70bc7253454f..88e26c120b48 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -286,6 +286,18 @@ static void ks8851_wrfifo_spi(struct ks8851_net *ks, struct sk_buff *txp, netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__); } +/** + * calc_txlen - calculate size of message to send packet + * @len: Length of data + * + * Returns the size of the TXFIFO message needed to send + * this packet. + */ +static unsigned int calc_txlen(unsigned int len) +{ + return ALIGN(len + 4, 4); +} + /** * ks8851_rx_skb_spi - receive skbuff * @ks: The device state @@ -305,7 +317,9 @@ static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb) */ static void ks8851_tx_work(struct work_struct *work) { + unsigned int dequeued_len = 0; struct ks8851_net_spi *kss; + unsigned short tx_space; struct ks8851_net *ks; unsigned long flags; struct sk_buff *txb; @@ -322,6 +336,8 @@ static void ks8851_tx_work(struct work_struct *work) last = skb_queue_empty(&ks->txq); if (txb) { + dequeued_len += calc_txlen(txb->len); + ks8851_wrreg16_spi(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); ks8851_wrfifo_spi(ks, txb, last); @@ -332,6 +348,13 @@ static void ks8851_tx_work(struct work_struct *work) } } + tx_space = ks8851_rdreg16_spi(ks, KS_TXMIR); + + spin_lock(&ks->statelock); + ks->queued_len -= dequeued_len; + ks->tx_space = tx_space; + spin_unlock(&ks->statelock); + ks8851_unlock_spi(ks, &flags); } @@ -346,18 +369,6 @@ static void ks8851_flush_tx_work_spi(struct ks8851_net *ks) flush_work(&kss->tx_work); } -/** - * calc_txlen - calculate size of message to send packet - * @len: Length of data - * - * Returns the size of the TXFIFO message needed to send - * this packet. - */ -static unsigned int calc_txlen(unsigned int len) -{ - return ALIGN(len + 4, 4); -} - /** * ks8851_start_xmit_spi - transmit packet using SPI * @skb: The buffer to transmit @@ -386,16 +397,17 @@ static netdev_tx_t ks8851_start_xmit_spi(struct sk_buff *skb, spin_lock(&ks->statelock); - if (needed > ks->tx_space) { + if (ks->queued_len + needed > ks->tx_space) { netif_stop_queue(dev); ret = NETDEV_TX_BUSY; } else { - ks->tx_space -= needed; + ks->queued_len += needed; skb_queue_tail(&ks->txq, skb); } spin_unlock(&ks->statelock); - schedule_work(&kss->tx_work); + if (ret == NETDEV_TX_OK) + schedule_work(&kss->tx_work); return ret; } From 92b8881bf77651360cfaa142f5e79c1dfbb66713 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 18 Dec 2023 18:06:54 +0100 Subject: [PATCH 1309/1397] net: avoid build bug in skb extension length calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d6e5794b06c0fab74fe6e4fa55d508a5ceb14735 upstream. GCC seems to incorrectly fail to evaluate skb_ext_total_length() at compile time under certain conditions. The issue even occurs if all values in skb_ext_type_len[] are "0", ruling out the possibility of an actual overflow. As the patch has been in mainline since v6.6 without triggering the problem it seems to be a very uncommon occurrence. As the issue only occurs when -fno-tree-loop-im is specified as part of CFLAGS_GCOV, disable the BUILD_BUG_ON() only when building with coverage reporting enabled. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312171924.4FozI5FG-lkp@intel.com/ Suggested-by: Arnd Bergmann Link: https://lore.kernel.org/lkml/487cfd35-fe68-416f-9bfd-6bb417f98304@app.fastmail.com/ Fixes: 5d21d0a65b57 ("net: generalize calculation of skb extensions length") Cc: Signed-off-by: Thomas Weißschuh Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20231218-net-skbuff-build-bug-v1-1-eefc2fb0a7d3@weissschuh.net Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 97b4a42e6e34..6d204cf54c57 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4810,7 +4810,9 @@ static __always_inline unsigned int skb_ext_total_length(void) static void skb_extensions_init(void) { BUILD_BUG_ON(SKB_EXT_NUM >= 8); +#if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL) BUILD_BUG_ON(skb_ext_total_length() > 255); +#endif skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), From 93f763c28a8b7070ea2730c651a954e120c807bd Mon Sep 17 00:00:00 2001 From: Lai Peter Jun Ann Date: Mon, 18 Dec 2023 15:51:32 +0800 Subject: [PATCH 1310/1397] net: stmmac: fix incorrect flag check in timestamp interrupt commit bd7f77dae69532ffc027ee50ff99e3792dc30b7f upstream. The driver should continue get the timestamp if STMMAC_FLAG_EXT_SNAPSHOT_EN flag is set. Fixes: aa5513f5d95f ("net: stmmac: replace the ext_snapshot_en field with a flag") Cc: # 6.6 Signed-off-by: Song Yoong Siang Signed-off-by: Lai Peter Jun Ann Reviewed-by: Jacob Keller Reviewed-by: Serge Semin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 540f6a4ec0b8..f05bd757dfe5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -237,7 +237,7 @@ static void timestamp_interrupt(struct stmmac_priv *priv) */ ts_status = readl(priv->ioaddr + GMAC_TIMESTAMP_STATUS); - if (priv->plat->flags & STMMAC_FLAG_EXT_SNAPSHOT_EN) + if (!(priv->plat->flags & STMMAC_FLAG_EXT_SNAPSHOT_EN)) return; num_snapshot = (ts_status & GMAC_TIMESTAMP_ATSNS_MASK) >> From 7b5ef500d8b38961f449582e61c2aad4bb22f4f2 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 15 Dec 2023 11:13:57 +0000 Subject: [PATCH 1311/1397] dt-bindings: nvmem: mxs-ocotp: Document fsl,ocotp commit a2a8aefecbd0f87d6127951cef33b3def8439057 upstream. Both imx23.dtsi and imx28.dtsi describe the OCOTP nodes in the format: compatible = "fsl,imx28-ocotp", "fsl,ocotp"; Document the "fsl,ocotp" entry to fix the following schema warning: efuse@8002c000: compatible: ['fsl,imx23-ocotp', 'fsl,ocotp'] is too long from schema $id: http://devicetree.org/schemas/nvmem/mxs-ocotp.yaml# Fixes: 2c504460f502 ("dt-bindings: nvmem: Convert MXS OCOTP to json-schema") Cc: Signed-off-by: Fabio Estevam Acked-by: Conor Dooley Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20231215111358.316727-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/nvmem/mxs-ocotp.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/nvmem/mxs-ocotp.yaml b/Documentation/devicetree/bindings/nvmem/mxs-ocotp.yaml index a9b822aeaa7e..e436650f0faf 100644 --- a/Documentation/devicetree/bindings/nvmem/mxs-ocotp.yaml +++ b/Documentation/devicetree/bindings/nvmem/mxs-ocotp.yaml @@ -14,9 +14,11 @@ allOf: properties: compatible: - enum: - - fsl,imx23-ocotp - - fsl,imx28-ocotp + items: + - enum: + - fsl,imx23-ocotp + - fsl,imx28-ocotp + - const: fsl,ocotp reg: maxItems: 1 @@ -34,7 +36,7 @@ unevaluatedProperties: false examples: - | ocotp: efuse@8002c000 { - compatible = "fsl,imx28-ocotp"; + compatible = "fsl,imx28-ocotp", "fsl,ocotp"; #address-cells = <1>; #size-cells = <1>; reg = <0x8002c000 0x2000>; From c21acd6731680a9af0bddd78f0920bda3af616bb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 15 Dec 2023 11:56:31 +1100 Subject: [PATCH 1312/1397] nfsd: call nfsd_last_thread() before final nfsd_put() commit 2a501f55cd641eb4d3c16a2eab0d678693fac663 upstream. If write_ports_addfd or write_ports_addxprt fail, they call nfsd_put() without calling nfsd_last_thread(). This leaves nn->nfsd_serv pointing to a structure that has been freed. So remove 'static' from nfsd_last_thread() and call it when the nfsd_serv is about to be destroyed. Fixes: ec52361df99b ("SUNRPC: stop using ->sv_nrthreads as a refcount") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Cc: Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsctl.c | 9 +++++++-- fs/nfsd/nfsd.h | 1 + fs/nfsd/nfssvc.c | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7ed02fb88a36..98854dd3c150 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -704,8 +704,10 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred err = svc_addsock(nn->nfsd_serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); - if (err >= 0 && - !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) + if (err < 0 && !nn->nfsd_serv->sv_nrthreads && !nn->keep_active) + nfsd_last_thread(net); + else if (err >= 0 && + !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) svc_get(nn->nfsd_serv); nfsd_put(net); @@ -756,6 +758,9 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr svc_xprt_put(xprt); } out_err: + if (!nn->nfsd_serv->sv_nrthreads && !nn->keep_active) + nfsd_last_thread(net); + nfsd_put(net); return err; } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 11c14faa6c67..63deed27a905 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -138,6 +138,7 @@ int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change); int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change); void nfsd_reset_versions(struct nfsd_net *nn); int nfsd_create_serv(struct net *net); +void nfsd_last_thread(struct net *net); extern int nfsd_max_blksize; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 211458203d14..7ef6af908faa 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -542,7 +542,7 @@ static struct notifier_block nfsd_inet6addr_notifier = { /* Only used under nfsd_mutex, so this atomic may be overkill: */ static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0); -static void nfsd_last_thread(struct net *net) +void nfsd_last_thread(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv = nn->nfsd_serv; From c0e98de95072f5462c475dcc1ae44ed9f500b2cb Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 13 Dec 2023 12:25:56 -0300 Subject: [PATCH 1313/1397] smb: client: fix OOB in cifsd when receiving compounded resps commit a8f68b11158f09754418de62e6b3e7b9b7a50cc9 upstream. Validate next header's offset in ->next_header() so that it isn't smaller than MID_HEADER_SIZE(server) and then standard_receive3() or ->receive() ends up writing off the end of the buffer because 'pdu_length - MID_HEADER_SIZE(server)' wraps up to a huge length: BUG: KASAN: slab-out-of-bounds in _copy_to_iter+0x4fc/0x840 Write of size 701 at addr ffff88800caf407f by task cifsd/1090 CPU: 0 PID: 1090 Comm: cifsd Not tainted 6.7.0-rc4 #5 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 Call Trace: dump_stack_lvl+0x4a/0x80 print_report+0xcf/0x650 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? __phys_addr+0x46/0x90 kasan_report+0xd8/0x110 ? _copy_to_iter+0x4fc/0x840 ? _copy_to_iter+0x4fc/0x840 kasan_check_range+0x105/0x1b0 __asan_memcpy+0x3c/0x60 _copy_to_iter+0x4fc/0x840 ? srso_alias_return_thunk+0x5/0xfbef5 ? hlock_class+0x32/0xc0 ? srso_alias_return_thunk+0x5/0xfbef5 ? __pfx__copy_to_iter+0x10/0x10 ? srso_alias_return_thunk+0x5/0xfbef5 ? lock_is_held_type+0x90/0x100 ? srso_alias_return_thunk+0x5/0xfbef5 ? __might_resched+0x278/0x360 ? __pfx___might_resched+0x10/0x10 ? srso_alias_return_thunk+0x5/0xfbef5 __skb_datagram_iter+0x2c2/0x460 ? __pfx_simple_copy_to_iter+0x10/0x10 skb_copy_datagram_iter+0x6c/0x110 tcp_recvmsg_locked+0x9be/0xf40 ? __pfx_tcp_recvmsg_locked+0x10/0x10 ? mark_held_locks+0x5d/0x90 ? srso_alias_return_thunk+0x5/0xfbef5 tcp_recvmsg+0xe2/0x310 ? __pfx_tcp_recvmsg+0x10/0x10 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? lock_acquire+0x14a/0x3a0 ? srso_alias_return_thunk+0x5/0xfbef5 inet_recvmsg+0xd0/0x370 ? __pfx_inet_recvmsg+0x10/0x10 ? __pfx_lock_release+0x10/0x10 ? do_raw_spin_trylock+0xd1/0x120 sock_recvmsg+0x10d/0x150 cifs_readv_from_socket+0x25a/0x490 [cifs] ? __pfx_cifs_readv_from_socket+0x10/0x10 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 cifs_read_from_socket+0xb5/0x100 [cifs] ? __pfx_cifs_read_from_socket+0x10/0x10 [cifs] ? __pfx_lock_release+0x10/0x10 ? do_raw_spin_trylock+0xd1/0x120 ? _raw_spin_unlock+0x23/0x40 ? srso_alias_return_thunk+0x5/0xfbef5 ? __smb2_find_mid+0x126/0x230 [cifs] cifs_demultiplex_thread+0xd39/0x1270 [cifs] ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs] ? __pfx_lock_release+0x10/0x10 ? srso_alias_return_thunk+0x5/0xfbef5 ? mark_held_locks+0x1a/0x90 ? lockdep_hardirqs_on_prepare+0x136/0x210 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? __kthread_parkme+0xce/0xf0 ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs] kthread+0x18d/0x1d0 ? kthread+0xdb/0x1d0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fixes: 8ce79ec359ad ("cifs: update multiplex loop to handle compounded responses") Cc: stable@vger.kernel.org Reported-by: Robert Morris Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifsglob.h | 3 ++- fs/smb/client/connect.c | 7 ++++++- fs/smb/client/smb2ops.c | 19 ++++++++++++------- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index bd7fc20c49de..4eac7dcb82f9 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -532,7 +532,8 @@ struct smb_version_operations { struct mid_q_entry **, char **, int *); enum securityEnum (*select_sectype)(struct TCP_Server_Info *, enum securityEnum); - int (*next_header)(char *); + int (*next_header)(struct TCP_Server_Info *server, char *buf, + unsigned int *noff); /* ioctl passthrough for query_info */ int (*ioctl_query_info)(const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index d517651d7bce..76ccbdba5855 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1180,7 +1180,12 @@ cifs_demultiplex_thread(void *p) server->total_read += length; if (server->ops->next_header) { - next_offset = server->ops->next_header(buf); + if (server->ops->next_header(server, buf, &next_offset)) { + cifs_dbg(VFS, "%s: malformed response (next_offset=%u)\n", + __func__, next_offset); + cifs_reconnect(server, true); + continue; + } if (next_offset) server->pdu_size = next_offset; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index dbcfdf7bc270..2187921580ac 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5072,17 +5072,22 @@ smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid) NULL, 0, false); } -static int -smb2_next_header(char *buf) +static int smb2_next_header(struct TCP_Server_Info *server, char *buf, + unsigned int *noff) { struct smb2_hdr *hdr = (struct smb2_hdr *)buf; struct smb2_transform_hdr *t_hdr = (struct smb2_transform_hdr *)buf; - if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) - return sizeof(struct smb2_transform_hdr) + - le32_to_cpu(t_hdr->OriginalMessageSize); - - return le32_to_cpu(hdr->NextCommand); + if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) { + *noff = le32_to_cpu(t_hdr->OriginalMessageSize); + if (unlikely(check_add_overflow(*noff, sizeof(*t_hdr), noff))) + return -EINVAL; + } else { + *noff = le32_to_cpu(hdr->NextCommand); + } + if (unlikely(*noff && *noff < MID_HEADER_SIZE(server))) + return -EINVAL; + return 0; } static int From 6630441cc2e8f1f132636c992d65afbe269f3ad7 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sat, 16 Dec 2023 01:10:04 -0300 Subject: [PATCH 1314/1397] smb: client: fix potential OOB in cifs_dump_detail() commit b50492b05fd02887b46aef079592207fb5c97a4c upstream. Validate SMB message with ->check_message() before calling ->calc_smb_size(). Signed-off-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/cifs_debug.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 16282ecfe17a..a2584ad8808a 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -40,11 +40,13 @@ void cifs_dump_detail(void *buf, struct TCP_Server_Info *server) #ifdef CONFIG_CIFS_DEBUG2 struct smb_hdr *smb = buf; - cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d\n", - smb->Command, smb->Status.CifsError, - smb->Flags, smb->Flags2, smb->Mid, smb->Pid); - cifs_dbg(VFS, "smb buf %p len %u\n", smb, - server->ops->calc_smb_size(smb)); + cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d Wct: %d\n", + smb->Command, smb->Status.CifsError, smb->Flags, + smb->Flags2, smb->Mid, smb->Pid, smb->WordCount); + if (!server->ops->check_message(buf, server->total_read, server)) { + cifs_dbg(VFS, "smb buf %p len %u\n", smb, + server->ops->calc_smb_size(smb)); + } #endif /* CONFIG_CIFS_DEBUG2 */ } From 3b5f0d0a2bf07bd0477f78ccdda3aa866aa3be7f Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 13 Dec 2023 12:25:57 -0300 Subject: [PATCH 1315/1397] smb: client: fix OOB in SMB2_query_info_init() commit 33eae65c6f49770fec7a662935d4eb4a6406d24b upstream. A small CIFS buffer (448 bytes) isn't big enough to hold SMB2_QUERY_INFO request along with user's input data from CIFS_QUERY_INFO ioctl. That is, if the user passed an input buffer > 344 bytes, the client will memcpy() off the end of @req->Buffer in SMB2_query_info_init() thus causing the following KASAN splat: BUG: KASAN: slab-out-of-bounds in SMB2_query_info_init+0x242/0x250 [cifs] Write of size 1023 at addr ffff88801308c5a8 by task a.out/1240 CPU: 1 PID: 1240 Comm: a.out Not tainted 6.7.0-rc4 #5 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 Call Trace: dump_stack_lvl+0x4a/0x80 print_report+0xcf/0x650 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? __phys_addr+0x46/0x90 kasan_report+0xd8/0x110 ? SMB2_query_info_init+0x242/0x250 [cifs] ? SMB2_query_info_init+0x242/0x250 [cifs] kasan_check_range+0x105/0x1b0 __asan_memcpy+0x3c/0x60 SMB2_query_info_init+0x242/0x250 [cifs] ? __pfx_SMB2_query_info_init+0x10/0x10 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 ? smb_rqst_len+0xa6/0xc0 [cifs] smb2_ioctl_query_info+0x4f4/0x9a0 [cifs] ? __pfx_smb2_ioctl_query_info+0x10/0x10 [cifs] ? __pfx_cifsConvertToUTF16+0x10/0x10 [cifs] ? kasan_set_track+0x25/0x30 ? srso_alias_return_thunk+0x5/0xfbef5 ? __kasan_kmalloc+0x8f/0xa0 ? srso_alias_return_thunk+0x5/0xfbef5 ? cifs_strndup_to_utf16+0x12d/0x1a0 [cifs] ? __build_path_from_dentry_optional_prefix+0x19d/0x2d0 [cifs] ? __pfx_smb2_ioctl_query_info+0x10/0x10 [cifs] cifs_ioctl+0x11c7/0x1de0 [cifs] ? __pfx_cifs_ioctl+0x10/0x10 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 ? rcu_is_watching+0x23/0x50 ? srso_alias_return_thunk+0x5/0xfbef5 ? __rseq_handle_notify_resume+0x6cd/0x850 ? __pfx___schedule+0x10/0x10 ? blkcg_iostat_update+0x250/0x290 ? srso_alias_return_thunk+0x5/0xfbef5 ? ksys_write+0xe9/0x170 __x64_sys_ioctl+0xc9/0x100 do_syscall_64+0x47/0xf0 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f893dde49cf Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 18 48 8b 44 24 18 64 48 2b 04 25 28 00 00 RSP: 002b:00007ffc03ff4160 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffc03ff4378 RCX: 00007f893dde49cf RDX: 00007ffc03ff41d0 RSI: 00000000c018cf07 RDI: 0000000000000003 RBP: 00007ffc03ff4260 R08: 0000000000000410 R09: 0000000000000001 R10: 00007f893dce7300 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffc03ff4388 R14: 00007f893df15000 R15: 0000000000406de0 Fix this by increasing size of SMB2_QUERY_INFO request buffers and validating input length to prevent other callers from overflowing @req in SMB2_query_info_init() as well. Fixes: f5b05d622a3e ("cifs: add IOCTL for QUERY_INFO passthrough to userspace") Cc: stable@vger.kernel.org Reported-by: Robert Morris Signed-off-by: Paulo Alcantara Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2pdu.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 2df118540e89..76a0b9dbcf75 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -376,10 +376,15 @@ static int __smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, void **request_buf, unsigned int *total_len) { /* BB eventually switch this to SMB2 specific small buf size */ - if (smb2_command == SMB2_SET_INFO) + switch (smb2_command) { + case SMB2_SET_INFO: + case SMB2_QUERY_INFO: *request_buf = cifs_buf_get(); - else + break; + default: *request_buf = cifs_small_buf_get(); + break; + } if (*request_buf == NULL) { /* BB should we add a retry in here if not a writepage? */ return -ENOMEM; @@ -3494,8 +3499,13 @@ SMB2_query_info_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, struct smb2_query_info_req *req; struct kvec *iov = rqst->rq_iov; unsigned int total_len; + size_t len; int rc; + if (unlikely(check_add_overflow(input_len, sizeof(*req), &len) || + len > CIFSMaxBufSize)) + return -EINVAL; + rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, server, (void **) &req, &total_len); if (rc) @@ -3517,7 +3527,7 @@ SMB2_query_info_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, iov[0].iov_base = (char *)req; /* 1 for Buffer */ - iov[0].iov_len = total_len - 1 + input_len; + iov[0].iov_len = len; return 0; } @@ -3525,7 +3535,7 @@ void SMB2_query_info_free(struct smb_rqst *rqst) { if (rqst && rqst->rq_iov) - cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ + cifs_buf_release(rqst->rq_iov[0].iov_base); /* request */ } static int @@ -5392,6 +5402,11 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, return 0; } +static inline void free_qfs_info_req(struct kvec *iov) +{ + cifs_buf_release(iov->iov_base); +} + int SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata) @@ -5423,7 +5438,7 @@ SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(iov.iov_base); + free_qfs_info_req(&iov); if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); goto posix_qfsinf_exit; @@ -5474,7 +5489,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(iov.iov_base); + free_qfs_info_req(&iov); if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); goto qfsinf_exit; @@ -5541,7 +5556,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(iov.iov_base); + free_qfs_info_req(&iov); if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); goto qfsattr_exit; From ac48fcef5ec2e9ac85c0b39045d874e60eac75d7 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 15 Dec 2023 19:59:14 -0300 Subject: [PATCH 1316/1397] smb: client: fix OOB in smbCalcSize() commit b35858b3786ddbb56e1c35138ba25d6adf8d0bef upstream. Validate @smb->WordCount to avoid reading off the end of @smb and thus causing the following KASAN splat: BUG: KASAN: slab-out-of-bounds in smbCalcSize+0x32/0x40 [cifs] Read of size 2 at addr ffff88801c024ec5 by task cifsd/1328 CPU: 1 PID: 1328 Comm: cifsd Not tainted 6.7.0-rc5 #9 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 Call Trace: dump_stack_lvl+0x4a/0x80 print_report+0xcf/0x650 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? __phys_addr+0x46/0x90 kasan_report+0xd8/0x110 ? smbCalcSize+0x32/0x40 [cifs] ? smbCalcSize+0x32/0x40 [cifs] kasan_check_range+0x105/0x1b0 smbCalcSize+0x32/0x40 [cifs] checkSMB+0x162/0x370 [cifs] ? __pfx_checkSMB+0x10/0x10 [cifs] cifs_handle_standard+0xbc/0x2f0 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 cifs_demultiplex_thread+0xed1/0x1360 [cifs] ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 ? lockdep_hardirqs_on_prepare+0x136/0x210 ? __pfx_lock_release+0x10/0x10 ? srso_alias_return_thunk+0x5/0xfbef5 ? mark_held_locks+0x1a/0x90 ? lockdep_hardirqs_on_prepare+0x136/0x210 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? __kthread_parkme+0xce/0xf0 ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs] kthread+0x18d/0x1d0 ? kthread+0xdb/0x1d0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 This fixes CVE-2023-6606. Reported-by: j51569436@gmail.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218218 Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/misc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 35b176457bbe..c2137ea3c253 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -363,6 +363,10 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) cifs_dbg(VFS, "Length less than smb header size\n"); } return -EIO; + } else if (total_read < sizeof(*smb) + 2 * smb->WordCount) { + cifs_dbg(VFS, "%s: can't read BCC due to invalid WordCount(%u)\n", + __func__, smb->WordCount); + return -EIO; } /* otherwise, there is enough to get to the BCC */ From 706b554adfe9d42e7c366ea14b10433e93dc7d54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 11 Dec 2023 10:11:34 +0200 Subject: [PATCH 1317/1397] drm/i915: Reject async flips with bigjoiner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 88a173e5dd05e788068e8fa20a8c37c44bd8f416 upstream. Currently async flips are busted when bigjoiner is in use. As a short term fix simply reject async flips in that case. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9769 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211081134.2698-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit e93bffc2ac0a833b42841f31fff955549d38ce98) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_display.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 2d9d96ecbb25..2e0daad23aa6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5977,6 +5977,17 @@ static int intel_async_flip_check_hw(struct intel_atomic_state *state, struct in return -EINVAL; } + /* + * FIXME: Bigjoiner+async flip is busted currently. + * Remove this check once the issues are fixed. + */ + if (new_crtc_state->bigjoiner_pipes) { + drm_dbg_kms(&i915->drm, + "[CRTC:%d:%s] async flip disallowed with bigjoiner\n", + crtc->base.base.id, crtc->base.name); + return -EINVAL; + } + for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { if (plane->pipe != crtc->pipe) From e0730d7edbc324344915f0c4db30325cb5c87277 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 11 Dec 2023 23:37:47 +0200 Subject: [PATCH 1318/1397] drm/i915/dmc: Don't enable any pipe DMC events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 49e0a85ec3441edc6c77aa40206d6e5ee4597efc upstream. The pipe DMC seems to be making a mess of things in ADL. Various weird symptoms have been observed such as missing vblank irqs, typicalle happening when using multiple displays. Keep all pipe DMC event handlers disabled until needed (which is never atm). This is also what Windows does on ADL+. We can also drop DG2 from disable_all_flip_queue_events() since on DG2 the pipe DMC is the one that handles the flip queue events. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8685 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20231211213750.27109-2-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak (cherry picked from commit 648d7be8ecf47b0556e32550145c70db153b16fb) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_dmc.c | 43 ++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 5f479f3828bb..8751973b5730 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -389,7 +389,7 @@ disable_all_flip_queue_events(struct drm_i915_private *i915) enum intel_dmc_id dmc_id; /* TODO: check if the following applies to all D13+ platforms. */ - if (!IS_DG2(i915) && !IS_TIGERLAKE(i915)) + if (!IS_TIGERLAKE(i915)) return; for_each_dmc_id(dmc_id) { @@ -493,6 +493,45 @@ void intel_dmc_disable_pipe(struct drm_i915_private *i915, enum pipe pipe) intel_de_rmw(i915, PIPEDMC_CONTROL(pipe), PIPEDMC_ENABLE, 0); } +static bool is_dmc_evt_ctl_reg(struct drm_i915_private *i915, + enum intel_dmc_id dmc_id, i915_reg_t reg) +{ + u32 offset = i915_mmio_reg_offset(reg); + u32 start = i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, 0)); + u32 end = i915_mmio_reg_offset(DMC_EVT_CTL(i915, dmc_id, DMC_EVENT_HANDLER_COUNT_GEN12)); + + return offset >= start && offset < end; +} + +static bool disable_dmc_evt(struct drm_i915_private *i915, + enum intel_dmc_id dmc_id, + i915_reg_t reg, u32 data) +{ + if (!is_dmc_evt_ctl_reg(i915, dmc_id, reg)) + return false; + + /* keep all pipe DMC events disabled by default */ + if (dmc_id != DMC_FW_MAIN) + return true; + + return false; +} + +static u32 dmc_mmiodata(struct drm_i915_private *i915, + struct intel_dmc *dmc, + enum intel_dmc_id dmc_id, int i) +{ + if (disable_dmc_evt(i915, dmc_id, + dmc->dmc_info[dmc_id].mmioaddr[i], + dmc->dmc_info[dmc_id].mmiodata[i])) + return REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK, + DMC_EVT_CTL_TYPE_EDGE_0_1) | + REG_FIELD_PREP(DMC_EVT_CTL_EVENT_ID_MASK, + DMC_EVT_CTL_EVENT_ID_FALSE); + else + return dmc->dmc_info[dmc_id].mmiodata[i]; +} + /** * intel_dmc_load_program() - write the firmware from memory to register. * @i915: i915 drm device. @@ -532,7 +571,7 @@ void intel_dmc_load_program(struct drm_i915_private *i915) for_each_dmc_id(dmc_id) { for (i = 0; i < dmc->dmc_info[dmc_id].mmio_count; i++) { intel_de_write(i915, dmc->dmc_info[dmc_id].mmioaddr[i], - dmc->dmc_info[dmc_id].mmiodata[i]); + dmc_mmiodata(i915, dmc, dmc_id, i)); } } From 0590874226f310f1cee8c744821aca8cffc62d36 Mon Sep 17 00:00:00 2001 From: JP Kobryn Date: Mon, 4 Dec 2023 12:23:20 -0800 Subject: [PATCH 1319/1397] 9p: prevent read overrun in protocol dump tracepoint commit a931c6816078af3e306e0f444f492396ce40de31 upstream. An out of bounds read can occur within the tracepoint 9p_protocol_dump. In the fast assign, there is a memcpy that uses a constant size of 32 (macro named P9_PROTO_DUMP_SZ). When the copy is invoked, the source buffer is not guaranteed match this size. It was found that in some cases the source buffer size is less than 32, resulting in a read that overruns. The size of the source buffer seems to be known at the time of the tracepoint being invoked. The allocations happen within p9_fcall_init(), where the capacity field is set to the allocated size of the payload buffer. This patch tries to fix the overrun by changing the fixed array to a dynamically sized array and using the minimum of the capacity value or P9_PROTO_DUMP_SZ as its length. The trace log statement is adjusted to account for this. Note that the trace log no longer splits the payload on the first 16 bytes. The full payload is now logged to a single line. To repro the orignal problem, operations to a plan 9 managed resource can be used. The simplest approach might just be mounting a shared filesystem (between host and guest vm) using the plan 9 protocol while the tracepoint is enabled. mount -t 9p -o trans=virtio The bpftrace program below can be used to show the out of bounds read. Note that a recent version of bpftrace is needed for the raw tracepoint support. The script was tested using v0.19.0. /* from include/net/9p/9p.h */ struct p9_fcall { u32 size; u8 id; u16 tag; size_t offset; size_t capacity; struct kmem_cache *cache; u8 *sdata; bool zc; }; tracepoint:9p:9p_protocol_dump { /* out of bounds read can happen when this tracepoint is enabled */ } rawtracepoint:9p_protocol_dump { $pdu = (struct p9_fcall *)arg1; $dump_sz = (uint64)32; if ($dump_sz > $pdu->capacity) { printf("reading %zu bytes from src buffer of %zu bytes\n", $dump_sz, $pdu->capacity); } } Signed-off-by: JP Kobryn Message-ID: <20231204202321.22730-1-inwardvessel@gmail.com> Fixes: 60ece0833b6c ("net/9p: allocate appropriate reduced message buffers") Cc: stable@vger.kernel.org Reviewed-by: Christian Schoenebeck Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- include/trace/events/9p.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/trace/events/9p.h b/include/trace/events/9p.h index 4dfa6d7f83ba..cd104a1343e2 100644 --- a/include/trace/events/9p.h +++ b/include/trace/events/9p.h @@ -178,18 +178,21 @@ TRACE_EVENT(9p_protocol_dump, __field( void *, clnt ) __field( __u8, type ) __field( __u16, tag ) - __array( unsigned char, line, P9_PROTO_DUMP_SZ ) + __dynamic_array(unsigned char, line, + min_t(size_t, pdu->capacity, P9_PROTO_DUMP_SZ)) ), TP_fast_assign( __entry->clnt = clnt; __entry->type = pdu->id; __entry->tag = pdu->tag; - memcpy(__entry->line, pdu->sdata, P9_PROTO_DUMP_SZ); + memcpy(__get_dynamic_array(line), pdu->sdata, + __get_dynamic_array_len(line)); ), - TP_printk("clnt %lu %s(tag = %d)\n%.3x: %16ph\n%.3x: %16ph\n", + TP_printk("clnt %lu %s(tag = %d)\n%*ph\n", (unsigned long)__entry->clnt, show_9p_op(__entry->type), - __entry->tag, 0, __entry->line, 16, __entry->line + 16) + __entry->tag, __get_dynamic_array_len(line), + __get_dynamic_array(line)) ); From 82aaf7fc98659f9ed0fd505302a3abf40a52449e Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 12 Dec 2023 14:30:49 -0500 Subject: [PATCH 1320/1397] ring-buffer: Fix 32-bit rb_time_read() race with rb_time_cmpxchg() [ Upstream commit dec890089bf79a4954b61482715ee2d084364856 ] The following race can cause rb_time_read() to observe a corrupted time stamp: rb_time_cmpxchg() [...] if (!rb_time_read_cmpxchg(&t->msb, msb, msb2)) return false; if (!rb_time_read_cmpxchg(&t->top, top, top2)) return false; __rb_time_read() [...] do { c = local_read(&t->cnt); top = local_read(&t->top); bottom = local_read(&t->bottom); msb = local_read(&t->msb); } while (c != local_read(&t->cnt)); *cnt = rb_time_cnt(top); /* If top and msb counts don't match, this interrupted a write */ if (*cnt != rb_time_cnt(msb)) return false; ^ this check fails to catch that "bottom" is still not updated. So the old "bottom" value is returned, which is wrong. Fix this by checking that all three of msb, top, and bottom 2-bit cnt values match. The reason to favor checking all three fields over requiring a specific update order for both rb_time_set() and rb_time_cmpxchg() is because checking all three fields is more robust to handle partial failures of rb_time_cmpxchg() when interrupted by nested rb_time_set(). Link: https://lore.kernel.org/lkml/20231211201324.652870-1-mathieu.desnoyers@efficios.com/ Link: https://lore.kernel.org/linux-trace-kernel/20231212193049.680122-1-mathieu.desnoyers@efficios.com Fixes: f458a1453424e ("ring-buffer: Test last update in 32bit version of __rb_time_read()") Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index af08a1a411e3..070566baa0ca 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -644,8 +644,8 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt) *cnt = rb_time_cnt(top); - /* If top and msb counts don't match, this interrupted a write */ - if (*cnt != rb_time_cnt(msb)) + /* If top, msb or bottom counts don't match, this interrupted a write */ + if (*cnt != rb_time_cnt(msb) || *cnt != rb_time_cnt(bottom)) return false; /* The shift to msb will lose its cnt bits */ From 307f56f2606a3e2ba5be5c830df221b85f3f016e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 15 Dec 2023 08:18:10 -0500 Subject: [PATCH 1321/1397] ring-buffer: Remove useless update to write_stamp in rb_try_to_discard() [ Upstream commit 083e9f65bd215582bf8f6a920db729fadf16704f ] When filtering is enabled, a temporary buffer is created to place the content of the trace event output so that the filter logic can decide from the trace event output if the trace event should be filtered out or not. If it is to be filtered out, the content in the temporary buffer is simply discarded, otherwise it is written into the trace buffer. But if an interrupt were to come in while a previous event was using that temporary buffer, the event written by the interrupt would actually go into the ring buffer itself to prevent corrupting the data on the temporary buffer. If the event is to be filtered out, the event in the ring buffer is discarded, or if it fails to discard because another event were to have already come in, it is turned into padding. The update to the write_stamp in the rb_try_to_discard() happens after a fix was made to force the next event after the discard to use an absolute timestamp by setting the before_stamp to zero so it does not match the write_stamp (which causes an event to use the absolute timestamp). But there's an effort in rb_try_to_discard() to put back the write_stamp to what it was before the event was added. But this is useless and wasteful because nothing is going to be using that write_stamp for calculations as it still will not match the before_stamp. Remove this useless update, and in doing so, we remove another cmpxchg64()! Also update the comments to reflect this change as well as remove some extra white space in another comment. Link: https://lore.kernel.org/linux-trace-kernel/20231215081810.1f4f38fe@rorschach.local.home Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Joel Fernandes Cc: Vincent Donnefort Fixes: b2dd797543cf ("ring-buffer: Force absolute timestamp on discard of event") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 47 +++++++++----------------------------- 1 file changed, 11 insertions(+), 36 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 070566baa0ca..4c97160ab9c1 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2987,25 +2987,6 @@ static unsigned rb_calculate_event_length(unsigned length) return length; } -static u64 rb_time_delta(struct ring_buffer_event *event) -{ - switch (event->type_len) { - case RINGBUF_TYPE_PADDING: - return 0; - - case RINGBUF_TYPE_TIME_EXTEND: - return rb_event_time_stamp(event); - - case RINGBUF_TYPE_TIME_STAMP: - return 0; - - case RINGBUF_TYPE_DATA: - return event->time_delta; - default: - return 0; - } -} - static inline bool rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) @@ -3013,8 +2994,6 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, unsigned long new_index, old_index; struct buffer_page *bpage; unsigned long addr; - u64 write_stamp; - u64 delta; new_index = rb_event_index(event); old_index = new_index + rb_event_ts_length(event); @@ -3023,14 +3002,10 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, bpage = READ_ONCE(cpu_buffer->tail_page); - delta = rb_time_delta(event); - - if (!rb_time_read(&cpu_buffer->write_stamp, &write_stamp)) - return false; - - /* Make sure the write stamp is read before testing the location */ - barrier(); - + /* + * Make sure the tail_page is still the same and + * the next write location is the end of this event + */ if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { unsigned long write_mask = local_read(&bpage->write) & ~RB_WRITE_MASK; @@ -3041,20 +3016,20 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, * to make sure that the next event adds an absolute * value and does not rely on the saved write stamp, which * is now going to be bogus. + * + * By setting the before_stamp to zero, the next event + * is not going to use the write_stamp and will instead + * create an absolute timestamp. This means there's no + * reason to update the wirte_stamp! */ rb_time_set(&cpu_buffer->before_stamp, 0); - /* Something came in, can't discard */ - if (!rb_time_cmpxchg(&cpu_buffer->write_stamp, - write_stamp, write_stamp - delta)) - return false; - /* * If an event were to come in now, it would see that the * write_stamp and the before_stamp are different, and assume * that this event just added itself before updating * the write stamp. The interrupting event will fix the - * write stamp for us, and use the before stamp as its delta. + * write stamp for us, and use an absolute timestamp. */ /* @@ -3491,7 +3466,7 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, return; /* - * If this interrupted another event, + * If this interrupted another event, */ if (atomic_inc_return(this_cpu_ptr(&checking)) != 1) goto out; From bddd8b50bfe2b6bc16cc2810392ebd99e1b4574b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 18 Dec 2023 23:07:12 -0500 Subject: [PATCH 1322/1397] ring-buffer: Fix slowpath of interrupted event [ Upstream commit b803d7c664d55705831729d2f2e29c874bcd62ea ] To synchronize the timestamps with the ring buffer reservation, there are two timestamps that are saved in the buffer meta data. 1. before_stamp 2. write_stamp When the two are equal, the write_stamp is considered valid, as in, it may be used to calculate the delta of the next event as the write_stamp is the timestamp of the previous reserved event on the buffer. This is done by the following: /*A*/ w = current position on the ring buffer before = before_stamp after = write_stamp ts = read current timestamp if (before != after) { write_stamp is not valid, force adding an absolute timestamp. } /*B*/ before_stamp = ts /*C*/ write = local_add_return(event length, position on ring buffer) if (w == write - event length) { /* Nothing interrupted between A and C */ /*E*/ write_stamp = ts; delta = ts - after /* * If nothing interrupted again, * before_stamp == write_stamp and write_stamp * can be used to calculate the delta for * events that come in after this one. */ } else { /* * The slow path! * Was interrupted between A and C. */ This is the place that there's a bug. We currently have: after = write_stamp ts = read current timestamp /*F*/ if (write == current position on the ring buffer && after < ts && cmpxchg(write_stamp, after, ts)) { delta = ts - after; } else { delta = 0; } The assumption is that if the current position on the ring buffer hasn't moved between C and F, then it also was not interrupted, and that the last event written has a timestamp that matches the write_stamp. That is the write_stamp is valid. But this may not be the case: If a task context event was interrupted by softirq between B and C. And the softirq wrote an event that got interrupted by a hard irq between C and E. and the hard irq wrote an event (does not need to be interrupted) We have: /*B*/ before_stamp = ts of normal context ---> interrupted by softirq /*B*/ before_stamp = ts of softirq context ---> interrupted by hardirq /*B*/ before_stamp = ts of hard irq context /*E*/ write_stamp = ts of hard irq context /* matches and write_stamp valid */ <---- /*E*/ write_stamp = ts of softirq context /* No longer matches before_stamp, write_stamp is not valid! */ <--- w != write - length, go to slow path // Right now the order of events in the ring buffer is: // // |-- softirq event --|-- hard irq event --|-- normal context event --| // after = write_stamp (this is the ts of softirq) ts = read current timestamp if (write == current position on the ring buffer [true] && after < ts [true] && cmpxchg(write_stamp, after, ts) [true]) { delta = ts - after [Wrong!] The delta is to be between the hard irq event and the normal context event, but the above logic made the delta between the softirq event and the normal context event, where the hard irq event is between the two. This will shift all the remaining event timestamps on the sub-buffer incorrectly. The write_stamp is only valid if it matches the before_stamp. The cmpxchg does nothing to help this. Instead, the following logic can be done to fix this: before = before_stamp ts = read current timestamp before_stamp = ts after = write_stamp if (write == current position on the ring buffer && after == before && after < ts) { delta = ts - after } else { delta = 0; } The above will only use the write_stamp if it still matches before_stamp and was tested to not have changed since C. As a bonus, with this logic we do not need any 64-bit cmpxchg() at all! This means the 32-bit rb_time_t workaround can finally be removed. But that's for a later time. Link: https://lore.kernel.org/linux-trace-kernel/20231218175229.58ec3daf@gandalf.local.home/ Link: https://lore.kernel.org/linux-trace-kernel/20231218230712.3a76b081@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Linus Torvalds Fixes: dd93942570789 ("ring-buffer: Do not try to put back write_stamp") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/ring_buffer.c | 79 ++++++++++++-------------------------- 1 file changed, 24 insertions(+), 55 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 4c97160ab9c1..783a500e89c5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -700,48 +700,6 @@ rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set) return local_try_cmpxchg(l, &expect, set); } -static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) -{ - unsigned long cnt, top, bottom, msb; - unsigned long cnt2, top2, bottom2, msb2; - u64 val; - - /* Any interruptions in this function should cause a failure */ - cnt = local_read(&t->cnt); - - /* The cmpxchg always fails if it interrupted an update */ - if (!__rb_time_read(t, &val, &cnt2)) - return false; - - if (val != expect) - return false; - - if ((cnt & 3) != cnt2) - return false; - - cnt2 = cnt + 1; - - rb_time_split(val, &top, &bottom, &msb); - msb = rb_time_val_cnt(msb, cnt); - top = rb_time_val_cnt(top, cnt); - bottom = rb_time_val_cnt(bottom, cnt); - - rb_time_split(set, &top2, &bottom2, &msb2); - msb2 = rb_time_val_cnt(msb2, cnt); - top2 = rb_time_val_cnt(top2, cnt2); - bottom2 = rb_time_val_cnt(bottom2, cnt2); - - if (!rb_time_read_cmpxchg(&t->cnt, cnt, cnt2)) - return false; - if (!rb_time_read_cmpxchg(&t->msb, msb, msb2)) - return false; - if (!rb_time_read_cmpxchg(&t->top, top, top2)) - return false; - if (!rb_time_read_cmpxchg(&t->bottom, bottom, bottom2)) - return false; - return true; -} - #else /* 64 bits */ /* local64_t always succeeds */ @@ -755,11 +713,6 @@ static void rb_time_set(rb_time_t *t, u64 val) { local64_set(&t->time, val); } - -static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) -{ - return local64_try_cmpxchg(&t->time, &expect, set); -} #endif /* @@ -3610,20 +3563,36 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, } else { u64 ts; /* SLOW PATH - Interrupted between A and C */ - a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); - /* Was interrupted before here, write_stamp must be valid */ + + /* Save the old before_stamp */ + a_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before); RB_WARN_ON(cpu_buffer, !a_ok); + + /* + * Read a new timestamp and update the before_stamp to make + * the next event after this one force using an absolute + * timestamp. This is in case an interrupt were to come in + * between E and F. + */ ts = rb_time_stamp(cpu_buffer->buffer); + rb_time_set(&cpu_buffer->before_stamp, ts); + + barrier(); + /*E*/ a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after); + /* Was interrupted before here, write_stamp must be valid */ + RB_WARN_ON(cpu_buffer, !a_ok); barrier(); - /*E*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && - info->after < ts && - rb_time_cmpxchg(&cpu_buffer->write_stamp, - info->after, ts)) { - /* Nothing came after this event between C and E */ + /*F*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) && + info->after == info->before && info->after < ts) { + /* + * Nothing came after this event between C and F, it is + * safe to use info->after for the delta as it + * matched info->before and is still valid. + */ info->delta = ts - info->after; } else { /* - * Interrupted between C and E: + * Interrupted between C and F: * Lost the previous events time stamp. Just set the * delta to zero, and this will be the same time as * the event this event interrupted. And the events that From 662ae991759a77c2507c53d57e6b63b7e42527e0 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 27 Nov 2023 10:58:41 +0100 Subject: [PATCH 1323/1397] spi: atmel: Do not cancel a transfer upon any signal commit 1ca2761a7734928ffe0678f88789266cf3d05362 upstream. The intended move from wait_for_completion_*() to wait_for_completion_interruptible_*() was to allow (very) long spi memory transfers to be stopped upon user request instead of freezing the machine forever as the timeout value could now be significantly bigger. However, depending on the user logic, applications can receive many signals for their own "internal" purpose and have nothing to do with the requested kernel operations, hence interrupting spi transfers upon any signal is probably not a wise choice. Instead, let's switch to wait_for_completion_killable_*() to only catch the "important" signals. This was likely the intended behavior anyway. Fixes: e0205d6203c2 ("spi: atmel: Prevent false timeouts on long transfers") Cc: stable@vger.kernel.org Reported-by: Ronald Wahl Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20231127095842.389631-1-miquel.raynal@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-atmel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 6aa8adbe4170..2e8860865af9 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1336,8 +1336,8 @@ static int atmel_spi_one_transfer(struct spi_controller *host, } dma_timeout = msecs_to_jiffies(spi_controller_xfer_timeout(host, xfer)); - ret_timeout = wait_for_completion_interruptible_timeout(&as->xfer_completion, - dma_timeout); + ret_timeout = wait_for_completion_killable_timeout(&as->xfer_completion, + dma_timeout); if (ret_timeout <= 0) { dev_err(&spi->dev, "spi transfer %s\n", !ret_timeout ? "timeout" : "canceled"); From 4b74558ab3ca79ed3aeb05c844ec4c7743dc74bc Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 5 Dec 2023 09:31:02 +0100 Subject: [PATCH 1324/1397] spi: atmel: Prevent spi transfers from being killed commit 890188d2d7e4ac6c131ba166ca116cb315e752ee upstream. Upstream commit e0205d6203c2 ("spi: atmel: Prevent false timeouts on long transfers") has tried to mitigate the problem of getting spi transfers canceled because they were lasting too long. On slow buses, transfers in the MiB range can take more than one second and thus a calculation was added to progressively increment the timeout value. In order to not be too problematic from a user point of view (waiting dozen of seconds or even minutes), the wait call was turned interruptible. Turning the wait interruptible was a mistake as what we really wanted to do was to be able to kill a transfer. Any signal interrupting our transfer would not be suitable at all so a second attempt was made at turning the wait killable instead. Link: https://lore.kernel.org/linux-spi/20231127095842.389631-1-miquel.raynal@bootlin.com/ All being well, it was reported that JFFS2 was showing a splat when interrupting a transfer. After some more debate about whether JFFS2 should be fixed and how, it was also pointed out that the whole consistency of the filesystem in case of parallel I/O would be compromised. Changing JFFS2 behavior would in theory be possible but nobody has the energy and time and knowledge to do this now, so better prevent spi transfers to be interrupted by the user. Partially revert the blamed commit to no longer use the interruptible nor the killable variant of wait_for_completion(). Fixes: e0205d6203c2 ("spi: atmel: Prevent false timeouts on long transfers") Cc: Signed-off-by: Miquel Raynal Tested-by: Ronald Wahl Link: https://lore.kernel.org/r/20231205083102.16946-1-miquel.raynal@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-atmel.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 2e8860865af9..3383a5b4b5ed 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1336,12 +1336,10 @@ static int atmel_spi_one_transfer(struct spi_controller *host, } dma_timeout = msecs_to_jiffies(spi_controller_xfer_timeout(host, xfer)); - ret_timeout = wait_for_completion_killable_timeout(&as->xfer_completion, - dma_timeout); - if (ret_timeout <= 0) { - dev_err(&spi->dev, "spi transfer %s\n", - !ret_timeout ? "timeout" : "canceled"); - as->done_status = ret_timeout < 0 ? ret_timeout : -EIO; + ret_timeout = wait_for_completion_timeout(&as->xfer_completion, dma_timeout); + if (!ret_timeout) { + dev_err(&spi->dev, "spi transfer timeout\n"); + as->done_status = -EIO; } if (as->done_status) From dbf0c97f52d63e211983d592f6422177a8178902 Mon Sep 17 00:00:00 2001 From: Louis Chauvet Date: Mon, 4 Dec 2023 16:49:03 +0100 Subject: [PATCH 1325/1397] spi: atmel: Fix clock issue when using devices with different polarities commit fc70d643a2f6678cbe0f5c86433c1aeb4d613fcc upstream. The current Atmel SPI controller driver (v2) behaves incorrectly when using two SPI devices with different clock polarities and GPIO CS. When switching from one device to another, the controller driver first enables the CS and then applies whatever configuration suits the targeted device (typically, the polarities). The side effect of such order is the apparition of a spurious clock edge after enabling the CS when the clock polarity needs to be inverted wrt. the previous configuration of the controller. This parasitic clock edge is problematic when the SPI device uses that edge for internal processing, which is perfectly legitimate given that its CS was asserted. Indeed, devices such as HVS8080 driven by driver gpio-sr in the kernel are shift registers and will process this first clock edge to perform a first register shift. In this case, the first bit gets lost and the whole data block that will later be read by the kernel is all shifted by one. Current behavior: The actual switching of the clock polarity only occurs after the CS when the controller sends the first message: CLK ------------\ /-\ /-\ | | | | | . . . \---/ \-/ \ CS -----\ | \------------------ ^ ^ ^ | | | | | Actual clock of the message sent | | | Change of clock polarity, which occurs with the first | write to the bus. This edge occurs when the CS is | already asserted, and can be interpreted as | the first clock edge by the receiver. | GPIO CS toggle This issue is specific to this controller because while the SPI core performs the operations in the right order, the controller however does not. In practice, the controller only applies the clock configuration right before the first transmission. So this is not a problem when using the controller's dedicated CS, as the controller does things correctly, but it becomes a problem when you need to change the clock polarity and use an external GPIO for the CS. One possible approach to solve this problem is to send a dummy message before actually activating the CS, so that the controller applies the clock polarity beforehand. New behavior: CLK ------\ /-\ /-\ /-\ /-\ | | | ... | | | | ... | | \------/ \- -/ \------/ \- -/ \------ CS -\/-----------------------\ || | \/ \--------------------- ^ ^ ^ ^ ^ | | | | | | | | | Expected clock cycles when | | | | sending the message | | | | | | | Actual GPIO CS activation, occurs inside | | | the driver | | | | | Dummy message, to trigger clock polarity | | reconfiguration. This message is not received and | | processed by the device because CS is low. | | | Change of clock polarity, forced by the dummy message. This | time, the edge is not detected by the receiver. | This small spike in CS activation is due to the fact that the spi-core activates the CS gpio before calling the driver's set_cs callback, which deactivates this gpio again until the clock polarity is correct. To avoid having to systematically send a dummy packet, the driver keeps track of the clock's current polarity. In this way, it only sends the dummy packet when necessary, ensuring that the clock will have the correct polarity when the CS is toggled. There could be two hardware problems with this patch: 1- Maybe the small CS activation peak can confuse SPI devices 2- If on a design, a single wire is used to select two devices depending on its state, the dummy message may disturb them. Fixes: 5ee36c989831 ("spi: atmel_spi update chipselect handling") Cc: Signed-off-by: Louis Chauvet Link: https://msgid.link/r/20231204154903.11607-1-louis.chauvet@bootlin.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-atmel.c | 82 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 3383a5b4b5ed..e073d54873b1 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -22,6 +22,7 @@ #include #include #include +#include #include /* SPI register offsets */ @@ -279,6 +280,7 @@ struct atmel_spi { bool keep_cs; u32 fifo_size; + bool last_polarity; u8 native_cs_free; u8 native_cs_for_gpio; }; @@ -291,6 +293,22 @@ struct atmel_spi_device { #define SPI_MAX_DMA_XFER 65535 /* true for both PDC and DMA */ #define INVALID_DMA_ADDRESS 0xffffffff +/* + * This frequency can be anything supported by the controller, but to avoid + * unnecessary delay, the highest possible frequency is chosen. + * + * This frequency is the highest possible which is not interfering with other + * chip select registers (see Note for Serial Clock Bit Rate configuration in + * Atmel-11121F-ATARM-SAMA5D3-Series-Datasheet_02-Feb-16, page 1283) + */ +#define DUMMY_MSG_FREQUENCY 0x02 +/* + * 8 bits is the minimum data the controller is capable of sending. + * + * This message can be anything as it should not be treated by any SPI device. + */ +#define DUMMY_MSG 0xAA + /* * Version 2 of the SPI controller has * - CR.LASTXFER @@ -304,6 +322,43 @@ static bool atmel_spi_is_v2(struct atmel_spi *as) return as->caps.is_spi2; } +/* + * Send a dummy message. + * + * This is sometimes needed when using a CS GPIO to force clock transition when + * switching between devices with different polarities. + */ +static void atmel_spi_send_dummy(struct atmel_spi *as, struct spi_device *spi, int chip_select) +{ + u32 status; + u32 csr; + + /* + * Set a clock frequency to allow sending message on SPI bus. + * The frequency here can be anything, but is needed for + * the controller to send the data. + */ + csr = spi_readl(as, CSR0 + 4 * chip_select); + csr = SPI_BFINS(SCBR, DUMMY_MSG_FREQUENCY, csr); + spi_writel(as, CSR0 + 4 * chip_select, csr); + + /* + * Read all data coming from SPI bus, needed to be able to send + * the message. + */ + spi_readl(as, RDR); + while (spi_readl(as, SR) & SPI_BIT(RDRF)) { + spi_readl(as, RDR); + cpu_relax(); + } + + spi_writel(as, TDR, DUMMY_MSG); + + readl_poll_timeout_atomic(as->regs + SPI_SR, status, + (status & SPI_BIT(TXEMPTY)), 1, 1000); +} + + /* * Earlier SPI controllers (e.g. on at91rm9200) have a design bug whereby * they assume that spi slave device state will not change on deselect, so @@ -320,11 +375,17 @@ static bool atmel_spi_is_v2(struct atmel_spi *as) * Master on Chip Select 0.") No workaround exists for that ... so for * nCS0 on that chip, we (a) don't use the GPIO, (b) can't support CS_HIGH, * and (c) will trigger that first erratum in some cases. + * + * When changing the clock polarity, the SPI controller waits for the next + * transmission to enforce the default clock state. This may be an issue when + * using a GPIO as Chip Select: the clock level is applied only when the first + * packet is sent, once the CS has already been asserted. The workaround is to + * avoid this by sending a first (dummy) message before toggling the CS state. */ - static void cs_activate(struct atmel_spi *as, struct spi_device *spi) { struct atmel_spi_device *asd = spi->controller_state; + bool new_polarity; int chip_select; u32 mr; @@ -353,6 +414,25 @@ static void cs_activate(struct atmel_spi *as, struct spi_device *spi) } mr = spi_readl(as, MR); + + /* + * Ensures the clock polarity is valid before we actually + * assert the CS to avoid spurious clock edges to be + * processed by the spi devices. + */ + if (spi_get_csgpiod(spi, 0)) { + new_polarity = (asd->csr & SPI_BIT(CPOL)) != 0; + if (new_polarity != as->last_polarity) { + /* + * Need to disable the GPIO before sending the dummy + * message because it is already set by the spi core. + */ + gpiod_set_value_cansleep(spi_get_csgpiod(spi, 0), 0); + atmel_spi_send_dummy(as, spi, chip_select); + as->last_polarity = new_polarity; + gpiod_set_value_cansleep(spi_get_csgpiod(spi, 0), 1); + } + } } else { u32 cpol = (spi->mode & SPI_CPOL) ? SPI_BIT(CPOL) : 0; int i; From 09283d60bc332b19d6e76cd88bd1f2cadfa49e2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 15 Dec 2023 11:13:58 +0000 Subject: [PATCH 1326/1397] nvmem: brcm_nvram: store a copy of NVRAM content MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1e37bf84afacd5ba17b7a13a18ca2bc78aff05c0 upstream. This driver uses MMIO access for reading NVRAM from a flash device. Underneath there is a flash controller that reads data and provides mapping window. Using MMIO interface affects controller configuration and may break real controller driver. It was reported by multiple users of devices with NVRAM stored on NAND. Modify driver to read & cache NVRAM content during init and use that copy to provide NVMEM data when requested. On NAND flashes due to their alignment NVRAM partitions can be quite big (1 MiB and more) while actual NVRAM content stays quite small (usually 16 to 32 KiB). To avoid allocating so much memory check for actual data length. Link: https://lore.kernel.org/linux-mtd/CACna6rwf3_9QVjYcM+847biTX=K0EoWXuXcSMkJO1Vy_5vmVqA@mail.gmail.com/ Fixes: 3fef9ed0627a ("nvmem: brcm_nvram: new driver exposing Broadcom's NVRAM") Cc: Cc: Arınç ÜNAL Cc: Florian Fainelli Cc: Scott Branden Signed-off-by: Rafał Miłecki Acked-by: Arınç ÜNAL Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20231215111358.316727-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/brcm_nvram.c | 134 ++++++++++++++++++++++++++----------- 1 file changed, 94 insertions(+), 40 deletions(-) diff --git a/drivers/nvmem/brcm_nvram.c b/drivers/nvmem/brcm_nvram.c index 9737104f3b76..5cdf339cfbec 100644 --- a/drivers/nvmem/brcm_nvram.c +++ b/drivers/nvmem/brcm_nvram.c @@ -17,9 +17,23 @@ #define NVRAM_MAGIC "FLSH" +/** + * struct brcm_nvram - driver state internal struct + * + * @dev: NVMEM device pointer + * @nvmem_size: Size of the whole space available for NVRAM + * @data: NVRAM data copy stored to avoid poking underlaying flash controller + * @data_len: NVRAM data size + * @padding_byte: Padding value used to fill remaining space + * @cells: Array of discovered NVMEM cells + * @ncells: Number of elements in cells + */ struct brcm_nvram { struct device *dev; - void __iomem *base; + size_t nvmem_size; + uint8_t *data; + size_t data_len; + uint8_t padding_byte; struct nvmem_cell_info *cells; int ncells; }; @@ -36,10 +50,47 @@ static int brcm_nvram_read(void *context, unsigned int offset, void *val, size_t bytes) { struct brcm_nvram *priv = context; - u8 *dst = val; + size_t to_copy; + + if (offset + bytes > priv->data_len) + to_copy = max_t(ssize_t, (ssize_t)priv->data_len - offset, 0); + else + to_copy = bytes; + + memcpy(val, priv->data + offset, to_copy); + + memset((uint8_t *)val + to_copy, priv->padding_byte, bytes - to_copy); + + return 0; +} + +static int brcm_nvram_copy_data(struct brcm_nvram *priv, struct platform_device *pdev) +{ + struct resource *res; + void __iomem *base; + + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(base)) + return PTR_ERR(base); + + priv->nvmem_size = resource_size(res); + + priv->padding_byte = readb(base + priv->nvmem_size - 1); + for (priv->data_len = priv->nvmem_size; + priv->data_len; + priv->data_len--) { + if (readb(base + priv->data_len - 1) != priv->padding_byte) + break; + } + WARN(priv->data_len > SZ_128K, "Unexpected (big) NVRAM size: %zu B\n", priv->data_len); + + priv->data = devm_kzalloc(priv->dev, priv->data_len, GFP_KERNEL); + if (!priv->data) + return -ENOMEM; + + memcpy_fromio(priv->data, base, priv->data_len); - while (bytes--) - *dst++ = readb(priv->base + offset++); + bcm47xx_nvram_init_from_iomem(base, priv->data_len); return 0; } @@ -67,8 +118,13 @@ static int brcm_nvram_add_cells(struct brcm_nvram *priv, uint8_t *data, size_t len) { struct device *dev = priv->dev; - char *var, *value, *eq; + char *var, *value; + uint8_t tmp; int idx; + int err = 0; + + tmp = priv->data[len - 1]; + priv->data[len - 1] = '\0'; priv->ncells = 0; for (var = data + sizeof(struct brcm_nvram_header); @@ -78,67 +134,68 @@ static int brcm_nvram_add_cells(struct brcm_nvram *priv, uint8_t *data, } priv->cells = devm_kcalloc(dev, priv->ncells, sizeof(*priv->cells), GFP_KERNEL); - if (!priv->cells) - return -ENOMEM; + if (!priv->cells) { + err = -ENOMEM; + goto out; + } for (var = data + sizeof(struct brcm_nvram_header), idx = 0; var < (char *)data + len && *var; var = value + strlen(value) + 1, idx++) { + char *eq, *name; + eq = strchr(var, '='); if (!eq) break; *eq = '\0'; + name = devm_kstrdup(dev, var, GFP_KERNEL); + *eq = '='; + if (!name) { + err = -ENOMEM; + goto out; + } value = eq + 1; - priv->cells[idx].name = devm_kstrdup(dev, var, GFP_KERNEL); - if (!priv->cells[idx].name) - return -ENOMEM; + priv->cells[idx].name = name; priv->cells[idx].offset = value - (char *)data; priv->cells[idx].bytes = strlen(value); priv->cells[idx].np = of_get_child_by_name(dev->of_node, priv->cells[idx].name); - if (!strcmp(var, "et0macaddr") || - !strcmp(var, "et1macaddr") || - !strcmp(var, "et2macaddr")) { + if (!strcmp(name, "et0macaddr") || + !strcmp(name, "et1macaddr") || + !strcmp(name, "et2macaddr")) { priv->cells[idx].raw_len = strlen(value); priv->cells[idx].bytes = ETH_ALEN; priv->cells[idx].read_post_process = brcm_nvram_read_post_process_macaddr; } } - return 0; +out: + priv->data[len - 1] = tmp; + return err; } static int brcm_nvram_parse(struct brcm_nvram *priv) { + struct brcm_nvram_header *header = (struct brcm_nvram_header *)priv->data; struct device *dev = priv->dev; - struct brcm_nvram_header header; - uint8_t *data; size_t len; int err; - memcpy_fromio(&header, priv->base, sizeof(header)); - - if (memcmp(header.magic, NVRAM_MAGIC, 4)) { + if (memcmp(header->magic, NVRAM_MAGIC, 4)) { dev_err(dev, "Invalid NVRAM magic\n"); return -EINVAL; } - len = le32_to_cpu(header.len); - - data = kzalloc(len, GFP_KERNEL); - if (!data) - return -ENOMEM; - - memcpy_fromio(data, priv->base, len); - data[len - 1] = '\0'; - - err = brcm_nvram_add_cells(priv, data, len); - if (err) { - dev_err(dev, "Failed to add cells: %d\n", err); - return err; + len = le32_to_cpu(header->len); + if (len > priv->nvmem_size) { + dev_err(dev, "NVRAM length (%zd) exceeds mapped size (%zd)\n", len, + priv->nvmem_size); + return -EINVAL; } - kfree(data); + err = brcm_nvram_add_cells(priv, priv->data, len); + if (err) + dev_err(dev, "Failed to add cells: %d\n", err); return 0; } @@ -150,7 +207,6 @@ static int brcm_nvram_probe(struct platform_device *pdev) .reg_read = brcm_nvram_read, }; struct device *dev = &pdev->dev; - struct resource *res; struct brcm_nvram *priv; int err; @@ -159,21 +215,19 @@ static int brcm_nvram_probe(struct platform_device *pdev) return -ENOMEM; priv->dev = dev; - priv->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); - if (IS_ERR(priv->base)) - return PTR_ERR(priv->base); + err = brcm_nvram_copy_data(priv, pdev); + if (err) + return err; err = brcm_nvram_parse(priv); if (err) return err; - bcm47xx_nvram_init_from_iomem(priv->base, resource_size(res)); - config.dev = dev; config.cells = priv->cells; config.ncells = priv->ncells; config.priv = priv; - config.size = resource_size(res); + config.size = priv->nvmem_size; return PTR_ERR_OR_ZERO(devm_nvmem_register(dev, &config)); } From 71758d4d87ef0ba9f2295eaa9a478f1868064db1 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 8 Dec 2023 12:09:38 -0500 Subject: [PATCH 1327/1397] Revert "scsi: aacraid: Reply queue mapping to CPUs based on IRQ affinity" commit c5becf57dd5659c687d41d623a69f42d63f59eb2 upstream. This reverts commit 9dc704dcc09eae7d21b5da0615eb2ed79278f63e. Several reports have been made indicating that this commit caused hangs. Numerous attempts at root causing and fixing the issue have been unsuccessful so let's revert for now. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217599 Cc: Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/aacraid.h | 1 - drivers/scsi/aacraid/commsup.c | 6 +----- drivers/scsi/aacraid/linit.c | 14 -------------- drivers/scsi/aacraid/src.c | 25 ++----------------------- 4 files changed, 3 insertions(+), 43 deletions(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 73b6ac0c01f5..7d5a155073c6 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1678,7 +1678,6 @@ struct aac_dev u32 handle_pci_error; bool init_reset; u8 soft_reset_support; - u8 use_map_queue; }; #define aac_adapter_interrupt(dev) \ diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 013a9a334972..25cee03d7f97 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -223,12 +223,8 @@ int aac_fib_setup(struct aac_dev * dev) struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd) { struct fib *fibptr; - u32 blk_tag; - int i; - blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); - i = blk_mq_unique_tag_to_tag(blk_tag); - fibptr = &dev->fibs[i]; + fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag]; /* * Null out fields that depend on being zero at the start of * each I/O diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index c4a36c0be527..68f4dbcfff49 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -505,15 +504,6 @@ static int aac_slave_configure(struct scsi_device *sdev) return 0; } -static void aac_map_queues(struct Scsi_Host *shost) -{ - struct aac_dev *aac = (struct aac_dev *)shost->hostdata; - - blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], - aac->pdev, 0); - aac->use_map_queue = true; -} - /** * aac_change_queue_depth - alter queue depths * @sdev: SCSI device we are considering @@ -1498,7 +1488,6 @@ static const struct scsi_host_template aac_driver_template = { .bios_param = aac_biosparm, .shost_groups = aac_host_groups, .slave_configure = aac_slave_configure, - .map_queues = aac_map_queues, .change_queue_depth = aac_change_queue_depth, .sdev_groups = aac_dev_groups, .eh_abort_handler = aac_eh_abort, @@ -1786,8 +1775,6 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) shost->max_lun = AAC_MAX_LUN; pci_set_drvdata(pdev, shost); - shost->nr_hw_queues = aac->max_msix; - shost->host_tagset = 1; error = scsi_add_host(shost, &pdev->dev); if (error) @@ -1919,7 +1906,6 @@ static void aac_remove_one(struct pci_dev *pdev) struct aac_dev *aac = (struct aac_dev *)shost->hostdata; aac_cancel_rescan_worker(aac); - aac->use_map_queue = false; scsi_remove_host(shost); __aac_shutdown(aac); diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 61949f374188..11ef58204e96 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -493,10 +493,6 @@ static int aac_src_deliver_message(struct fib *fib) #endif u16 vector_no; - struct scsi_cmnd *scmd; - u32 blk_tag; - struct Scsi_Host *shost = dev->scsi_host_ptr; - struct blk_mq_queue_map *qmap; atomic_inc(&q->numpending); @@ -509,25 +505,8 @@ static int aac_src_deliver_message(struct fib *fib) if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3) && dev->sa_firmware) vector_no = aac_get_vector(dev); - else { - if (!fib->vector_no || !fib->callback_data) { - if (shost && dev->use_map_queue) { - qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; - vector_no = qmap->mq_map[raw_smp_processor_id()]; - } - /* - * We hardcode the vector_no for - * reserved commands as a valid shost is - * absent during the init - */ - else - vector_no = 0; - } else { - scmd = (struct scsi_cmnd *)fib->callback_data; - blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); - vector_no = blk_mq_unique_tag_to_hwq(blk_tag); - } - } + else + vector_no = fib->vector_no; if (native_hba) { if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) { From d7ef2eeec259eb28c0273132c1a73aa069cab13a Mon Sep 17 00:00:00 2001 From: Alexander Atanasov Date: Fri, 15 Dec 2023 14:10:08 +0200 Subject: [PATCH 1328/1397] scsi: core: Always send batch on reset or error handling command commit 066c5b46b6eaf2f13f80c19500dbb3b84baabb33 upstream. In commit 8930a6c20791 ("scsi: core: add support for request batching") the block layer bd->last flag was mapped to SCMD_LAST and used as an indicator to send the batch for the drivers that implement this feature. However, the error handling code was not updated accordingly. scsi_send_eh_cmnd() is used to send error handling commands and request sense. The problem is that request sense comes as a single command that gets into the batch queue and times out. As a result the device goes offline after several failed resets. This was observed on virtio_scsi during a device resize operation. [ 496.316946] sd 0:0:4:0: [sdd] tag#117 scsi_eh_0: requesting sense [ 506.786356] sd 0:0:4:0: [sdd] tag#117 scsi_send_eh_cmnd timeleft: 0 [ 506.787981] sd 0:0:4:0: [sdd] tag#117 abort To fix this always set SCMD_LAST flag in scsi_send_eh_cmnd() and scsi_reset_ioctl(). Fixes: 8930a6c20791 ("scsi: core: add support for request batching") Cc: Signed-off-by: Alexander Atanasov Link: https://lore.kernel.org/r/20231215121008.2881653-1-alexander.atanasov@virtuozzo.com Reviewed-by: Ming Lei Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_error.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index c67cdcdc3ba8..1223d34c04da 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1152,6 +1152,7 @@ static enum scsi_disposition scsi_send_eh_cmnd(struct scsi_cmnd *scmd, scsi_log_send(scmd); scmd->submitter = SUBMITTED_BY_SCSI_ERROR_HANDLER; + scmd->flags |= SCMD_LAST; /* * Lock sdev->state_mutex to avoid that scsi_device_quiesce() can @@ -2459,6 +2460,7 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg) scsi_init_command(dev, scmd); scmd->submitter = SUBMITTED_BY_SCSI_RESET_IOCTL; + scmd->flags |= SCMD_LAST; memset(&scmd->sdb, 0, sizeof(scmd->sdb)); scmd->cmd_len = 0; From 7e39c55ee095bbc7c325a6cabaa074b034c24e4a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 20 Dec 2023 11:15:25 -0500 Subject: [PATCH 1329/1397] tracing / synthetic: Disable events after testing in synth_event_gen_test_init() commit 88b30c7f5d27e1594d70dc2bd7199b18f2b57fa9 upstream. The synth_event_gen_test module can be built in, if someone wants to run the tests at boot up and not have to load them. The synth_event_gen_test_init() function creates and enables the synthetic events and runs its tests. The synth_event_gen_test_exit() disables the events it created and destroys the events. If the module is builtin, the events are never disabled. The issue is, the events should be disable after the tests are run. This could be an issue if the rest of the boot up tests are enabled, as they expect the events to be in a known state before testing. That known state happens to be disabled. When CONFIG_SYNTH_EVENT_GEN_TEST=y and CONFIG_EVENT_TRACE_STARTUP_TEST=y a warning will trigger: Running tests on trace events: Testing event create_synth_test: Enabled event during self test! ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1 at kernel/trace/trace_events.c:4150 event_trace_self_tests+0x1c2/0x480 Modules linked in: CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.7.0-rc2-test-00031-gb803d7c664d5-dirty #276 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 RIP: 0010:event_trace_self_tests+0x1c2/0x480 Code: bb e8 a2 ab 5d fc 48 8d 7b 48 e8 f9 3d 99 fc 48 8b 73 48 40 f6 c6 01 0f 84 d6 fe ff ff 48 c7 c7 20 b6 ad bb e8 7f ab 5d fc 90 <0f> 0b 90 48 89 df e8 d3 3d 99 fc 48 8b 1b 4c 39 f3 0f 85 2c ff ff RSP: 0000:ffffc9000001fdc0 EFLAGS: 00010246 RAX: 0000000000000029 RBX: ffff88810399ca80 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffffb9f19478 RDI: ffff88823c734e64 RBP: ffff88810399f300 R08: 0000000000000000 R09: fffffbfff79eb32a R10: ffffffffbcf59957 R11: 0000000000000001 R12: ffff888104068090 R13: ffffffffbc89f0a0 R14: ffffffffbc8a0f08 R15: 0000000000000078 FS: 0000000000000000(0000) GS:ffff88823c700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001f6282001 CR4: 0000000000170ef0 Call Trace: ? __warn+0xa5/0x200 ? event_trace_self_tests+0x1c2/0x480 ? report_bug+0x1f6/0x220 ? handle_bug+0x6f/0x90 ? exc_invalid_op+0x17/0x50 ? asm_exc_invalid_op+0x1a/0x20 ? tracer_preempt_on+0x78/0x1c0 ? event_trace_self_tests+0x1c2/0x480 ? __pfx_event_trace_self_tests_init+0x10/0x10 event_trace_self_tests_init+0x27/0xe0 do_one_initcall+0xd6/0x3c0 ? __pfx_do_one_initcall+0x10/0x10 ? kasan_set_track+0x25/0x30 ? rcu_is_watching+0x38/0x60 kernel_init_freeable+0x324/0x450 ? __pfx_kernel_init+0x10/0x10 kernel_init+0x1f/0x1e0 ? _raw_spin_unlock_irq+0x33/0x50 ret_from_fork+0x34/0x60 ? __pfx_kernel_init+0x10/0x10 ret_from_fork_asm+0x1b/0x30 This is because the synth_event_gen_test_init() left the synthetic events that it created enabled. By having it disable them after testing, the other selftests will run fine. Link: https://lore.kernel.org/linux-trace-kernel/20231220111525.2f0f49b0@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Cc: Tom Zanussi Fixes: 9fe41efaca084 ("tracing: Add synth event generation test module") Acked-by: Masami Hiramatsu (Google) Reported-by: Alexander Graf Tested-by: Alexander Graf Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/synth_event_gen_test.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c index 8dfe85499d4a..354c2117be43 100644 --- a/kernel/trace/synth_event_gen_test.c +++ b/kernel/trace/synth_event_gen_test.c @@ -477,6 +477,17 @@ static int __init synth_event_gen_test_init(void) ret = test_trace_synth_event(); WARN_ON(ret); + + /* Disable when done */ + trace_array_set_clr_event(gen_synth_test->tr, + "synthetic", + "gen_synth_test", false); + trace_array_set_clr_event(empty_synth_test->tr, + "synthetic", + "empty_synth_test", false); + trace_array_set_clr_event(create_synth_test->tr, + "synthetic", + "create_synth_test", false); out: return ret; } From 3b664557b12fcf8acd3df595afbd184c82a97f8b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 5 Dec 2023 16:39:16 +0100 Subject: [PATCH 1330/1397] dm-integrity: don't modify bio's immutable bio_vec in integrity_metadata() commit b86f4b790c998afdbc88fe1aa55cfe89c4068726 upstream. __bio_for_each_segment assumes that the first struct bio_vec argument doesn't change - it calls "bio_advance_iter_single((bio), &(iter), (bvl).bv_len)" to advance the iterator. Unfortunately, the dm-integrity code changes the bio_vec with "bv.bv_len -= pos". When this code path is taken, the iterator would be out of sync and dm-integrity would report errors. This happens if the machine is out of memory and "kmalloc" fails. Fix this bug by making a copy of "bv" and changing the copy instead. Fixes: 7eada909bfd7 ("dm: add integrity target") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 97a8d5fc9ebb..9261bbebd662 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1765,11 +1765,12 @@ static void integrity_metadata(struct work_struct *w) sectors_to_process = dio->range.n_sectors; __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { + struct bio_vec bv_copy = bv; unsigned int pos; char *mem, *checksums_ptr; again: - mem = bvec_kmap_local(&bv); + mem = bvec_kmap_local(&bv_copy); pos = 0; checksums_ptr = checksums; do { @@ -1778,7 +1779,7 @@ static void integrity_metadata(struct work_struct *w) sectors_to_process -= ic->sectors_per_block; pos += ic->sectors_per_block << SECTOR_SHIFT; sector += ic->sectors_per_block; - } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack); + } while (pos < bv_copy.bv_len && sectors_to_process && checksums != checksums_onstack); kunmap_local(mem); r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset, @@ -1803,9 +1804,9 @@ static void integrity_metadata(struct work_struct *w) if (!sectors_to_process) break; - if (unlikely(pos < bv.bv_len)) { - bv.bv_offset += pos; - bv.bv_len -= pos; + if (unlikely(pos < bv_copy.bv_len)) { + bv_copy.bv_offset += pos; + bv_copy.bv_len -= pos; goto again; } } From c18cf955d11c17eabed67094b3e7091c1c22fdcc Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Dec 2023 17:04:24 +0100 Subject: [PATCH 1331/1397] selftests: mptcp: join: fix subflow_send_ack lookup commit c8f021eec5817601dbd25ab7e3ad5c720965c688 upstream. MPC backups tests will skip unexpected sometimes (For example, when compiling kernel with an older version of gcc, such as gcc-8), since static functions like mptcp_subflow_send_ack also be listed in /proc/kallsyms, with a 't' in front of it, not 'T' ('T' is for a global function): > grep "mptcp_subflow_send_ack" /proc/kallsyms 0000000000000000 T __pfx___mptcp_subflow_send_ack 0000000000000000 T __mptcp_subflow_send_ack 0000000000000000 t __pfx_mptcp_subflow_send_ack 0000000000000000 t mptcp_subflow_send_ack In this case, mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$" will be false, MPC backups tests will skip. This is not what we expected. The correct logic here should be: if mptcp_subflow_send_ack is not a global function in /proc/kallsyms, do these MPC backups tests. So a 'T' must be added in front of mptcp_subflow_send_ack. Fixes: 632978f0a961 ("selftests: mptcp: join: skip MPC backups tests if not supported") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 8eec7d2c1fc6..4632a954c73e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2773,7 +2773,7 @@ backup_tests() fi if reset "mpc backup" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2782,7 +2782,7 @@ backup_tests() fi if reset "mpc backup both sides" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup speed=slow \ @@ -2792,7 +2792,7 @@ backup_tests() fi if reset "mpc switch to backup" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2801,7 +2801,7 @@ backup_tests() fi if reset "mpc switch to backup both sides" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow sflags=backup speed=slow \ From dc958dd32ce850e97377482d4b6489bfbead9613 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 1 Dec 2023 10:23:28 +0100 Subject: [PATCH 1332/1397] pinctrl: starfive: jh7110: ignore disabled device tree nodes commit f6e3b40a2c89c1d832ed9cb031dc9825bbf43b7c upstream. The driver always registers pin configurations in device tree. This can cause some inconvenience to users, as pin configurations in the base device tree cannot be disabled in the device tree overlay, even when the relevant devices are not used. Ignore disabled pin configuration nodes in device tree. Fixes: 447976ab62c5 ("pinctrl: starfive: Add StarFive JH7110 sys controller driver") Cc: Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/fd8bf044799ae50a6291ae150ef87b4f1923cacb.1701422582.git.namcao@linutronix.de Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c index 640f827a9b2c..b4f799572689 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7110.c @@ -135,7 +135,7 @@ static int jh7110_dt_node_to_map(struct pinctrl_dev *pctldev, int ret; ngroups = 0; - for_each_child_of_node(np, child) + for_each_available_child_of_node(np, child) ngroups += 1; nmaps = 2 * ngroups; @@ -150,7 +150,7 @@ static int jh7110_dt_node_to_map(struct pinctrl_dev *pctldev, nmaps = 0; ngroups = 0; mutex_lock(&sfp->mutex); - for_each_child_of_node(np, child) { + for_each_available_child_of_node(np, child) { int npins = of_property_count_u32_elems(child, "pinmux"); int *pins; u32 *pinmux; From 6e827b18219e8437f3840cc0ca1c9b5d6b85b03f Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 1 Dec 2023 10:23:29 +0100 Subject: [PATCH 1333/1397] pinctrl: starfive: jh7100: ignore disabled device tree nodes commit 5c584f175d32f9cc66c909f851cd905da58b39ea upstream. The driver always registers pin configurations in device tree. This can cause some inconvenience to users, as pin configurations in the base device tree cannot be disabled in the device tree overlay, even when the relevant devices are not used. Ignore disabled pin configuration nodes in device tree. Fixes: ec648f6b7686 ("pinctrl: starfive: Add pinctrl driver for StarFive SoCs") Cc: Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/fe4c15dcc3074412326b8dc296b0cbccf79c49bf.1701422582.git.namcao@linutronix.de Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c index 530fe340a9a1..561fd0c6b9b0 100644 --- a/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c +++ b/drivers/pinctrl/starfive/pinctrl-starfive-jh7100.c @@ -492,7 +492,7 @@ static int starfive_dt_node_to_map(struct pinctrl_dev *pctldev, nmaps = 0; ngroups = 0; - for_each_child_of_node(np, child) { + for_each_available_child_of_node(np, child) { int npinmux = of_property_count_u32_elems(child, "pinmux"); int npins = of_property_count_u32_elems(child, "pins"); @@ -527,7 +527,7 @@ static int starfive_dt_node_to_map(struct pinctrl_dev *pctldev, nmaps = 0; ngroups = 0; mutex_lock(&sfp->mutex); - for_each_child_of_node(np, child) { + for_each_available_child_of_node(np, child) { int npins; int i; From add8973e3de6a3557ef5fd0ecc317b24fc6efe6e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 24 Nov 2023 10:50:56 +0200 Subject: [PATCH 1334/1397] bus: ti-sysc: Flush posted write only after srst_udelay commit f71f6ff8c1f682a1cae4e8d7bdeed9d7f76b8f75 upstream. Commit 34539b442b3b ("bus: ti-sysc: Flush posted write on enable before reset") caused a regression reproducable on omap4 duovero where the ISS target module can produce interconnect errors on boot. Turns out the registers are not accessible until after a delay for devices needing a ti,sysc-delay-us value. Let's fix this by flushing the posted write only after the reset delay. We do flushing also for ti,sysc-delay-us using devices as that should trigger an interconnect error if the delay is not properly configured. Let's also add some comments while at it. Fixes: 34539b442b3b ("bus: ti-sysc: Flush posted write on enable before reset") Cc: stable@vger.kernel.org Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- drivers/bus/ti-sysc.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index d57bc066dce6..9ed9239b1228 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -2158,13 +2158,23 @@ static int sysc_reset(struct sysc *ddata) sysc_val = sysc_read_sysconfig(ddata); sysc_val |= sysc_mask; sysc_write(ddata, sysc_offset, sysc_val); - /* Flush posted write */ + + /* + * Some devices need a delay before reading registers + * after reset. Presumably a srst_udelay is not needed + * for devices that use a rstctrl register reset. + */ + if (ddata->cfg.srst_udelay) + fsleep(ddata->cfg.srst_udelay); + + /* + * Flush posted write. For devices needing srst_udelay + * this should trigger an interconnect error if the + * srst_udelay value is needed but not configured. + */ sysc_val = sysc_read_sysconfig(ddata); } - if (ddata->cfg.srst_udelay) - fsleep(ddata->cfg.srst_udelay); - if (ddata->post_reset_quirk) ddata->post_reset_quirk(ddata); From 9a6ed4ea985ac4b7a566faddeb16271b8ea54d36 Mon Sep 17 00:00:00 2001 From: xiongxin Date: Wed, 20 Dec 2023 10:29:01 +0800 Subject: [PATCH 1335/1397] gpio: dwapb: mask/unmask IRQ when disable/enale it commit 1cc3542c76acb5f59001e3e562eba672f1983355 upstream. In the hardware implementation of the I2C HID driver based on DesignWare GPIO IRQ chip, when the user continues to use the I2C HID device in the suspend process, the I2C HID interrupt will be masked after the resume process is finished. This is because the disable_irq()/enable_irq() of the DesignWare GPIO driver does not synchronize the IRQ mask register state. In normal use of the I2C HID procedure, the GPIO IRQ irq_mask()/irq_unmask() functions are called in pairs. In case of an exception, i2c_hid_core_suspend() calls disable_irq() to disable the GPIO IRQ. With low probability, this causes irq_unmask() to not be called, which causes the GPIO IRQ to be masked and not unmasked in enable_irq(), raising an exception. Add synchronization to the masked register state in the dwapb_irq_enable()/dwapb_irq_disable() function. mask the GPIO IRQ before disabling it. After enabling the GPIO IRQ, unmask the IRQ. Fixes: 7779b3455697 ("gpio: add a driver for the Synopsys DesignWare APB GPIO block") Cc: stable@kernel.org Co-developed-by: Riwen Lu Signed-off-by: Riwen Lu Signed-off-by: xiongxin Acked-by: Serge Semin Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-dwapb.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index c22fcaa44a61..6b7d47a52b10 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -283,13 +283,15 @@ static void dwapb_irq_enable(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct dwapb_gpio *gpio = to_dwapb_gpio(gc); + irq_hw_number_t hwirq = irqd_to_hwirq(d); unsigned long flags; u32 val; raw_spin_lock_irqsave(&gc->bgpio_lock, flags); - val = dwapb_read(gpio, GPIO_INTEN); - val |= BIT(irqd_to_hwirq(d)); + val = dwapb_read(gpio, GPIO_INTEN) | BIT(hwirq); dwapb_write(gpio, GPIO_INTEN, val); + val = dwapb_read(gpio, GPIO_INTMASK) & ~BIT(hwirq); + dwapb_write(gpio, GPIO_INTMASK, val); raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); } @@ -297,12 +299,14 @@ static void dwapb_irq_disable(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct dwapb_gpio *gpio = to_dwapb_gpio(gc); + irq_hw_number_t hwirq = irqd_to_hwirq(d); unsigned long flags; u32 val; raw_spin_lock_irqsave(&gc->bgpio_lock, flags); - val = dwapb_read(gpio, GPIO_INTEN); - val &= ~BIT(irqd_to_hwirq(d)); + val = dwapb_read(gpio, GPIO_INTMASK) | BIT(hwirq); + dwapb_write(gpio, GPIO_INTMASK, val); + val = dwapb_read(gpio, GPIO_INTEN) & ~BIT(hwirq); dwapb_write(gpio, GPIO_INTEN, val); raw_spin_unlock_irqrestore(&gc->bgpio_lock, flags); } From 50ae1c470491481328363639c227020691469f29 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Tue, 14 Nov 2023 16:26:55 +0100 Subject: [PATCH 1336/1397] lib/vsprintf: Fix %pfwf when current node refcount == 0 commit 5c47251e8c4903111608ddcba2a77c0c425c247c upstream. A refcount issue can appeared in __fwnode_link_del() due to the pr_debug() call: WARNING: CPU: 0 PID: 901 at lib/refcount.c:25 refcount_warn_saturate+0xe5/0x110 Call Trace: ... of_node_get+0x1e/0x30 of_fwnode_get+0x28/0x40 fwnode_full_name_string+0x34/0x90 fwnode_string+0xdb/0x140 ... vsnprintf+0x17b/0x630 ... __fwnode_link_del+0x25/0xa0 fwnode_links_purge+0x39/0xb0 of_node_release+0xd9/0x180 ... Indeed, an fwnode (of_node) is being destroyed and so, of_node_release() is called because the of_node refcount reached 0. From of_node_release() several function calls are done and lead to a pr_debug() calls with %pfwf to print the fwnode full name. The issue is not present if we change %pfwf to %pfwP. To print the full name, %pfwf iterates over the current node and its parents and obtain/drop a reference to all nodes involved. In order to allow to print the full name (%pfwf) of a node while it is being destroyed, do not obtain/drop a reference to this current node. Fixes: a92eb7621b9f ("lib/vsprintf: Make use of fwnode API to obtain node names and separators") Cc: stable@vger.kernel.org Signed-off-by: Herve Codina Reviewed-by: Sakari Ailus Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20231114152655.409331-1-herve.codina@bootlin.com Signed-off-by: Greg Kroah-Hartman --- lib/vsprintf.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index afb88b24fa74..2aa408441cd3 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2110,15 +2110,20 @@ char *fwnode_full_name_string(struct fwnode_handle *fwnode, char *buf, /* Loop starting from the root node to the current node. */ for (depth = fwnode_count_parents(fwnode); depth >= 0; depth--) { - struct fwnode_handle *__fwnode = - fwnode_get_nth_parent(fwnode, depth); + /* + * Only get a reference for other nodes (i.e. parent nodes). + * fwnode refcount may be 0 here. + */ + struct fwnode_handle *__fwnode = depth ? + fwnode_get_nth_parent(fwnode, depth) : fwnode; buf = string(buf, end, fwnode_get_name_prefix(__fwnode), default_str_spec); buf = string(buf, end, fwnode_get_name(__fwnode), default_str_spec); - fwnode_handle_put(__fwnode); + if (depth) + fwnode_handle_put(__fwnode); } return buf; From 6fbaeffc3a92b4a32b8e320e486cf644ff30d240 Mon Sep 17 00:00:00 2001 From: Yaxiong Tian Date: Wed, 22 Nov 2023 16:02:43 +0800 Subject: [PATCH 1337/1397] thunderbolt: Fix memory leak in margining_port_remove() commit ac43c9122e4287bbdbe91e980fc2528acb72cc1e upstream. The dentry returned by debugfs_lookup() needs to be released by calling dput() which is missing in margining_port_remove(). Fix this by calling debugfs_lookup_and_remove() that combines both and avoids the memory leak. Fixes: d0f1e0c2a699 ("thunderbolt: Add support for receiver lane margining") Cc: stable@vger.kernel.org Signed-off-by: Yaxiong Tian Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index c9ddd49138d8..e324cd899719 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -959,7 +959,7 @@ static void margining_port_remove(struct tb_port *port) snprintf(dir_name, sizeof(dir_name), "port%d", port->port); parent = debugfs_lookup(dir_name, port->sw->debugfs_dir); if (parent) - debugfs_remove_recursive(debugfs_lookup("margining", parent)); + debugfs_lookup_and_remove("margining", parent); kfree(port->usb4->margining); port->usb4->margining = NULL; From 71c631a9fb211df4ea5a59ac68a310c5854f07f1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Dec 2023 15:11:57 +0000 Subject: [PATCH 1338/1397] KVM: arm64: vgic: Simplify kvm_vgic_destroy() commit 01ad29d224ff73bc4e16e0ef9ece17f28598c4a4 upstream. When destroying a vgic, we have rather cumbersome rules about when slots_lock and config_lock are held, resulting in fun buglets. The first port of call is to simplify kvm_vgic_map_resources() so that there is only one call to kvm_vgic_destroy() instead of two, with the second only holding half of the locks. For that, we kill the non-locking primitive and move the call outside of the locking altogether. This doesn't change anything (we re-acquire the locks and teardown the whole vgic), and simplifies the code significantly. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231207151201.3028710-2-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/vgic/vgic-init.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index c8c3cb812783..ad7e86879eb9 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -382,26 +382,24 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; } -static void __kvm_vgic_destroy(struct kvm *kvm) +void kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; unsigned long i; - lockdep_assert_held(&kvm->arch.config_lock); + mutex_lock(&kvm->slots_lock); vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_destroy(vcpu); + mutex_lock(&kvm->arch.config_lock); + kvm_vgic_dist_destroy(kvm); -} -void kvm_vgic_destroy(struct kvm *kvm) -{ - mutex_lock(&kvm->arch.config_lock); - __kvm_vgic_destroy(kvm); mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->slots_lock); } /** @@ -469,25 +467,26 @@ int kvm_vgic_map_resources(struct kvm *kvm) type = VGIC_V3; } - if (ret) { - __kvm_vgic_destroy(kvm); + if (ret) goto out; - } + dist->ready = true; dist_base = dist->vgic_dist_base; mutex_unlock(&kvm->arch.config_lock); ret = vgic_register_dist_iodev(kvm, dist_base, type); - if (ret) { + if (ret) kvm_err("Unable to register VGIC dist MMIO regions\n"); - kvm_vgic_destroy(kvm); - } - mutex_unlock(&kvm->slots_lock); - return ret; + goto out_slots; out: mutex_unlock(&kvm->arch.config_lock); +out_slots: mutex_unlock(&kvm->slots_lock); + + if (ret) + kvm_vgic_destroy(kvm); + return ret; } From 05d47e26c2518317993ec7f9a7853b035472a462 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Dec 2023 15:11:58 +0000 Subject: [PATCH 1339/1397] KVM: arm64: vgic: Add a non-locking primitive for kvm_vgic_vcpu_destroy() commit d26b9cb33c2d1ba68d1f26bb06c40300f16a3799 upstream. As we are going to need to call into kvm_vgic_vcpu_destroy() without prior holding of the slots_lock, introduce __kvm_vgic_vcpu_destroy() as a non-locking primitive of kvm_vgic_vcpu_destroy(). Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231207151201.3028710-3-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/vgic/vgic-init.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index ad7e86879eb9..a86f300321a7 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -368,7 +368,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) vgic_v4_teardown(kvm); } -void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -382,6 +382,15 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; } +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + mutex_lock(&kvm->slots_lock); + __kvm_vgic_vcpu_destroy(vcpu); + mutex_unlock(&kvm->slots_lock); +} + void kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; @@ -392,7 +401,7 @@ void kvm_vgic_destroy(struct kvm *kvm) vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_destroy(vcpu); + __kvm_vgic_vcpu_destroy(vcpu); mutex_lock(&kvm->arch.config_lock); From e13ce009ecab2b4336ae857c0d790e11398f2ae2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Dec 2023 15:11:59 +0000 Subject: [PATCH 1340/1397] KVM: arm64: vgic: Force vcpu vgic teardown on vcpu destroy commit 02e3858f08faabab9503ae2911cf7c7e27702257 upstream. When failing to create a vcpu because (for example) it has a duplicate vcpu_id, we destroy the vcpu. Amusingly, this leaves the redistributor registered with the KVM_MMIO bus. This is no good, and we should properly clean the mess. Force a teardown of the vgic vcpu interface, including the RD device before returning to the caller. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231207151201.3028710-4-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/vgic/vgic-init.c | 5 ++++- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 2 +- arch/arm64/kvm/vgic/vgic.h | 1 + 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4866b3f7b4ea..685cc436146a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -407,7 +407,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); kvm_timer_vcpu_terminate(vcpu); kvm_pmu_vcpu_destroy(vcpu); - + kvm_vgic_vcpu_destroy(vcpu); kvm_arm_vcpu_destroy(vcpu); } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index a86f300321a7..e949e1d0fd9f 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -379,7 +379,10 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) vgic_flush_pending_lpis(vcpu); INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + vgic_unregister_redist_iodev(vcpu); + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + } } void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 188d2187eede..871a45d4fc84 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -820,7 +820,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) return ret; } -static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) { struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 0ab09b0d4440..8d134569d0a1 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -241,6 +241,7 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); int vgic_v3_save_pending_tables(struct kvm *kvm); int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count); int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu); bool vgic_v3_check_base(struct kvm *kvm); void vgic_v3_load(struct kvm_vcpu *vcpu); From 5518f168ae6d7eb6666227b92836bb75051d5de2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 7 Dec 2023 20:49:24 +0100 Subject: [PATCH 1341/1397] x86/alternatives: Sync core before enabling interrupts commit 3ea1704a92967834bf0e64ca1205db4680d04048 upstream. text_poke_early() does: local_irq_save(flags); memcpy(addr, opcode, len); local_irq_restore(flags); sync_core(); That's not really correct because the synchronization should happen before interrupts are re-enabled to ensure that a pending interrupt observes the complete update of the opcodes. It's not entirely clear whether the interrupt entry provides enough serialization already, but moving the sync_core() invocation into interrupt disabled region does no harm and is obviously correct. Fixes: 6fffacb30349 ("x86/alternatives, jumplabel: Use text_poke_early() before mm_init()") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Acked-by: Peter Zijlstra (Intel) Cc: Link: https://lore.kernel.org/r/ZT6narvE%2BLxX%2B7Be@windriver.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 73be3931e4f0..fd44739828f7 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1685,8 +1685,8 @@ void __init_or_module text_poke_early(void *addr, const void *opcode, } else { local_irq_save(flags); memcpy(addr, opcode, len); - local_irq_restore(flags); sync_core(); + local_irq_restore(flags); /* * Could also do a CLFLUSH here to speed up CPU recovery; but From 6778977590dacbd8964ef1da715f5b07e9e04302 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 7 Dec 2023 20:49:26 +0100 Subject: [PATCH 1342/1397] x86/alternatives: Disable interrupts and sync when optimizing NOPs in place commit 2dc4196138055eb0340231aecac4d78c2ec2bea5 upstream. apply_alternatives() treats alternatives with the ALT_FLAG_NOT flag set special as it optimizes the existing NOPs in place. Unfortunately, this happens with interrupts enabled and does not provide any form of core synchronization. So an interrupt hitting in the middle of the update and using the affected code path will observe a half updated NOP and crash and burn. The following 3 NOP sequence was observed to expose this crash halfway reliably under QEMU 32bit: 0x90 0x90 0x90 which is replaced by the optimized 3 byte NOP: 0x8d 0x76 0x00 So an interrupt can observe: 1) 0x90 0x90 0x90 nop nop nop 2) 0x8d 0x90 0x90 undefined 3) 0x8d 0x76 0x90 lea -0x70(%esi),%esi 4) 0x8d 0x76 0x00 lea 0x0(%esi),%esi Where only #1 and #4 are true NOPs. The same problem exists for 64bit obviously. Disable interrupts around this NOP optimization and invoke sync_core() before re-enabling them. Fixes: 270a69c4485d ("x86/alternative: Support relocations in alternatives") Reported-by: Paul Gortmaker Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/ZT6narvE%2BLxX%2B7Be@windriver.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/alternative.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index fd44739828f7..aae7456ece07 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -255,6 +255,16 @@ static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) } } +static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len) +{ + unsigned long flags; + + local_irq_save(flags); + optimize_nops(instr, len); + sync_core(); + local_irq_restore(flags); +} + /* * In this context, "source" is where the instructions are placed in the * section .altinstr_replacement, for example during kernel build by the @@ -438,7 +448,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * patch if feature is *NOT* present. */ if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) { - optimize_nops(instr, a->instrlen); + optimize_nops_inplace(instr, a->instrlen); continue; } From ddc4ad52c022c366c3b48f7eaed7c36f92f95db4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Dec 2023 09:58:58 +0100 Subject: [PATCH 1343/1397] x86/smpboot/64: Handle X2APIC BIOS inconsistency gracefully commit 69a7386c1ec25476a0c78ffeb59de08a2a08f495 upstream. Chris reported that a Dell PowerEdge T340 system stopped to boot when upgrading to a kernel which contains the parallel hotplug changes. Disabling parallel hotplug on the kernel command line makes it boot again. It turns out that the Dell BIOS has x2APIC enabled and the boot CPU comes up in X2APIC mode, but the APs come up inconsistently in xAPIC mode. Parallel hotplug requires that the upcoming CPU reads out its APIC ID from the local APIC in order to map it to the Linux CPU number. In this particular case the readout on the APs uses the MMIO mapped registers because the BIOS failed to enable x2APIC mode. That readout results in a page fault because the kernel does not have the APIC MMIO space mapped when X2APIC mode was enabled by the BIOS on the boot CPU and the kernel switched to X2APIC mode early. That page fault can't be handled on the upcoming CPU that early and results in a silent boot failure. If parallel hotplug is disabled the system boots because in that case the APIC ID read is not required as the Linux CPU number is provided to the AP in the smpboot control word. When the kernel uses x2APIC mode then the APs are switched to x2APIC mode too slightly later in the bringup process, but there is no reason to do it that late. Cure the BIOS bogosity by checking in the parallel bootup path whether the kernel uses x2APIC mode and if so switching over the APs to x2APIC mode before the APIC ID readout. Fixes: 0c7ffa32dbd6 ("x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it") Reported-by: Chris Lindee Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Ashok Raj Tested-by: Chris Lindee Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/CA%2B2tU59853R49EaU_tyvOZuOTDdcU0RshGyydccp9R1NX9bEeQ@mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head_64.S | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ea6995920b7a..e6eaee8509ce 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -256,6 +256,22 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) testl $X2APIC_ENABLE, %eax jnz .Lread_apicid_msr +#ifdef CONFIG_X86_X2APIC + /* + * If system is in X2APIC mode then MMIO base might not be + * mapped causing the MMIO read below to fault. Faults can't + * be handled at that point. + */ + cmpl $0, x2apic_mode(%rip) + jz .Lread_apicid_mmio + + /* Force the AP into X2APIC mode. */ + orl $X2APIC_ENABLE, %eax + wrmsr + jmp .Lread_apicid_msr +#endif + +.Lread_apicid_mmio: /* Read the APIC ID from the fix-mapped MMIO space. */ movq apic_mmio_base(%rip), %rcx addq $APIC_ID, %rcx From 883b9f52b705278eca84357dd09ac3983c64535f Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 6 Dec 2023 15:52:33 +0100 Subject: [PATCH 1344/1397] spi: cadence: revert "Add SPI transfer delays" commit 7a733e060bd20edb63b1f27f0b29cf9b184e0e8b upstream. The commit 855a40cd8ccc ("spi: cadence: Add SPI transfer delays") adds a delay after each transfer into the driver's transfer_one(). However, the delay is already done in SPI core. So this commit unnecessarily doubles the delay amount. Revert this commit. Signed-off-by: Nam Cao Link: https://lore.kernel.org/r/20231206145233.74982-1-namcao@linutronix.de Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-cadence.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 12c940ba074a..bd96d8b546cd 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -451,7 +451,6 @@ static int cdns_transfer_one(struct spi_controller *ctlr, udelay(10); cdns_spi_process_fifo(xspi, xspi->tx_fifo_depth, 0); - spi_transfer_delay_exec(transfer); cdns_spi_write(xspi, CDNS_SPI_IER, CDNS_SPI_IXR_DEFAULT); return transfer->len; From 5e9df83a705290c4d974693097df1da9cbe25854 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 1 Jan 2024 12:42:47 +0000 Subject: [PATCH 1345/1397] Linux 6.6.9 Link: https://lore.kernel.org/r/20231230115812.333117904@linuxfoundation.org Tested-by: Ricardo B. Marliere Tested-by: Takeshi Ogasawara Tested-by: Ronald Warsow Tested-by: Florian Fainelli Tested-by: SeongJae Park Tested-by: Salvatore Bonaccorso Tested-by: Ron Economos Tested-by: Bagas Sanjaya Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 891ef640396c..4d1d5e925bb2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 8 +SUBLEVEL = 9 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth From 3ba08c420d05bbefcb9421a740be6788ee33f7df Mon Sep 17 00:00:00 2001 From: Cheng-Han Wu Date: Sun, 31 Dec 2023 16:19:01 +0900 Subject: [PATCH 1346/1397] ksmbd: Remove unused field in ksmbd_user struct [ Upstream commit eacc655e18d1dec9b50660d16a1ddeeb4d6c48f2 ] fs/smb/server/mgmt/user_config.h:21: Remove the unused field 'failed_login_count' from the ksmbd_user struct. Signed-off-by: Cheng-Han Wu Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/mgmt/user_config.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/server/mgmt/user_config.h b/fs/smb/server/mgmt/user_config.h index 6a44109617f1..e068a19fd904 100644 --- a/fs/smb/server/mgmt/user_config.h +++ b/fs/smb/server/mgmt/user_config.h @@ -18,7 +18,6 @@ struct ksmbd_user { size_t passkey_sz; char *passkey; - unsigned int failed_login_count; }; static inline bool user_guest(struct ksmbd_user *user) From d73737884ea4d51c81a382259c64bf1c5804f961 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:02 +0900 Subject: [PATCH 1347/1397] ksmbd: reorganize ksmbd_iov_pin_rsp() [ Upstream commit 1819a904299942b309f687cc0f08b123500aa178 ] If ksmbd_iov_pin_rsp fail, io vertor should be rollback. This patch moves memory allocations to before setting the io vector to avoid rollbacks. Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/ksmbd_work.c | 43 +++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c index 51def3ca74c0..a2ed441e837a 100644 --- a/fs/smb/server/ksmbd_work.c +++ b/fs/smb/server/ksmbd_work.c @@ -95,11 +95,28 @@ bool ksmbd_queue_work(struct ksmbd_work *work) return queue_work(ksmbd_wq, &work->work); } -static int ksmbd_realloc_iov_pin(struct ksmbd_work *work, void *ib, - unsigned int ib_len) +static inline void __ksmbd_iov_pin(struct ksmbd_work *work, void *ib, + unsigned int ib_len) { + work->iov[++work->iov_idx].iov_base = ib; + work->iov[work->iov_idx].iov_len = ib_len; + work->iov_cnt++; +} + +static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len, + void *aux_buf, unsigned int aux_size) +{ + struct aux_read *ar; + int need_iov_cnt = 1; - if (work->iov_alloc_cnt <= work->iov_cnt) { + if (aux_size) { + need_iov_cnt++; + ar = kmalloc(sizeof(struct aux_read), GFP_KERNEL); + if (!ar) + return -ENOMEM; + } + + if (work->iov_alloc_cnt < work->iov_cnt + need_iov_cnt) { struct kvec *new; work->iov_alloc_cnt += 4; @@ -111,16 +128,6 @@ static int ksmbd_realloc_iov_pin(struct ksmbd_work *work, void *ib, work->iov = new; } - work->iov[++work->iov_idx].iov_base = ib; - work->iov[work->iov_idx].iov_len = ib_len; - work->iov_cnt++; - - return 0; -} - -static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len, - void *aux_buf, unsigned int aux_size) -{ /* Plus rfc_length size on first iov */ if (!work->iov_idx) { work->iov[work->iov_idx].iov_base = work->response_buf; @@ -129,19 +136,13 @@ static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len, work->iov_cnt++; } - ksmbd_realloc_iov_pin(work, ib, len); + __ksmbd_iov_pin(work, ib, len); inc_rfc1001_len(work->iov[0].iov_base, len); if (aux_size) { - struct aux_read *ar; - - ksmbd_realloc_iov_pin(work, aux_buf, aux_size); + __ksmbd_iov_pin(work, aux_buf, aux_size); inc_rfc1001_len(work->iov[0].iov_base, aux_size); - ar = kmalloc(sizeof(struct aux_read), GFP_KERNEL); - if (!ar) - return -ENOMEM; - ar->buf = aux_buf; list_add(&ar->entry, &work->aux_read_list); } From 31c453b3743fbc04935f3f5d2dd4c34ef7851e4b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:03 +0900 Subject: [PATCH 1348/1397] ksmbd: fix kernel-doc comment of ksmbd_vfs_setxattr() [ Upstream commit 3354db668808d5b6d7c5e0cb19ff4c9da4bb5e58 ] Fix argument list that the kdoc format and script verified in ksmbd_vfs_setxattr(). fs/smb/server/vfs.c:929: warning: Function parameter or member 'path' not described in 'ksmbd_vfs_setxattr' Reported-by: kernel test robot Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/vfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 5a41c0b4e933..183e36cda59e 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -906,7 +906,7 @@ ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap, /** * ksmbd_vfs_setxattr() - vfs helper for smb set extended attributes value * @idmap: idmap of the relevant mount - * @dentry: dentry to set XATTR at + * @path: path of dentry to set XATTR at * @attr_name: xattr name for setxattr * @attr_value: xattr value to set * @attr_size: size of xattr value From dca63bad39501aee8bc5069c69680400dcc2fc86 Mon Sep 17 00:00:00 2001 From: Kangjing Huang Date: Sun, 31 Dec 2023 16:19:04 +0900 Subject: [PATCH 1349/1397] ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev() [ Upstream commit ecce70cf17d91c3dd87a0c4ea00b2d1387729701 ] Physical ib_device does not have an underlying net_device, thus its association with IPoIB net_device cannot be retrieved via ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical ib_device port GUID from the lower 16 bytes of the hardware addresses on IPoIB net_device and match its underlying ib_device using ib_find_gid() Signed-off-by: Kangjing Huang Acked-by: Namjae Jeon Reviewed-by: Tom Talpey Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/transport_rdma.c | 40 +++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 3b269e1f523a..c5629a68c8b7 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct ib_device *ib_dev) if (ib_dev->node_type != RDMA_NODE_IB_CA) smb_direct_port = SMB_DIRECT_PORT_IWARP; - if (!ib_dev->ops.get_netdev || - !rdma_frwr_is_supported(&ib_dev->attrs)) + if (!rdma_frwr_is_supported(&ib_dev->attrs)) return 0; smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL); @@ -2241,17 +2240,38 @@ bool ksmbd_rdma_capable_netdev(struct net_device *netdev) for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) { struct net_device *ndev; - ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev, - i + 1); - if (!ndev) - continue; + if (smb_dev->ib_dev->ops.get_netdev) { + ndev = smb_dev->ib_dev->ops.get_netdev( + smb_dev->ib_dev, i + 1); + if (!ndev) + continue; - if (ndev == netdev) { + if (ndev == netdev) { + dev_put(ndev); + rdma_capable = true; + goto out; + } dev_put(ndev); - rdma_capable = true; - goto out; + /* if ib_dev does not implement ops.get_netdev + * check for matching infiniband GUID in hw_addr + */ + } else if (netdev->type == ARPHRD_INFINIBAND) { + struct netdev_hw_addr *ha; + union ib_gid gid; + u32 port_num; + int ret; + + netdev_hw_addr_list_for_each( + ha, &netdev->dev_addrs) { + memcpy(&gid, ha->addr + 4, sizeof(gid)); + ret = ib_find_gid(smb_dev->ib_dev, &gid, + &port_num, NULL); + if (!ret) { + rdma_capable = true; + goto out; + } + } } - dev_put(ndev); } } out: From 0bd595cb8e8bc6719262c4ac0949eeeb5f8fb385 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:05 +0900 Subject: [PATCH 1350/1397] ksmbd: add support for surrogate pair conversion [ Upstream commit 0c180317c654a494fe429adbf7bc9b0793caf9e2 ] ksmbd is missing supporting to convert filename included surrogate pair characters. It triggers a "file or folder does not exist" error in Windows client. [Steps to Reproduce for bug] 1. Create surrogate pair file touch $(echo -e '\xf0\x9d\x9f\xa3') touch $(echo -e '\xf0\x9d\x9f\xa4') 2. Try to open these files in ksmbd share through Windows client. This patch update unicode functions not to consider about surrogate pair (and IVS). Reviewed-by: Marios Makassikis Tested-by: Marios Makassikis Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/unicode.c | 187 +++++++++++++++++++++++++++++----------- 1 file changed, 138 insertions(+), 49 deletions(-) diff --git a/fs/smb/server/unicode.c b/fs/smb/server/unicode.c index 393dd4a7432b..43ed29ee44ea 100644 --- a/fs/smb/server/unicode.c +++ b/fs/smb/server/unicode.c @@ -13,46 +13,10 @@ #include "unicode.h" #include "smb_common.h" -/* - * smb_utf16_bytes() - how long will a string be after conversion? - * @from: pointer to input string - * @maxbytes: don't go past this many bytes of input string - * @codepage: destination codepage - * - * Walk a utf16le string and return the number of bytes that the string will - * be after being converted to the given charset, not including any null - * termination required. Don't walk past maxbytes in the source buffer. - * - * Return: string length after conversion - */ -static int smb_utf16_bytes(const __le16 *from, int maxbytes, - const struct nls_table *codepage) -{ - int i; - int charlen, outlen = 0; - int maxwords = maxbytes / 2; - char tmp[NLS_MAX_CHARSET_SIZE]; - __u16 ftmp; - - for (i = 0; i < maxwords; i++) { - ftmp = get_unaligned_le16(&from[i]); - if (ftmp == 0) - break; - - charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE); - if (charlen > 0) - outlen += charlen; - else - outlen++; - } - - return outlen; -} - /* * cifs_mapchar() - convert a host-endian char to proper char in codepage * @target: where converted character should be copied - * @src_char: 2 byte host-endian source character + * @from: host-endian source string * @cp: codepage to which character should be converted * @mapchar: should character be mapped according to mapchars mount option? * @@ -63,10 +27,13 @@ static int smb_utf16_bytes(const __le16 *from, int maxbytes, * Return: string length after conversion */ static int -cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, +cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp, bool mapchar) { int len = 1; + __u16 src_char; + + src_char = *from; if (!mapchar) goto cp_convert; @@ -104,12 +71,66 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, cp_convert: len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); - if (len <= 0) { - *target = '?'; - len = 1; - } + if (len <= 0) + goto surrogate_pair; goto out; + +surrogate_pair: + /* convert SURROGATE_PAIR and IVS */ + if (strcmp(cp->charset, "utf8")) + goto unknown; + len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6); + if (len <= 0) + goto unknown; + return len; + +unknown: + *target = '?'; + len = 1; + goto out; +} + +/* + * smb_utf16_bytes() - compute converted string length + * @from: pointer to input string + * @maxbytes: input string length + * @codepage: destination codepage + * + * Walk a utf16le string and return the number of bytes that the string will + * be after being converted to the given charset, not including any null + * termination required. Don't walk past maxbytes in the source buffer. + * + * Return: string length after conversion + */ +static int smb_utf16_bytes(const __le16 *from, int maxbytes, + const struct nls_table *codepage) +{ + int i, j; + int charlen, outlen = 0; + int maxwords = maxbytes / 2; + char tmp[NLS_MAX_CHARSET_SIZE]; + __u16 ftmp[3]; + + for (i = 0; i < maxwords; i++) { + ftmp[0] = get_unaligned_le16(&from[i]); + if (ftmp[0] == 0) + break; + for (j = 1; j <= 2; j++) { + if (i + j < maxwords) + ftmp[j] = get_unaligned_le16(&from[i + j]); + else + ftmp[j] = 0; + } + + charlen = cifs_mapchar(tmp, ftmp, codepage, 0); + if (charlen > 0) + outlen += charlen; + else + outlen++; + } + + return outlen; } /* @@ -139,12 +160,12 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, const struct nls_table *codepage, bool mapchar) { - int i, charlen, safelen; + int i, j, charlen, safelen; int outlen = 0; int nullsize = nls_nullsize(codepage); int fromwords = fromlen / 2; char tmp[NLS_MAX_CHARSET_SIZE]; - __u16 ftmp; + __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */ /* * because the chars can be of varying widths, we need to take care @@ -155,9 +176,15 @@ static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); for (i = 0; i < fromwords; i++) { - ftmp = get_unaligned_le16(&from[i]); - if (ftmp == 0) + ftmp[0] = get_unaligned_le16(&from[i]); + if (ftmp[0] == 0) break; + for (j = 1; j <= 2; j++) { + if (i + j < fromwords) + ftmp[j] = get_unaligned_le16(&from[i + j]); + else + ftmp[j] = 0; + } /* * check to see if converting this character might make the @@ -172,6 +199,19 @@ static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, /* put converted char into 'to' buffer */ charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar); outlen += charlen; + + /* + * charlen (=bytes of UTF-8 for 1 character) + * 4bytes UTF-8(surrogate pair) is charlen=4 + * (4bytes UTF-16 code) + * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4 + * (2 UTF-8 pairs divided to 2 UTF-16 pairs) + */ + if (charlen == 4) + i++; + else if (charlen >= 5) + /* 5-6bytes UTF-8 */ + i += 2; } /* properly null-terminate string */ @@ -306,6 +346,9 @@ int smbConvertToUTF16(__le16 *target, const char *source, int srclen, char src_char; __le16 dst_char; wchar_t tmp; + wchar_t wchar_to[6]; /* UTF-16 */ + int ret; + unicode_t u; if (!mapchars) return smb_strtoUTF16(target, source, srclen, cp); @@ -348,11 +391,57 @@ int smbConvertToUTF16(__le16 *target, const char *source, int srclen, * if no match, use question mark, which at least in * some cases serves as wild card */ - if (charlen < 1) { - dst_char = cpu_to_le16(0x003f); - charlen = 1; + if (charlen > 0) + goto ctoUTF16; + + /* convert SURROGATE_PAIR */ + if (strcmp(cp->charset, "utf8")) + goto unknown; + if (*(source + i) & 0x80) { + charlen = utf8_to_utf32(source + i, 6, &u); + if (charlen < 0) + goto unknown; + } else + goto unknown; + ret = utf8s_to_utf16s(source + i, charlen, + UTF16_LITTLE_ENDIAN, + wchar_to, 6); + if (ret < 0) + goto unknown; + + i += charlen; + dst_char = cpu_to_le16(*wchar_to); + if (charlen <= 3) + /* 1-3bytes UTF-8 to 2bytes UTF-16 */ + put_unaligned(dst_char, &target[j]); + else if (charlen == 4) { + /* + * 4bytes UTF-8(surrogate pair) to 4bytes UTF-16 + * 7-8bytes UTF-8(IVS) divided to 2 UTF-16 + * (charlen=3+4 or 4+4) + */ + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 1)); + j++; + put_unaligned(dst_char, &target[j]); + } else if (charlen >= 5) { + /* 5-6bytes UTF-8 to 6bytes UTF-16 */ + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 1)); + j++; + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 2)); + j++; + put_unaligned(dst_char, &target[j]); } + continue; + +unknown: + dst_char = cpu_to_le16(0x003f); + charlen = 1; } + +ctoUTF16: /* * character may take more than one byte in the source string, * but will take exactly two bytes in the target string From b48bb8c2ecdb7dd0bd00a642819a71a228570d9c Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:06 +0900 Subject: [PATCH 1351/1397] ksmbd: no need to wait for binded connection termination at logoff [ Upstream commit 67797da8a4b82446d42c52b6ee1419a3100d78ff ] The connection could be binded to the existing session for Multichannel. session will be destroyed when binded connections are released. So no need to wait for that's connection at logoff. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/connection.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 4b38c3a285f6..b6fa1e285c40 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -167,23 +167,7 @@ void ksmbd_all_conn_set_status(u64 sess_id, u32 status) void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id) { - struct ksmbd_conn *bind_conn; - wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2); - - down_read(&conn_list_lock); - list_for_each_entry(bind_conn, &conn_list, conns_list) { - if (bind_conn == conn) - continue; - - if ((bind_conn->binding || xa_load(&bind_conn->sessions, sess_id)) && - !ksmbd_conn_releasing(bind_conn) && - atomic_read(&bind_conn->req_running)) { - wait_event(bind_conn->req_running_q, - atomic_read(&bind_conn->req_running) == 0); - } - } - up_read(&conn_list_lock); } int ksmbd_conn_write(struct ksmbd_work *work) From cdb93ef9cfccc1338f49c6780af3d8e0bc34024a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:07 +0900 Subject: [PATCH 1352/1397] ksmbd: fix kernel-doc comment of ksmbd_vfs_kern_path_locked() [ Upstream commit f6049712e520287ad695e9d4f1572ab76807fa0c ] Fix argument list that the kdoc format and script verified in ksmbd_vfs_kern_path_locked(). fs/smb/server/vfs.c:1207: warning: Function parameter or member 'parent_path' not described in 'ksmbd_vfs_kern_path_locked' Reported-by: kernel test robot Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/vfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 183e36cda59e..533257b46fc1 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1186,9 +1186,10 @@ static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name, /** * ksmbd_vfs_kern_path_locked() - lookup a file and get path info - * @name: file path that is relative to share - * @flags: lookup flags - * @path: if lookup succeed, return path info + * @name: file path that is relative to share + * @flags: lookup flags + * @parent_path: if lookup succeed, return parent_path info + * @path: if lookup succeed, return path info * @caseless: caseless filename lookup * * Return: 0 on success, otherwise error From 8d69547b94e079a61e43d017f92ecc86256b1eea Mon Sep 17 00:00:00 2001 From: Zongmin Zhou Date: Sun, 31 Dec 2023 16:19:08 +0900 Subject: [PATCH 1353/1397] ksmbd: prevent memory leak on error return [ Upstream commit 90044481e7cca6cb3125b3906544954a25f1309f ] When allocated memory for 'new' failed,just return will cause memory leak of 'ar'. Fixes: 1819a9042999 ("ksmbd: reorganize ksmbd_iov_pin_rsp()") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202311031837.H3yo7JVl-lkp@intel.com/ Signed-off-by: Zongmin Zhou Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/ksmbd_work.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c index a2ed441e837a..2510b9f3c8c1 100644 --- a/fs/smb/server/ksmbd_work.c +++ b/fs/smb/server/ksmbd_work.c @@ -106,7 +106,7 @@ static inline void __ksmbd_iov_pin(struct ksmbd_work *work, void *ib, static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len, void *aux_buf, unsigned int aux_size) { - struct aux_read *ar; + struct aux_read *ar = NULL; int need_iov_cnt = 1; if (aux_size) { @@ -123,8 +123,11 @@ static int __ksmbd_iov_pin_rsp(struct ksmbd_work *work, void *ib, int len, new = krealloc(work->iov, sizeof(struct kvec) * work->iov_alloc_cnt, GFP_KERNEL | __GFP_ZERO); - if (!new) + if (!new) { + kfree(ar); + work->iov_alloc_cnt -= 4; return -ENOMEM; + } work->iov = new; } From f263652dc6c95d973dcd97ec4bcc9b2c66471a3e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:09 +0900 Subject: [PATCH 1354/1397] ksmbd: separately allocate ci per dentry [ Upstream commit 4274a9dc6aeb9fea66bffba15697a35ae8983b6a ] xfstests generic/002 test fail when enabling smb2 leases feature. This test create hard link file, but removeal failed. ci has a file open count to count file open through the smb client, but in the case of hard link files, The allocation of ci per inode cause incorrectly open count for file deletion. This patch allocate ci per dentry to counts open counts for hard link. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/smb2pdu.c | 2 +- fs/smb/server/vfs.c | 2 +- fs/smb/server/vfs_cache.c | 33 +++++++++++++-------------------- fs/smb/server/vfs_cache.h | 6 +++--- 4 files changed, 18 insertions(+), 25 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 2b248d45d40a..28b61dad2749 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3039,7 +3039,7 @@ int smb2_open(struct ksmbd_work *work) } } - rc = ksmbd_query_inode_status(d_inode(path.dentry->d_parent)); + rc = ksmbd_query_inode_status(path.dentry->d_parent); if (rc == KSMBD_INODE_STATUS_PENDING_DELETE) { rc = -EBUSY; goto err_out; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 533257b46fc1..9091dcd7a310 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -719,7 +719,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, goto out3; } - parent_fp = ksmbd_lookup_fd_inode(d_inode(old_child->d_parent)); + parent_fp = ksmbd_lookup_fd_inode(old_child->d_parent); if (parent_fp) { if (parent_fp->daccess & FILE_DELETE_LE) { pr_err("parent dir is opened with delete access\n"); diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index c91eac6514dd..ddf233994ddb 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -66,14 +66,14 @@ static unsigned long inode_hash(struct super_block *sb, unsigned long hashval) return tmp & inode_hash_mask; } -static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode) +static struct ksmbd_inode *__ksmbd_inode_lookup(struct dentry *de) { struct hlist_head *head = inode_hashtable + - inode_hash(inode->i_sb, inode->i_ino); + inode_hash(d_inode(de)->i_sb, (unsigned long)de); struct ksmbd_inode *ci = NULL, *ret_ci = NULL; hlist_for_each_entry(ci, head, m_hash) { - if (ci->m_inode == inode) { + if (ci->m_de == de) { if (atomic_inc_not_zero(&ci->m_count)) ret_ci = ci; break; @@ -84,26 +84,16 @@ static struct ksmbd_inode *__ksmbd_inode_lookup(struct inode *inode) static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp) { - return __ksmbd_inode_lookup(file_inode(fp->filp)); + return __ksmbd_inode_lookup(fp->filp->f_path.dentry); } -static struct ksmbd_inode *ksmbd_inode_lookup_by_vfsinode(struct inode *inode) -{ - struct ksmbd_inode *ci; - - read_lock(&inode_hash_lock); - ci = __ksmbd_inode_lookup(inode); - read_unlock(&inode_hash_lock); - return ci; -} - -int ksmbd_query_inode_status(struct inode *inode) +int ksmbd_query_inode_status(struct dentry *dentry) { struct ksmbd_inode *ci; int ret = KSMBD_INODE_STATUS_UNKNOWN; read_lock(&inode_hash_lock); - ci = __ksmbd_inode_lookup(inode); + ci = __ksmbd_inode_lookup(dentry); if (ci) { ret = KSMBD_INODE_STATUS_OK; if (ci->m_flags & (S_DEL_PENDING | S_DEL_ON_CLS)) @@ -143,7 +133,7 @@ void ksmbd_fd_set_delete_on_close(struct ksmbd_file *fp, static void ksmbd_inode_hash(struct ksmbd_inode *ci) { struct hlist_head *b = inode_hashtable + - inode_hash(ci->m_inode->i_sb, ci->m_inode->i_ino); + inode_hash(d_inode(ci->m_de)->i_sb, (unsigned long)ci->m_de); hlist_add_head(&ci->m_hash, b); } @@ -157,7 +147,6 @@ static void ksmbd_inode_unhash(struct ksmbd_inode *ci) static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp) { - ci->m_inode = file_inode(fp->filp); atomic_set(&ci->m_count, 1); atomic_set(&ci->op_count, 0); atomic_set(&ci->sop_count, 0); @@ -166,6 +155,7 @@ static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp) INIT_LIST_HEAD(&ci->m_fp_list); INIT_LIST_HEAD(&ci->m_op_list); rwlock_init(&ci->m_lock); + ci->m_de = fp->filp->f_path.dentry; return 0; } @@ -488,12 +478,15 @@ struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid) return fp; } -struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode) +struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry) { struct ksmbd_file *lfp; struct ksmbd_inode *ci; + struct inode *inode = d_inode(dentry); - ci = ksmbd_inode_lookup_by_vfsinode(inode); + read_lock(&inode_hash_lock); + ci = __ksmbd_inode_lookup(dentry); + read_unlock(&inode_hash_lock); if (!ci) return NULL; diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 03d0bf941216..8325cf4527c4 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -51,7 +51,7 @@ struct ksmbd_inode { atomic_t op_count; /* opinfo count for streams */ atomic_t sop_count; - struct inode *m_inode; + struct dentry *m_de; unsigned int m_flags; struct hlist_node m_hash; struct list_head m_fp_list; @@ -140,7 +140,7 @@ struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id, void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp); struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id); struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid); -struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode); +struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry); unsigned int ksmbd_open_durable_fd(struct ksmbd_file *fp); struct ksmbd_file *ksmbd_open_fd(struct ksmbd_work *work, struct file *filp); void ksmbd_close_tree_conn_fds(struct ksmbd_work *work); @@ -164,7 +164,7 @@ enum KSMBD_INODE_STATUS { KSMBD_INODE_STATUS_PENDING_DELETE, }; -int ksmbd_query_inode_status(struct inode *inode); +int ksmbd_query_inode_status(struct dentry *dentry); bool ksmbd_inode_pending_delete(struct ksmbd_file *fp); void ksmbd_set_inode_pending_delete(struct ksmbd_file *fp); void ksmbd_clear_inode_pending_delete(struct ksmbd_file *fp); From e4ae1953755803378b7e0200bddf26e1f61927f4 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:10 +0900 Subject: [PATCH 1355/1397] ksmbd: move oplock handling after unlock parent dir [ Upstream commit 2e450920d58b4991a436c8cecf3484bcacd8e535 ] ksmbd should process secound parallel smb2 create request during waiting oplock break ack. parent lock range that is too large in smb2_open() causes smb2_open() to be serialized. Move the oplock handling to the bottom of smb2_open() and make it called after parent unlock. This fixes the failure of smb2.lease.breaking1 testcase. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/smb2pdu.c | 121 +++++++++++++++++++++------------------- 1 file changed, 65 insertions(+), 56 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 28b61dad2749..e58504d0e9c1 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2691,7 +2691,7 @@ int smb2_open(struct ksmbd_work *work) *(char *)req->Buffer == '\\') { pr_err("not allow directory name included leading slash\n"); rc = -EINVAL; - goto err_out1; + goto err_out2; } name = smb2_get_name(req->Buffer, @@ -2702,7 +2702,7 @@ int smb2_open(struct ksmbd_work *work) if (rc != -ENOMEM) rc = -ENOENT; name = NULL; - goto err_out1; + goto err_out2; } ksmbd_debug(SMB, "converted name = %s\n", name); @@ -2710,28 +2710,28 @@ int smb2_open(struct ksmbd_work *work) if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_STREAMS)) { rc = -EBADF; - goto err_out1; + goto err_out2; } rc = parse_stream_name(name, &stream_name, &s_type); if (rc < 0) - goto err_out1; + goto err_out2; } rc = ksmbd_validate_filename(name); if (rc < 0) - goto err_out1; + goto err_out2; if (ksmbd_share_veto_filename(share, name)) { rc = -ENOENT; ksmbd_debug(SMB, "Reject open(), vetoed file: %s\n", name); - goto err_out1; + goto err_out2; } } else { name = kstrdup("", GFP_KERNEL); if (!name) { rc = -ENOMEM; - goto err_out1; + goto err_out2; } } @@ -2744,14 +2744,14 @@ int smb2_open(struct ksmbd_work *work) le32_to_cpu(req->ImpersonationLevel)); rc = -EIO; rsp->hdr.Status = STATUS_BAD_IMPERSONATION_LEVEL; - goto err_out1; + goto err_out2; } if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK_LE)) { pr_err("Invalid create options : 0x%x\n", le32_to_cpu(req->CreateOptions)); rc = -EINVAL; - goto err_out1; + goto err_out2; } else { if (req->CreateOptions & FILE_SEQUENTIAL_ONLY_LE && req->CreateOptions & FILE_RANDOM_ACCESS_LE) @@ -2761,13 +2761,13 @@ int smb2_open(struct ksmbd_work *work) (FILE_OPEN_BY_FILE_ID_LE | CREATE_TREE_CONNECTION | FILE_RESERVE_OPFILTER_LE)) { rc = -EOPNOTSUPP; - goto err_out1; + goto err_out2; } if (req->CreateOptions & FILE_DIRECTORY_FILE_LE) { if (req->CreateOptions & FILE_NON_DIRECTORY_FILE_LE) { rc = -EINVAL; - goto err_out1; + goto err_out2; } else if (req->CreateOptions & FILE_NO_COMPRESSION_LE) { req->CreateOptions = ~(FILE_NO_COMPRESSION_LE); } @@ -2779,21 +2779,21 @@ int smb2_open(struct ksmbd_work *work) pr_err("Invalid create disposition : 0x%x\n", le32_to_cpu(req->CreateDisposition)); rc = -EINVAL; - goto err_out1; + goto err_out2; } if (!(req->DesiredAccess & DESIRED_ACCESS_MASK)) { pr_err("Invalid desired access : 0x%x\n", le32_to_cpu(req->DesiredAccess)); rc = -EACCES; - goto err_out1; + goto err_out2; } if (req->FileAttributes && !(req->FileAttributes & FILE_ATTRIBUTE_MASK_LE)) { pr_err("Invalid file attribute : 0x%x\n", le32_to_cpu(req->FileAttributes)); rc = -EINVAL; - goto err_out1; + goto err_out2; } if (req->CreateContextsOffset) { @@ -2801,19 +2801,19 @@ int smb2_open(struct ksmbd_work *work) context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER, 4); if (IS_ERR(context)) { rc = PTR_ERR(context); - goto err_out1; + goto err_out2; } else if (context) { ea_buf = (struct create_ea_buf_req *)context; if (le16_to_cpu(context->DataOffset) + le32_to_cpu(context->DataLength) < sizeof(struct create_ea_buf_req)) { rc = -EINVAL; - goto err_out1; + goto err_out2; } if (req->CreateOptions & FILE_NO_EA_KNOWLEDGE_LE) { rsp->hdr.Status = STATUS_ACCESS_DENIED; rc = -EACCES; - goto err_out1; + goto err_out2; } } @@ -2821,7 +2821,7 @@ int smb2_open(struct ksmbd_work *work) SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST, 4); if (IS_ERR(context)) { rc = PTR_ERR(context); - goto err_out1; + goto err_out2; } else if (context) { ksmbd_debug(SMB, "get query maximal access context\n"); @@ -2832,11 +2832,11 @@ int smb2_open(struct ksmbd_work *work) SMB2_CREATE_TIMEWARP_REQUEST, 4); if (IS_ERR(context)) { rc = PTR_ERR(context); - goto err_out1; + goto err_out2; } else if (context) { ksmbd_debug(SMB, "get timewarp context\n"); rc = -EBADF; - goto err_out1; + goto err_out2; } if (tcon->posix_extensions) { @@ -2844,7 +2844,7 @@ int smb2_open(struct ksmbd_work *work) SMB2_CREATE_TAG_POSIX, 16); if (IS_ERR(context)) { rc = PTR_ERR(context); - goto err_out1; + goto err_out2; } else if (context) { struct create_posix *posix = (struct create_posix *)context; @@ -2852,7 +2852,7 @@ int smb2_open(struct ksmbd_work *work) le32_to_cpu(context->DataLength) < sizeof(struct create_posix) - 4) { rc = -EINVAL; - goto err_out1; + goto err_out2; } ksmbd_debug(SMB, "get posix context\n"); @@ -2864,7 +2864,7 @@ int smb2_open(struct ksmbd_work *work) if (ksmbd_override_fsids(work)) { rc = -ENOMEM; - goto err_out1; + goto err_out2; } rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS, @@ -3177,11 +3177,6 @@ int smb2_open(struct ksmbd_work *work) fp->attrib_only = !(req->DesiredAccess & ~(FILE_READ_ATTRIBUTES_LE | FILE_WRITE_ATTRIBUTES_LE | FILE_SYNCHRONIZE_LE)); - if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC && - !fp->attrib_only && !stream_name) { - smb_break_all_oplock(work, fp); - need_truncate = 1; - } /* fp should be searchable through ksmbd_inode.m_fp_list * after daccess, saccess, attrib_only, and stream are @@ -3197,13 +3192,39 @@ int smb2_open(struct ksmbd_work *work) goto err_out; } + rc = ksmbd_vfs_getattr(&path, &stat); + if (rc) + goto err_out; + + if (stat.result_mask & STATX_BTIME) + fp->create_time = ksmbd_UnixTimeToNT(stat.btime); + else + fp->create_time = ksmbd_UnixTimeToNT(stat.ctime); + if (req->FileAttributes || fp->f_ci->m_fattr == 0) + fp->f_ci->m_fattr = + cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes))); + + if (!created) + smb2_update_xattrs(tcon, &path, fp); + else + smb2_new_xattrs(tcon, &path, fp); + + if (file_present || created) + ksmbd_vfs_kern_path_unlock(&parent_path, &path); + + if (!S_ISDIR(file_inode(filp)->i_mode) && open_flags & O_TRUNC && + !fp->attrib_only && !stream_name) { + smb_break_all_oplock(work, fp); + need_truncate = 1; + } + share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp); if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) || (req_op_level == SMB2_OPLOCK_LEVEL_LEASE && !(conn->vals->capabilities & SMB2_GLOBAL_CAP_LEASING))) { if (share_ret < 0 && !S_ISDIR(file_inode(fp->filp)->i_mode)) { rc = share_ret; - goto err_out; + goto err_out1; } } else { if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) { @@ -3213,7 +3234,7 @@ int smb2_open(struct ksmbd_work *work) name, req_op_level, lc->req_state); rc = find_same_lease_key(sess, fp->f_ci, lc); if (rc) - goto err_out; + goto err_out1; } else if (open_flags == O_RDONLY && (req_op_level == SMB2_OPLOCK_LEVEL_BATCH || req_op_level == SMB2_OPLOCK_LEVEL_EXCLUSIVE)) @@ -3224,12 +3245,18 @@ int smb2_open(struct ksmbd_work *work) le32_to_cpu(req->hdr.Id.SyncId.TreeId), lc, share_ret); if (rc < 0) - goto err_out; + goto err_out1; } if (req->CreateOptions & FILE_DELETE_ON_CLOSE_LE) ksmbd_fd_set_delete_on_close(fp, file_info); + if (need_truncate) { + rc = smb2_create_truncate(&fp->filp->f_path); + if (rc) + goto err_out1; + } + if (req->CreateContextsOffset) { struct create_alloc_size_req *az_req; @@ -3237,7 +3264,7 @@ int smb2_open(struct ksmbd_work *work) SMB2_CREATE_ALLOCATION_SIZE, 4); if (IS_ERR(az_req)) { rc = PTR_ERR(az_req); - goto err_out; + goto err_out1; } else if (az_req) { loff_t alloc_size; int err; @@ -3246,7 +3273,7 @@ int smb2_open(struct ksmbd_work *work) le32_to_cpu(az_req->ccontext.DataLength) < sizeof(struct create_alloc_size_req)) { rc = -EINVAL; - goto err_out; + goto err_out1; } alloc_size = le64_to_cpu(az_req->AllocationSize); ksmbd_debug(SMB, @@ -3264,30 +3291,13 @@ int smb2_open(struct ksmbd_work *work) context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID, 4); if (IS_ERR(context)) { rc = PTR_ERR(context); - goto err_out; + goto err_out1; } else if (context) { ksmbd_debug(SMB, "get query on disk id context\n"); query_disk_id = 1; } } - rc = ksmbd_vfs_getattr(&path, &stat); - if (rc) - goto err_out; - - if (stat.result_mask & STATX_BTIME) - fp->create_time = ksmbd_UnixTimeToNT(stat.btime); - else - fp->create_time = ksmbd_UnixTimeToNT(stat.ctime); - if (req->FileAttributes || fp->f_ci->m_fattr == 0) - fp->f_ci->m_fattr = - cpu_to_le32(smb2_get_dos_mode(&stat, le32_to_cpu(req->FileAttributes))); - - if (!created) - smb2_update_xattrs(tcon, &path, fp); - else - smb2_new_xattrs(tcon, &path, fp); - memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE); rsp->StructureSize = cpu_to_le16(89); @@ -3394,14 +3404,13 @@ int smb2_open(struct ksmbd_work *work) } err_out: - if (file_present || created) + if (rc && (file_present || created)) ksmbd_vfs_kern_path_unlock(&parent_path, &path); - if (fp && need_truncate) - rc = smb2_create_truncate(&fp->filp->f_path); - - ksmbd_revert_fsids(work); err_out1: + ksmbd_revert_fsids(work); + +err_out2: if (!rc) { ksmbd_update_fstate(&work->sess->file_table, fp, FP_INITED); rc = ksmbd_iov_pin_rsp(work, (void *)rsp, iov_len); From fa86141f357f91abe3847a01d0e183a686d3519c Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:11 +0900 Subject: [PATCH 1356/1397] ksmbd: release interim response after sending status pending response [ Upstream commit 2a3f7857ec742e212d6cee7fbbf7b0e2ae7f5161 ] Add missing release async id and delete interim response entry after sending status pending response. This only cause when smb2 lease is enable. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/ksmbd_work.c | 3 +++ fs/smb/server/oplock.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/ksmbd_work.c b/fs/smb/server/ksmbd_work.c index 2510b9f3c8c1..d7c676c151e2 100644 --- a/fs/smb/server/ksmbd_work.c +++ b/fs/smb/server/ksmbd_work.c @@ -56,6 +56,9 @@ void ksmbd_free_work_struct(struct ksmbd_work *work) kfree(work->tr_buf); kvfree(work->request_buf); kfree(work->iov); + if (!list_empty(&work->interim_entry)) + list_del(&work->interim_entry); + if (work->async_id) ksmbd_release_id(&work->conn->async_ida, work->async_id); kmem_cache_free(work_cache, work); diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 9bc0103720f5..50c68beb71d6 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -833,7 +833,8 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo) interim_entry); setup_async_work(in_work, NULL, NULL); smb2_send_interim_resp(in_work, STATUS_PENDING); - list_del(&in_work->interim_entry); + list_del_init(&in_work->interim_entry); + release_async_work(in_work); } INIT_WORK(&work->work, __smb2_lease_break_noti); ksmbd_queue_work(work); From b06c9637317952c29d8a2ffbdc604c4328aee6c9 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:12 +0900 Subject: [PATCH 1357/1397] ksmbd: move setting SMB2_FLAGS_ASYNC_COMMAND and AsyncId [ Upstream commit 9ac45ac7cf65b0623ceeab9b28b307a08efa22dc ] Directly set SMB2_FLAGS_ASYNC_COMMAND flags and AsyncId in smb2 header of interim response instead of current response header. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/smb2pdu.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index e58504d0e9c1..de71532651d9 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -657,13 +657,9 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg) { - struct smb2_hdr *rsp_hdr; struct ksmbd_conn *conn = work->conn; int id; - rsp_hdr = ksmbd_resp_buf_next(work); - rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND; - id = ksmbd_acquire_async_msg_id(&conn->async_ida); if (id < 0) { pr_err("Failed to alloc async message id\n"); @@ -671,7 +667,6 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg) } work->asynchronous = true; work->async_id = id; - rsp_hdr->Id.AsyncId = cpu_to_le64(id); ksmbd_debug(SMB, "Send interim Response to inform async request id : %d\n", @@ -723,6 +718,8 @@ void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status) __SMB2_HEADER_STRUCTURE_SIZE); rsp_hdr = smb2_get_msg(in_work->response_buf); + rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND; + rsp_hdr->Id.AsyncId = cpu_to_le64(work->async_id); smb2_set_err_rsp(in_work); rsp_hdr->Status = status; From 3da84670973ba926ce421077e57cb769f7595ccb Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:13 +0900 Subject: [PATCH 1358/1397] ksmbd: don't update ->op_state as OPLOCK_STATE_NONE on error [ Upstream commit cd80ce7e68f1624ac29cd0a6b057789d1236641e ] ksmbd set ->op_state as OPLOCK_STATE_NONE on lease break ack error. op_state of lease should not be updated because client can send lease break ack again. This patch fix smb2.lease.breaking2 test failure. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/smb2pdu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index de71532651d9..5bff6746234d 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -8235,7 +8235,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) return; err_out: - opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); atomic_dec(&opinfo->breaking_cnt); wake_up_interruptible_all(&opinfo->oplock_brk); From bc025d49c507b7830c63382eeaddce972dccc143 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:14 +0900 Subject: [PATCH 1359/1397] ksmbd: set epoch in create context v2 lease [ Upstream commit d045850b628aaf931fc776c90feaf824dca5a1cf ] To support v2 lease(directory lease), ksmbd set epoch in create context v2 lease response. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/oplock.c | 5 ++++- fs/smb/server/oplock.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 50c68beb71d6..ff5c83b1fb85 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -104,7 +104,7 @@ static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx) lease->duration = lctx->duration; memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE); lease->version = lctx->version; - lease->epoch = 0; + lease->epoch = le16_to_cpu(lctx->epoch); INIT_LIST_HEAD(&opinfo->lease_entry); opinfo->o_lease = lease; @@ -1032,6 +1032,7 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2) SMB2_LEASE_KEY_SIZE); lease2->duration = lease1->duration; lease2->flags = lease1->flags; + lease2->epoch = lease1->epoch++; } static int add_lease_global_list(struct oplock_info *opinfo) @@ -1364,6 +1365,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) memcpy(buf->lcontext.LeaseKey, lease->lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseFlags = lease->flags; + buf->lcontext.Epoch = cpu_to_le16(++lease->epoch); buf->lcontext.LeaseState = lease->state; memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, SMB2_LEASE_KEY_SIZE); @@ -1423,6 +1425,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; lreq->flags = lc->lcontext.LeaseFlags; + lreq->epoch = lc->lcontext.Epoch; lreq->duration = lc->lcontext.LeaseDuration; memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey, SMB2_LEASE_KEY_SIZE); diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index 4b0fe6da7694..ad31439c61fe 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -34,6 +34,7 @@ struct lease_ctx_info { __le32 flags; __le64 duration; __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE]; + __le16 epoch; int version; }; From d7af4e499c308a38b07e585096057045ec303591 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:15 +0900 Subject: [PATCH 1360/1397] ksmbd: set v2 lease capability [ Upstream commit 18dd1c367c31d0a060f737d48345747662369b64 ] Set SMB2_GLOBAL_CAP_DIRECTORY_LEASING to ->capabilities to inform server support directory lease to client. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/oplock.c | 4 ---- fs/smb/server/smb2ops.c | 9 ++++++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index ff5c83b1fb85..5ef6af68d0de 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1105,10 +1105,6 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, bool prev_op_has_lease; __le32 prev_op_state = 0; - /* not support directory lease */ - if (S_ISDIR(file_inode(fp->filp)->i_mode)) - return 0; - opinfo = alloc_opinfo(work, pid, tid); if (!opinfo) return -ENOMEM; diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c index aed7704a0672..27a9dce3e03a 100644 --- a/fs/smb/server/smb2ops.c +++ b/fs/smb/server/smb2ops.c @@ -221,7 +221,8 @@ void init_smb3_0_server(struct ksmbd_conn *conn) conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | + SMB2_GLOBAL_CAP_DIRECTORY_LEASING; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION && conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION) @@ -245,7 +246,8 @@ void init_smb3_02_server(struct ksmbd_conn *conn) conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | + SMB2_GLOBAL_CAP_DIRECTORY_LEASING; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && @@ -270,7 +272,8 @@ int init_smb3_11_server(struct ksmbd_conn *conn) conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; + conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING | + SMB2_GLOBAL_CAP_DIRECTORY_LEASING; if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION || (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) && From 572388ff429a3d39216e736a3b6d052a9f5c025b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:16 +0900 Subject: [PATCH 1361/1397] ksmbd: downgrade RWH lease caching state to RH for directory [ Upstream commit eb547407f3572d2110cb1194ecd8865b3371a7a4 ] RWH(Read + Write + Handle) caching state is not supported for directory. ksmbd downgrade it to RH for directory if client send RWH caching lease state. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/oplock.c | 9 +++++++-- fs/smb/server/oplock.h | 2 +- fs/smb/server/smb2pdu.c | 8 ++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 5ef6af68d0de..57950ba7e925 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1398,10 +1398,11 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) /** * parse_lease_state() - parse lease context containted in file open request * @open_req: buffer containing smb2 file open(create) request + * @is_dir: whether leasing file is directory * * Return: oplock state, -ENOENT if create lease context not found */ -struct lease_ctx_info *parse_lease_state(void *open_req) +struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir) { struct create_context *cc; struct smb2_create_req *req = (struct smb2_create_req *)open_req; @@ -1419,7 +1420,11 @@ struct lease_ctx_info *parse_lease_state(void *open_req) struct create_lease_v2 *lc = (struct create_lease_v2 *)cc; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); - lreq->req_state = lc->lcontext.LeaseState; + if (is_dir) + lreq->req_state = lc->lcontext.LeaseState & + ~SMB2_LEASE_WRITE_CACHING_LE; + else + lreq->req_state = lc->lcontext.LeaseState; lreq->flags = lc->lcontext.LeaseFlags; lreq->epoch = lc->lcontext.Epoch; lreq->duration = lc->lcontext.LeaseDuration; diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index ad31439c61fe..672127318c75 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -109,7 +109,7 @@ void opinfo_put(struct oplock_info *opinfo); /* Lease related functions */ void create_lease_buf(u8 *rbuf, struct lease *lease); -struct lease_ctx_info *parse_lease_state(void *open_req); +struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir); __u8 smb2_map_lease_to_oplock(__le32 lease_state); int lease_read_to_write(struct oplock_info *opinfo); diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 5bff6746234d..c4b6adce178a 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2732,10 +2732,6 @@ int smb2_open(struct ksmbd_work *work) } } - req_op_level = req->RequestedOplockLevel; - if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) - lc = parse_lease_state(req); - if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE)) { pr_err("Invalid impersonationlevel : 0x%x\n", le32_to_cpu(req->ImpersonationLevel)); @@ -3215,6 +3211,10 @@ int smb2_open(struct ksmbd_work *work) need_truncate = 1; } + req_op_level = req->RequestedOplockLevel; + if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) + lc = parse_lease_state(req, S_ISDIR(file_inode(filp)->i_mode)); + share_ret = ksmbd_smb_check_shared_mode(fp->filp, fp); if (!test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_OPLOCKS) || (req_op_level == SMB2_OPLOCK_LEVEL_LEASE && From 3c1e602a34e1ecfdede19fbf1b96d31bc8c21f89 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:17 +0900 Subject: [PATCH 1362/1397] ksmbd: send v2 lease break notification for directory [ Upstream commit d47d9886aeef79feba7adac701a510d65f3682b5 ] If client send different parent key, different client guid, or there is no parent lease key flags in create context v2 lease, ksmbd send lease break to client. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/common/smb2pdu.h | 1 + fs/smb/server/oplock.c | 56 +++++++++++++++++++++++++++++++++++---- fs/smb/server/oplock.h | 4 +++ fs/smb/server/smb2pdu.c | 7 +++++ fs/smb/server/vfs_cache.c | 13 ++++++++- fs/smb/server/vfs_cache.h | 2 ++ 6 files changed, 77 insertions(+), 6 deletions(-) diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index ec20c83cc836..d58550c1c937 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1228,6 +1228,7 @@ struct create_mxac_rsp { #define SMB2_LEASE_WRITE_CACHING_LE cpu_to_le32(0x04) #define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS_LE cpu_to_le32(0x02) +#define SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE cpu_to_le32(0x04) #define SMB2_LEASE_KEY_SIZE 16 diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 57950ba7e925..147d98427ce8 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -102,6 +102,7 @@ static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx) lease->new_state = 0; lease->flags = lctx->flags; lease->duration = lctx->duration; + lease->is_dir = lctx->is_dir; memcpy(lease->parent_lease_key, lctx->parent_lease_key, SMB2_LEASE_KEY_SIZE); lease->version = lctx->version; lease->epoch = le16_to_cpu(lctx->epoch); @@ -543,12 +544,13 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, /* upgrading lease */ if ((atomic_read(&ci->op_count) + atomic_read(&ci->sop_count)) == 1) { - if (lease->state == - (lctx->req_state & lease->state)) { + if (lease->state != SMB2_LEASE_NONE_LE && + lease->state == (lctx->req_state & lease->state)) { lease->state |= lctx->req_state; if (lctx->req_state & SMB2_LEASE_WRITE_CACHING_LE) lease_read_to_write(opinfo); + } } else if ((atomic_read(&ci->op_count) + atomic_read(&ci->sop_count)) > 1) { @@ -900,7 +902,8 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level) lease->new_state = SMB2_LEASE_READ_CACHING_LE; } else { - if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE) + if (lease->state & SMB2_LEASE_HANDLE_CACHING_LE && + !lease->is_dir) lease->new_state = SMB2_LEASE_READ_CACHING_LE; else @@ -1082,6 +1085,48 @@ static void set_oplock_level(struct oplock_info *opinfo, int level, } } +void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, + struct lease_ctx_info *lctx) +{ + struct oplock_info *opinfo; + struct ksmbd_inode *p_ci = NULL; + + if (lctx->version != 2) + return; + + p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent); + if (!p_ci) + return; + + read_lock(&p_ci->m_lock); + list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { + if (!opinfo->is_lease) + continue; + + if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE && + (!(lctx->flags & SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE) || + !compare_guid_key(opinfo, fp->conn->ClientGUID, + lctx->parent_lease_key))) { + if (!atomic_inc_not_zero(&opinfo->refcount)) + continue; + + atomic_inc(&opinfo->conn->r_count); + if (ksmbd_conn_releasing(opinfo->conn)) { + atomic_dec(&opinfo->conn->r_count); + continue; + } + + read_unlock(&p_ci->m_lock); + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + opinfo_conn_put(opinfo); + read_lock(&p_ci->m_lock); + } + } + read_unlock(&p_ci->m_lock); + + ksmbd_inode_put(p_ci); +} + /** * smb_grant_oplock() - handle oplock/lease request on file open * @work: smb work @@ -1420,10 +1465,11 @@ struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir) struct create_lease_v2 *lc = (struct create_lease_v2 *)cc; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); - if (is_dir) + if (is_dir) { lreq->req_state = lc->lcontext.LeaseState & ~SMB2_LEASE_WRITE_CACHING_LE; - else + lreq->is_dir = true; + } else lreq->req_state = lc->lcontext.LeaseState; lreq->flags = lc->lcontext.LeaseFlags; lreq->epoch = lc->lcontext.Epoch; diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index 672127318c75..b64d1536882a 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -36,6 +36,7 @@ struct lease_ctx_info { __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE]; __le16 epoch; int version; + bool is_dir; }; struct lease_table { @@ -54,6 +55,7 @@ struct lease { __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE]; int version; unsigned short epoch; + bool is_dir; struct lease_table *l_lb; }; @@ -125,4 +127,6 @@ struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn, int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci, struct lease_ctx_info *lctx); void destroy_lease_table(struct ksmbd_conn *conn); +void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, + struct lease_ctx_info *lctx); #endif /* __KSMBD_OPLOCK_H */ diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index c4b6adce178a..cbd5c5572217 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -3225,6 +3225,13 @@ int smb2_open(struct ksmbd_work *work) } } else { if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) { + /* + * Compare parent lease using parent key. If there is no + * a lease that has same parent key, Send lease break + * notification. + */ + smb_send_parent_lease_break_noti(fp, lc); + req_op_level = smb2_map_lease_to_oplock(lc->req_state); ksmbd_debug(SMB, "lease req for(%s) req oplock state 0x%x, lease state 0x%x\n", diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index ddf233994ddb..4e82ff627d12 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -87,6 +87,17 @@ static struct ksmbd_inode *ksmbd_inode_lookup(struct ksmbd_file *fp) return __ksmbd_inode_lookup(fp->filp->f_path.dentry); } +struct ksmbd_inode *ksmbd_inode_lookup_lock(struct dentry *d) +{ + struct ksmbd_inode *ci; + + read_lock(&inode_hash_lock); + ci = __ksmbd_inode_lookup(d); + read_unlock(&inode_hash_lock); + + return ci; +} + int ksmbd_query_inode_status(struct dentry *dentry) { struct ksmbd_inode *ci; @@ -199,7 +210,7 @@ static void ksmbd_inode_free(struct ksmbd_inode *ci) kfree(ci); } -static void ksmbd_inode_put(struct ksmbd_inode *ci) +void ksmbd_inode_put(struct ksmbd_inode *ci) { if (atomic_dec_and_test(&ci->m_count)) ksmbd_inode_free(ci); diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 8325cf4527c4..4d4938d6029b 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -138,6 +138,8 @@ struct ksmbd_file *ksmbd_lookup_foreign_fd(struct ksmbd_work *work, u64 id); struct ksmbd_file *ksmbd_lookup_fd_slow(struct ksmbd_work *work, u64 id, u64 pid); void ksmbd_fd_put(struct ksmbd_work *work, struct ksmbd_file *fp); +struct ksmbd_inode *ksmbd_inode_lookup_lock(struct dentry *d); +void ksmbd_inode_put(struct ksmbd_inode *ci); struct ksmbd_file *ksmbd_lookup_durable_fd(unsigned long long id); struct ksmbd_file *ksmbd_lookup_fd_cguid(char *cguid); struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry); From ab5a0a1c40befa1fdfe9e018c2fab52c5fc3d44b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:18 +0900 Subject: [PATCH 1363/1397] ksmbd: lazy v2 lease break on smb2_write() [ Upstream commit c2a721eead71202a0d8ddd9b56ec8dce652c71d1 ] Don't immediately send directory lease break notification on smb2_write(). Instead, It postpones it until smb2_close(). Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/oplock.c | 45 +++++++++++++++++++++++++++++++++++++-- fs/smb/server/oplock.h | 1 + fs/smb/server/vfs.c | 3 +++ fs/smb/server/vfs_cache.h | 1 + 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 147d98427ce8..562b180459a1 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -396,8 +396,8 @@ void close_id_del_oplock(struct ksmbd_file *fp) { struct oplock_info *opinfo; - if (S_ISDIR(file_inode(fp->filp)->i_mode)) - return; + if (fp->reserve_lease_break) + smb_lazy_parent_lease_break_close(fp); opinfo = opinfo_get(fp); if (!opinfo) @@ -1127,6 +1127,47 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, ksmbd_inode_put(p_ci); } +void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) +{ + struct oplock_info *opinfo; + struct ksmbd_inode *p_ci = NULL; + + rcu_read_lock(); + opinfo = rcu_dereference(fp->f_opinfo); + rcu_read_unlock(); + + if (!opinfo->is_lease || opinfo->o_lease->version != 2) + return; + + p_ci = ksmbd_inode_lookup_lock(fp->filp->f_path.dentry->d_parent); + if (!p_ci) + return; + + read_lock(&p_ci->m_lock); + list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { + if (!opinfo->is_lease) + continue; + + if (opinfo->o_lease->state != SMB2_OPLOCK_LEVEL_NONE) { + if (!atomic_inc_not_zero(&opinfo->refcount)) + continue; + + atomic_inc(&opinfo->conn->r_count); + if (ksmbd_conn_releasing(opinfo->conn)) { + atomic_dec(&opinfo->conn->r_count); + continue; + } + read_unlock(&p_ci->m_lock); + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); + opinfo_conn_put(opinfo); + read_lock(&p_ci->m_lock); + } + } + read_unlock(&p_ci->m_lock); + + ksmbd_inode_put(p_ci); +} + /** * smb_grant_oplock() - handle oplock/lease request on file open * @work: smb work diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index b64d1536882a..5b93ea9196c0 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -129,4 +129,5 @@ int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci, void destroy_lease_table(struct ksmbd_conn *conn); void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, struct lease_ctx_info *lctx); +void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp); #endif /* __KSMBD_OPLOCK_H */ diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 9091dcd7a310..4277750a6da1 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -517,6 +517,9 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp, } } + /* Reserve lease break for parent dir at closing time */ + fp->reserve_lease_break = true; + /* Do we need to break any of a levelII oplock? */ smb_break_all_levII_oplock(work, fp, 1); diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 4d4938d6029b..a528f0cc775a 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -105,6 +105,7 @@ struct ksmbd_file { struct ksmbd_readdir_data readdir_data; int dot_dotdot[2]; unsigned int f_state; + bool reserve_lease_break; }; static inline void set_ctx_actor(struct dir_context *ctx, From 861eaba7ca6c6f96537cd14d16503426b995dc1d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 31 Dec 2023 16:19:19 +0900 Subject: [PATCH 1364/1397] ksmbd: avoid duplicate opinfo_put() call on error of smb21_lease_break_ack() [ Upstream commit 658609d9a618d8881bf549b5893c0ba8fcff4526 ] opinfo_put() could be called twice on error of smb21_lease_break_ack(). It will cause UAF issue if opinfo is referenced on other places. Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/smb2pdu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index cbd5c5572217..fbd708bb4a5b 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -8219,6 +8219,11 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) le32_to_cpu(req->LeaseState)); } + if (ret < 0) { + rsp->hdr.Status = err; + goto err_out; + } + lease_state = lease->state; opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); @@ -8226,11 +8231,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) wake_up_interruptible_all(&opinfo->oplock_brk); opinfo_put(opinfo); - if (ret < 0) { - rsp->hdr.Status = err; - goto err_out; - } - rsp->StructureSize = cpu_to_le16(36); rsp->Reserved = 0; rsp->Flags = 0; From 5b5599a7eee5e6101c6ae738682331bc6299a54d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 4 Oct 2023 14:52:37 -0400 Subject: [PATCH 1365/1397] fs: new accessor methods for atime and mtime [ Upstream commit 077c212f0344ae4198b2b51af128a94b614ccdf4 ] Recently, we converted the ctime accesses in the kernel to use new accessor functions. Linus recently pointed out though that if we add accessors for the atime and mtime, then that would allow us to seamlessly change how these timestamps are stored in the inode. Add new accessor functions for the atime and mtime that mirror the accessors for the ctime. Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20231004185239.80830-1-jlayton@kernel.org Signed-off-by: Christian Brauner Stable-dep-of: 01fe654f78fd ("fs: cifs: Fix atime update check") Signed-off-by: Sasha Levin --- fs/libfs.c | 41 ++++++++++++++++------ include/linux/fs.h | 85 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 102 insertions(+), 24 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 189447cf4acf..dc0f7519045f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -549,7 +549,8 @@ void simple_recursive_removal(struct dentry *dentry, dput(victim); // unpin it } if (victim == dentry) { - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, + inode_set_ctime_current(inode)); if (d_is_dir(dentry)) drop_nlink(inode); inode_unlock(inode); @@ -590,7 +591,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc) */ root->i_ino = 1; root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; - root->i_atime = root->i_mtime = inode_set_ctime_current(root); + simple_inode_init_ts(root); s->s_root = d_make_root(root); if (!s->s_root) return -ENOMEM; @@ -646,8 +647,8 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den { struct inode *inode = d_inode(old_dentry); - dir->i_mtime = inode_set_ctime_to_ts(dir, - inode_set_ctime_current(inode)); + inode_set_mtime_to_ts(dir, + inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inc_nlink(inode); ihold(inode); dget(dentry); @@ -681,8 +682,8 @@ int simple_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); - dir->i_mtime = inode_set_ctime_to_ts(dir, - inode_set_ctime_current(inode)); + inode_set_mtime_to_ts(dir, + inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); drop_nlink(inode); dput(dentry); return 0; @@ -717,9 +718,10 @@ void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, { struct inode *newino = d_inode(new_dentry); - old_dir->i_mtime = inode_set_ctime_current(old_dir); + inode_set_mtime_to_ts(old_dir, inode_set_ctime_current(old_dir)); if (new_dir != old_dir) - new_dir->i_mtime = inode_set_ctime_current(new_dir); + inode_set_mtime_to_ts(new_dir, + inode_set_ctime_current(new_dir)); inode_set_ctime_current(d_inode(old_dentry)); if (newino) inode_set_ctime_current(newino); @@ -934,7 +936,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic, */ inode->i_ino = 1; inode->i_mode = S_IFDIR | 0755; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; set_nlink(inode, 2); @@ -960,7 +962,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic, goto out; } inode->i_mode = S_IFREG | files->mode; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); inode->i_fop = files->ops; inode->i_ino = i; d_add(dentry, inode); @@ -1528,7 +1530,7 @@ struct inode *alloc_anon_inode(struct super_block *s) inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); return inode; } EXPORT_SYMBOL(alloc_anon_inode); @@ -1920,3 +1922,20 @@ ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, return direct_written + buffered_written; } EXPORT_SYMBOL_GPL(direct_write_fallback); + +/** + * simple_inode_init_ts - initialize the timestamps for a new inode + * @inode: inode to be initialized + * + * When a new inode is created, most filesystems set the timestamps to the + * current time. Add a helper to do this. + */ +struct timespec64 simple_inode_init_ts(struct inode *inode) +{ + struct timespec64 ts = inode_set_ctime_current(inode); + + inode_set_atime_to_ts(inode, ts); + inode_set_mtime_to_ts(inode, ts); + return ts; +} +EXPORT_SYMBOL(simple_inode_init_ts); diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a40823c3c67..d08b97dacd2d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1511,24 +1511,81 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); -/** - * inode_get_ctime - fetch the current ctime from the inode - * @inode: inode from which to fetch ctime - * - * Grab the current ctime from the inode and return it. - */ +static inline time64_t inode_get_atime_sec(const struct inode *inode) +{ + return inode->i_atime.tv_sec; +} + +static inline long inode_get_atime_nsec(const struct inode *inode) +{ + return inode->i_atime.tv_nsec; +} + +static inline struct timespec64 inode_get_atime(const struct inode *inode) +{ + return inode->i_atime; +} + +static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->i_atime = ts; + return ts; +} + +static inline struct timespec64 inode_set_atime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, + .tv_nsec = nsec }; + return inode_set_atime_to_ts(inode, ts); +} + +static inline time64_t inode_get_mtime_sec(const struct inode *inode) +{ + return inode->i_mtime.tv_sec; +} + +static inline long inode_get_mtime_nsec(const struct inode *inode) +{ + return inode->i_mtime.tv_nsec; +} + +static inline struct timespec64 inode_get_mtime(const struct inode *inode) +{ + return inode->i_mtime; +} + +static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, + struct timespec64 ts) +{ + inode->i_mtime = ts; + return ts; +} + +static inline struct timespec64 inode_set_mtime(struct inode *inode, + time64_t sec, long nsec) +{ + struct timespec64 ts = { .tv_sec = sec, + .tv_nsec = nsec }; + return inode_set_mtime_to_ts(inode, ts); +} + +static inline time64_t inode_get_ctime_sec(const struct inode *inode) +{ + return inode->__i_ctime.tv_sec; +} + +static inline long inode_get_ctime_nsec(const struct inode *inode) +{ + return inode->__i_ctime.tv_nsec; +} + static inline struct timespec64 inode_get_ctime(const struct inode *inode) { return inode->__i_ctime; } -/** - * inode_set_ctime_to_ts - set the ctime in the inode - * @inode: inode in which to set the ctime - * @ts: value to set in the ctime field - * - * Set the ctime in @inode to @ts - */ static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { @@ -1553,6 +1610,8 @@ static inline struct timespec64 inode_set_ctime(struct inode *inode, return inode_set_ctime_to_ts(inode, ts); } +struct timespec64 simple_inode_init_ts(struct inode *inode); + /* * Snapshotting support. */ From 23171df51f601c92177e4c810d4c683a198de1a0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 4 Oct 2023 14:52:53 -0400 Subject: [PATCH 1366/1397] client: convert to new timestamp accessors [ Upstream commit 8f22ce7088835444418f0775efb455d10b825596 ] Convert to using the new inode timestamp accessor functions. Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20231004185347.80880-66-jlayton@kernel.org Signed-off-by: Christian Brauner Stable-dep-of: 01fe654f78fd ("fs: cifs: Fix atime update check") Signed-off-by: Sasha Levin --- fs/smb/client/file.c | 18 ++++++++++-------- fs/smb/client/fscache.h | 6 +++--- fs/smb/client/inode.c | 17 ++++++++--------- fs/smb/client/smb2ops.c | 6 ++++-- 4 files changed, 25 insertions(+), 22 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 2108b3b40ce9..cf17e3dd703e 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -1085,7 +1085,8 @@ int cifs_close(struct inode *inode, struct file *file) !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && dclose) { if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, + inode_set_ctime_current(inode)); } spin_lock(&cinode->deferred_lock); cifs_add_deferred_close(cfile, dclose); @@ -2596,7 +2597,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) write_data, to - from, &offset); cifsFileInfo_put(open_file); /* Does mm or vfs already set times? */ - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); if ((bytes_written > 0) && (offset)) rc = 0; else if (bytes_written < 0) @@ -4647,11 +4648,13 @@ static void cifs_readahead(struct readahead_control *ractl) static int cifs_readpage_worker(struct file *file, struct page *page, loff_t *poffset) { + struct inode *inode = file_inode(file); + struct timespec64 atime, mtime; char *read_data; int rc; /* Is the page cached? */ - rc = cifs_readpage_from_fscache(file_inode(file), page); + rc = cifs_readpage_from_fscache(inode, page); if (rc == 0) goto read_complete; @@ -4666,11 +4669,10 @@ static int cifs_readpage_worker(struct file *file, struct page *page, cifs_dbg(FYI, "Bytes read %d\n", rc); /* we do not want atime to be less than mtime, it broke some apps */ - file_inode(file)->i_atime = current_time(file_inode(file)); - if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime))) - file_inode(file)->i_atime = file_inode(file)->i_mtime; - else - file_inode(file)->i_atime = current_time(file_inode(file)); + atime = inode_set_atime_to_ts(inode, current_time(inode)); + mtime = inode_get_mtime(inode); + if (timespec64_compare(&atime, &mtime)) + inode_set_atime_to_ts(inode, inode_get_mtime(inode)); if (PAGE_SIZE > rc) memset(read_data + rc, 0, PAGE_SIZE - rc); diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h index 84f3b09367d2..a3d73720914f 100644 --- a/fs/smb/client/fscache.h +++ b/fs/smb/client/fscache.h @@ -49,12 +49,12 @@ static inline void cifs_fscache_fill_coherency(struct inode *inode, struct cifs_fscache_inode_coherency_data *cd) { - struct cifsInodeInfo *cifsi = CIFS_I(inode); struct timespec64 ctime = inode_get_ctime(inode); + struct timespec64 mtime = inode_get_mtime(inode); memset(cd, 0, sizeof(*cd)); - cd->last_write_time_sec = cpu_to_le64(cifsi->netfs.inode.i_mtime.tv_sec); - cd->last_write_time_nsec = cpu_to_le32(cifsi->netfs.inode.i_mtime.tv_nsec); + cd->last_write_time_sec = cpu_to_le64(mtime.tv_sec); + cd->last_write_time_nsec = cpu_to_le32(mtime.tv_nsec); cd->last_change_time_sec = cpu_to_le64(ctime.tv_sec); cd->last_change_time_nsec = cpu_to_le32(ctime.tv_nsec); } diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 6a856945f2b4..09c5c0f5c96e 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -82,6 +82,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) { struct cifs_fscache_inode_coherency_data cd; struct cifsInodeInfo *cifs_i = CIFS_I(inode); + struct timespec64 mtime; cifs_dbg(FYI, "%s: revalidating inode %llu\n", __func__, cifs_i->uniqueid); @@ -101,7 +102,8 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) /* revalidate if mtime or size have changed */ fattr->cf_mtime = timestamp_truncate(fattr->cf_mtime, inode); - if (timespec64_equal(&inode->i_mtime, &fattr->cf_mtime) && + mtime = inode_get_mtime(inode); + if (timespec64_equal(&mtime, &fattr->cf_mtime) && cifs_i->server_eof == fattr->cf_eof) { cifs_dbg(FYI, "%s: inode %llu is unchanged\n", __func__, cifs_i->uniqueid); @@ -164,10 +166,10 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) fattr->cf_ctime = timestamp_truncate(fattr->cf_ctime, inode); /* we do not want atime to be less than mtime, it broke some apps */ if (timespec64_compare(&fattr->cf_atime, &fattr->cf_mtime) < 0) - inode->i_atime = fattr->cf_mtime; + inode_set_atime_to_ts(inode, fattr->cf_mtime); else - inode->i_atime = fattr->cf_atime; - inode->i_mtime = fattr->cf_mtime; + inode_set_atime_to_ts(inode, fattr->cf_atime); + inode_set_mtime_to_ts(inode, fattr->cf_mtime); inode_set_ctime_to_ts(inode, fattr->cf_ctime); inode->i_rdev = fattr->cf_rdev; cifs_nlink_fattr_to_inode(inode, fattr); @@ -1868,7 +1870,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) when needed */ inode_set_ctime_current(inode); } - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); cifs_inode = CIFS_I(dir); CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ unlink_out: @@ -2183,7 +2185,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) cifsInode->time = 0; inode_set_ctime_current(d_inode(direntry)); - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); rmdir_exit: free_dentry_path(page); @@ -2389,9 +2391,6 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, /* force revalidate to go get info when needed */ CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; - source_dir->i_mtime = target_dir->i_mtime = inode_set_ctime_to_ts(source_dir, - inode_set_ctime_current(target_dir)); - cifs_rename_exit: kfree(info_buf_source); free_dentry_path(page2); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 2187921580ac..e917eeba9c77 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1409,12 +1409,14 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon, /* Creation time should not need to be updated on close */ if (file_inf.LastWriteTime) - inode->i_mtime = cifs_NTtimeToUnix(file_inf.LastWriteTime); + inode_set_mtime_to_ts(inode, + cifs_NTtimeToUnix(file_inf.LastWriteTime)); if (file_inf.ChangeTime) inode_set_ctime_to_ts(inode, cifs_NTtimeToUnix(file_inf.ChangeTime)); if (file_inf.LastAccessTime) - inode->i_atime = cifs_NTtimeToUnix(file_inf.LastAccessTime); + inode_set_atime_to_ts(inode, + cifs_NTtimeToUnix(file_inf.LastAccessTime)); /* * i_blocks is not related to (i_size / i_blksize), From 9a49874443307ca97d5bc199fa4c5ae1591ff5a0 Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Wed, 13 Dec 2023 10:23:53 +0800 Subject: [PATCH 1367/1397] fs: cifs: Fix atime update check [ Upstream commit 01fe654f78fd1ea4df046ef76b07ba92a35f8dbe ] Commit 9b9c5bea0b96 ("cifs: do not return atime less than mtime") indicates that in cifs, if atime is less than mtime, some apps will break. Therefore, it introduce a function to compare this two variables in two places where atime is updated. If atime is less than mtime, update it to mtime. However, the patch was handled incorrectly, resulting in atime and mtime being exactly equal. A previous commit 69738cfdfa70 ("fs: cifs: Fix atime update check vs mtime") fixed one place and forgot to fix another. Fix it. Fixes: 9b9c5bea0b96 ("cifs: do not return atime less than mtime") Cc: stable@vger.kernel.org Signed-off-by: Zizhi Wo Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index cf17e3dd703e..32a8525415d9 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -4671,7 +4671,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page, /* we do not want atime to be less than mtime, it broke some apps */ atime = inode_set_atime_to_ts(inode, current_time(inode)); mtime = inode_get_mtime(inode); - if (timespec64_compare(&atime, &mtime)) + if (timespec64_compare(&atime, &mtime) < 0) inode_set_atime_to_ts(inode, inode_get_mtime(inode)); if (PAGE_SIZE > rc) From 28d6cde17f219133a68d530b575e8725fe17a90f Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Fri, 1 Dec 2023 11:33:03 +0800 Subject: [PATCH 1368/1397] virtio_ring: fix syncs DMA memory with different direction [ Upstream commit 1f475cd572ea77ae6474a17e693a96bca927efe9 ] Now the APIs virtqueue_dma_sync_single_range_for_{cpu,device} ignore the parameter 'dir', that is a mistake. [ 6.101666] ------------[ cut here ]------------ [ 6.102079] DMA-API: virtio-pci 0000:00:04.0: device driver syncs DMA memory with different direction [device address=0x00000000ae010000] [size=32752 bytes] [mapped with DMA_FROM_DEVICE] [synced with DMA_BIDIRECTIONAL] [ 6.103630] WARNING: CPU: 6 PID: 0 at kernel/dma/debug.c:1125 check_sync+0x53e/0x6c0 [ 6.107420] CPU: 6 PID: 0 Comm: swapper/6 Tainted: G E 6.6.0+ #290 [ 6.108030] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 [ 6.108936] RIP: 0010:check_sync+0x53e/0x6c0 [ 6.109289] Code: 24 10 e8 f5 d9 74 00 4c 8b 4c 24 10 4c 8b 44 24 18 48 8b 4c 24 20 48 89 c6 41 56 4c 89 ea 48 c7 c7 b0 f1 50 82 e8 32 fc f3 ff <0f> 0b 48 c7 c7 48 4b 4a 82 e8 74 d9 fc ff 8b 73 4c 48 8d 7b 50 31 [ 6.110750] RSP: 0018:ffffc90000180cd8 EFLAGS: 00010092 [ 6.111178] RAX: 00000000000000ce RBX: ffff888100aa5900 RCX: 0000000000000000 [ 6.111744] RDX: 0000000000000104 RSI: ffffffff824c3208 RDI: 00000000ffffffff [ 6.112316] RBP: ffffc90000180d40 R08: 0000000000000000 R09: 00000000fffeffff [ 6.112893] R10: ffffc90000180b98 R11: ffffffff82f63308 R12: ffffffff83d5af00 [ 6.113460] R13: ffff888100998200 R14: ffffffff824a4b5f R15: 0000000000000286 [ 6.114027] FS: 0000000000000000(0000) GS:ffff88842fd80000(0000) knlGS:0000000000000000 [ 6.114665] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6.115128] CR2: 00007f10f1e03030 CR3: 0000000108272004 CR4: 0000000000770ee0 [ 6.115701] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 6.116272] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 6.116842] PKRU: 55555554 [ 6.117069] Call Trace: [ 6.117275] [ 6.117452] ? __warn+0x84/0x140 [ 6.117727] ? check_sync+0x53e/0x6c0 [ 6.118034] ? __report_bug+0xea/0x100 [ 6.118353] ? check_sync+0x53e/0x6c0 [ 6.118653] ? report_bug+0x41/0xc0 [ 6.118944] ? handle_bug+0x3c/0x70 [ 6.119237] ? exc_invalid_op+0x18/0x70 [ 6.119551] ? asm_exc_invalid_op+0x1a/0x20 [ 6.119900] ? check_sync+0x53e/0x6c0 [ 6.120199] ? check_sync+0x53e/0x6c0 [ 6.120499] debug_dma_sync_single_for_cpu+0x5c/0x70 [ 6.120906] ? dma_sync_single_for_cpu+0xb7/0x100 [ 6.121291] virtnet_rq_unmap+0x158/0x170 [virtio_net] [ 6.121716] virtnet_receive+0x196/0x220 [virtio_net] [ 6.122135] virtnet_poll+0x48/0x1b0 [virtio_net] [ 6.122524] __napi_poll+0x29/0x1b0 [ 6.123083] net_rx_action+0x282/0x360 [ 6.123612] __do_softirq+0xf3/0x2fb [ 6.124138] __irq_exit_rcu+0x8e/0xf0 [ 6.124663] common_interrupt+0xbc/0xe0 [ 6.125202] We need to enable CONFIG_DMA_API_DEBUG and work with need sync mode(such as swiotlb) to reproduce this warn. Fixes: 8bd2f71054bd ("virtio_ring: introduce dma sync api for virtqueue") Reported-by: "Ning, Hongyu" Closes: https://lore.kernel.org/all/f37cb55a-6fc8-4e21-8789-46d468325eea@linux.intel.com/ Suggested-by: Jason Wang Signed-off-by: Xuan Zhuo Message-Id: <20231201033303.25141-1-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Parav Pandit Acked-by: Jason Wang Tested-by: Hongyu Ning Signed-off-by: Sasha Levin --- drivers/virtio/virtio_ring.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 51d8f3299c10..49299b1f9ec7 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -3219,8 +3219,7 @@ void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, if (!vq->use_dma_api) return; - dma_sync_single_range_for_cpu(dev, addr, offset, size, - DMA_BIDIRECTIONAL); + dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); } EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu); @@ -3246,8 +3245,7 @@ void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, if (!vq->use_dma_api) return; - dma_sync_single_range_for_device(dev, addr, offset, size, - DMA_BIDIRECTIONAL); + dma_sync_single_range_for_device(dev, addr, offset, size, dir); } EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device); From 78422b744ad90f0f99b8081c10a1da9182964efd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Oct 2023 13:01:54 +0200 Subject: [PATCH 1369/1397] kexec: fix KEXEC_FILE dependencies [ Upstream commit c1ad12ee0efc07244be37f69311e6f7c4ac98e62 ] The cleanup for the CONFIG_KEXEC Kconfig logic accidentally changed the 'depends on CRYPTO=y' dependency to a plain 'depends on CRYPTO', which causes a link failure when all the crypto support is in a loadable module and kexec_file support is built-in: x86_64-linux-ld: vmlinux.o: in function `__x64_sys_kexec_file_load': (.text+0x32e30a): undefined reference to `crypto_alloc_shash' x86_64-linux-ld: (.text+0x32e58e): undefined reference to `crypto_shash_update' x86_64-linux-ld: (.text+0x32e6ee): undefined reference to `crypto_shash_final' Both s390 and x86 have this problem, while ppc64 and riscv have the correct dependency already. On riscv, the dependency is only used for the purgatory, not for the kexec_file code itself, which may be a bit surprising as it means that with CONFIG_CRYPTO=m, it is possible to enable KEXEC_FILE but then the purgatory code is silently left out. Move this into the common Kconfig.kexec file in a way that is correct everywhere, using the dependency on CRYPTO_SHA256=y only when the purgatory code is available. This requires reversing the dependency between ARCH_SUPPORTS_KEXEC_PURGATORY and KEXEC_FILE, but the effect remains the same, other than making riscv behave like the other ones. On s390, there is an additional dependency on CRYPTO_SHA256_S390, which should technically not be required but gives better performance. Remove this dependency here, noting that it was not present in the initial Kconfig code but was brought in without an explanation in commit 71406883fd357 ("s390/kexec_file: Add kexec_file_load system call"). [arnd@arndb.de: fix riscv build] Link: https://lkml.kernel.org/r/67ddd260-d424-4229-a815-e3fcfb864a77@app.fastmail.com Link: https://lkml.kernel.org/r/20231023110308.1202042-1-arnd@kernel.org Fixes: 6af5138083005 ("x86/kexec: refactor for kernel/Kconfig.kexec") Signed-off-by: Arnd Bergmann Reviewed-by: Eric DeVolder Tested-by: Eric DeVolder Cc: Albert Ou Cc: Alexander Gordeev Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Conor Dooley Cc: Dave Hansen Cc: David S. Miller Cc: Heiko Carstens Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- arch/powerpc/Kconfig | 4 ++-- arch/riscv/Kconfig | 4 +--- arch/s390/Kconfig | 4 ++-- arch/x86/Kconfig | 4 ++-- kernel/Kconfig.kexec | 1 + 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d5d5388973ac..4640cee33f12 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -607,10 +607,10 @@ config ARCH_SUPPORTS_KEXEC def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP) config ARCH_SUPPORTS_KEXEC_FILE - def_bool PPC64 && CRYPTO=y && CRYPTO_SHA256=y + def_bool PPC64 config ARCH_SUPPORTS_KEXEC_PURGATORY - def_bool KEXEC_FILE + def_bool y config ARCH_SELECTS_KEXEC_FILE def_bool y diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6688cbbed0b4..9e6d442773ee 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -686,9 +686,7 @@ config ARCH_SELECTS_KEXEC_FILE select KEXEC_ELF config ARCH_SUPPORTS_KEXEC_PURGATORY - def_bool KEXEC_FILE - depends on CRYPTO=y - depends on CRYPTO_SHA256=y + def_bool ARCH_SUPPORTS_KEXEC_FILE config ARCH_SUPPORTS_CRASH_DUMP def_bool y diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ae29e4392664..bd4782f23f66 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -252,13 +252,13 @@ config ARCH_SUPPORTS_KEXEC def_bool y config ARCH_SUPPORTS_KEXEC_FILE - def_bool CRYPTO && CRYPTO_SHA256 && CRYPTO_SHA256_S390 + def_bool y config ARCH_SUPPORTS_KEXEC_SIG def_bool MODULE_SIG_FORMAT config ARCH_SUPPORTS_KEXEC_PURGATORY - def_bool KEXEC_FILE + def_bool y config ARCH_SUPPORTS_CRASH_DUMP def_bool y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 66bfabae8814..fe3292e310d4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2034,7 +2034,7 @@ config ARCH_SUPPORTS_KEXEC def_bool y config ARCH_SUPPORTS_KEXEC_FILE - def_bool X86_64 && CRYPTO && CRYPTO_SHA256 + def_bool X86_64 config ARCH_SELECTS_KEXEC_FILE def_bool y @@ -2042,7 +2042,7 @@ config ARCH_SELECTS_KEXEC_FILE select HAVE_IMA_KEXEC if IMA config ARCH_SUPPORTS_KEXEC_PURGATORY - def_bool KEXEC_FILE + def_bool y config ARCH_SUPPORTS_KEXEC_SIG def_bool y diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index f9619ac6b71d..d3b8a2b1b573 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -36,6 +36,7 @@ config KEXEC config KEXEC_FILE bool "Enable kexec file based system call" depends on ARCH_SUPPORTS_KEXEC_FILE + depends on CRYPTO_SHA256=y || !ARCH_SUPPORTS_KEXEC_PURGATORY select KEXEC_CORE help This is new version of kexec system call. This system call is From 7844d7d8d8af0eed290004f5f139bc050e2d98bc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Oct 2023 13:01:55 +0200 Subject: [PATCH 1370/1397] kexec: select CRYPTO from KEXEC_FILE instead of depending on it [ Upstream commit e63bde3d9417f8318d6dd0d0fafa35ebf307aabd ] All other users of crypto code use 'select' instead of 'depends on', so do the same thing with KEXEC_FILE for consistency. In practice this makes very little difference as kernels with kexec support are very likely to also include some other feature that already selects both crypto and crypto_sha256, but being consistent here helps for usability as well as to avoid potential circular dependencies. This reverts the dependency back to what it was originally before commit 74ca317c26a3f ("kexec: create a new config option CONFIG_KEXEC_FILE for new syscall"), which changed changed it with the comment "This should be safer as "select" is not recursive", but that appears to have been done in error, as "select" is indeed recursive, and there are no other dependencies that prevent CRYPTO_SHA256 from being selected here. Link: https://lkml.kernel.org/r/20231023110308.1202042-2-arnd@kernel.org Fixes: 74ca317c26a3f ("kexec: create a new config option CONFIG_KEXEC_FILE for new syscall") Signed-off-by: Arnd Bergmann Reviewed-by: Eric DeVolder Tested-by: Eric DeVolder Acked-by: Baoquan He Cc: Herbert Xu Cc: "David S. Miller" Cc: Albert Ou Cc: Alexander Gordeev Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Conor Dooley Cc: Dave Hansen Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Nicholas Piggin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/Kconfig.kexec | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index d3b8a2b1b573..37e488d5b4fc 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -36,7 +36,8 @@ config KEXEC config KEXEC_FILE bool "Enable kexec file based system call" depends on ARCH_SUPPORTS_KEXEC_FILE - depends on CRYPTO_SHA256=y || !ARCH_SUPPORTS_KEXEC_PURGATORY + select CRYPTO + select CRYPTO_SHA256 select KEXEC_CORE help This is new version of kexec system call. This system call is From 466e9af1550724e3c47600e26904d740a4cf2a92 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 22 Nov 2023 23:18:11 +0100 Subject: [PATCH 1371/1397] linux/export: Fix alignment for 64-bit ksymtab entries [ Upstream commit f6847807c22f6944c71c981b630b9fff30801e73 ] An alignment of 4 bytes is wrong for 64-bit platforms which don't define CONFIG_HAVE_ARCH_PREL32_RELOCATIONS (which then store 64-bit pointers). Fix their alignment to 8 bytes. Fixes: ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost") Signed-off-by: Helge Deller Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- include/linux/export-internal.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index 45fca09b2319..b842aeecef79 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -16,10 +16,13 @@ * and eliminates the need for absolute relocations that require runtime * processing on relocatable kernels. */ +#define __KSYM_ALIGN ".balign 4" #define __KSYM_REF(sym) ".long " #sym "- ." #elif defined(CONFIG_64BIT) +#define __KSYM_ALIGN ".balign 8" #define __KSYM_REF(sym) ".quad " #sym #else +#define __KSYM_ALIGN ".balign 4" #define __KSYM_REF(sym) ".long " #sym #endif @@ -42,7 +45,7 @@ " .asciz \"" ns "\"" "\n" \ " .previous" "\n" \ " .section \"___ksymtab" sec "+" #name "\", \"a\"" "\n" \ - " .balign 4" "\n" \ + __KSYM_ALIGN "\n" \ "__ksymtab_" #name ":" "\n" \ __KSYM_REF(sym) "\n" \ __KSYM_REF(__kstrtab_ ##name) "\n" \ From 183c8972b6a6f85de96c8975689eed92a0af0847 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 28 Dec 2023 11:36:03 +0100 Subject: [PATCH 1372/1397] linux/export: Ensure natural alignment of kcrctab array [ Upstream commit 753547de0daecbdbd1af3618987ddade325d9aaa ] The ___kcrctab section holds an array of 32-bit CRC values. Add a .balign 4 to tell the linker the correct memory alignment. Fixes: f3304ecd7f06 ("linux/export: use inline assembler to populate symbol CRCs") Signed-off-by: Helge Deller Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- include/linux/export-internal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index b842aeecef79..528019477734 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -66,6 +66,7 @@ #define SYMBOL_CRC(sym, crc, sec) \ asm(".section \"___kcrctab" sec "+" #sym "\",\"a\"" "\n" \ + ".balign 4" "\n" \ "__crc_" #sym ":" "\n" \ ".long " #crc "\n" \ ".previous" "\n") From 34c7757aa56109b6533ad746d2168df185dabfab Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 23 Oct 2023 13:44:42 -0700 Subject: [PATCH 1373/1397] mptcp: refactor sndbuf auto-tuning [ Upstream commit 8005184fd1ca6aeb3fea36f4eb9463fc1b90c114 ] The MPTCP protocol account for the data enqueued on all the subflows to the main socket send buffer, while the send buffer auto-tuning algorithm set the main socket send buffer size as the max size among the subflows. That causes bad performances when at least one subflow is sndbuf limited, e.g. due to very high latency, as the MPTCP scheduler can't even fill such buffer. Change the send-buffer auto-tuning algorithm to compute the main socket send buffer size as the sum of all the subflows buffer size. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231023-send-net-next-20231023-2-v1-9-9dc60939d371@kernel.org Signed-off-by: Jakub Kicinski Stable-dep-of: 4fd19a307016 ("mptcp: fix inconsistent state on fastopen race") Signed-off-by: Sasha Levin --- net/mptcp/protocol.c | 18 +++++++++++++-- net/mptcp/protocol.h | 54 ++++++++++++++++++++++++++++++++++++++++---- net/mptcp/sockopt.c | 5 +++- net/mptcp/subflow.c | 3 +-- 4 files changed, 70 insertions(+), 10 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c1527f520dce..44499e49d76e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -893,6 +893,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_sockopt_sync_locked(msk, ssk); mptcp_subflow_joined(msk, ssk); mptcp_stop_tout_timer(sk); + __mptcp_propagate_sndbuf(sk, ssk); return true; } @@ -1079,15 +1080,16 @@ static void mptcp_enter_memory_pressure(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); bool first = true; - sk_stream_moderate_sndbuf(sk); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (first) tcp_enter_memory_pressure(ssk); sk_stream_moderate_sndbuf(ssk); + first = false; } + __mptcp_sync_sndbuf(sk); } /* ensure we get enough memory for the frag hdr, beyond some minimal amount of @@ -2452,6 +2454,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, WRITE_ONCE(msk->first, NULL); out: + __mptcp_sync_sndbuf(sk); if (need_push) __mptcp_push_pending(sk, 0); @@ -3223,7 +3226,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, * uses the correct data */ mptcp_copy_inaddrs(nsk, ssk); - mptcp_propagate_sndbuf(nsk, ssk); + __mptcp_propagate_sndbuf(nsk, ssk); mptcp_rcv_space_init(msk, ssk); bh_unlock_sock(nsk); @@ -3401,6 +3404,8 @@ static void mptcp_release_cb(struct sock *sk) __mptcp_set_connected(sk); if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) __mptcp_error_report(sk); + if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) + __mptcp_sync_sndbuf(sk); } __mptcp_update_rmem(sk); @@ -3445,6 +3450,14 @@ void mptcp_subflow_process_delegated(struct sock *ssk, long status) __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags); mptcp_data_unlock(sk); } + if (status & BIT(MPTCP_DELEGATE_SNDBUF)) { + mptcp_data_lock(sk); + if (!sock_owned_by_user(sk)) + __mptcp_sync_sndbuf(sk); + else + __set_bit(MPTCP_SYNC_SNDBUF, &mptcp_sk(sk)->cb_flags); + mptcp_data_unlock(sk); + } if (status & BIT(MPTCP_DELEGATE_ACK)) schedule_3rdack_retransmission(ssk); } @@ -3529,6 +3542,7 @@ bool mptcp_finish_join(struct sock *ssk) /* active subflow, already present inside the conn_list */ if (!list_empty(&subflow->node)) { mptcp_subflow_joined(msk, ssk); + mptcp_propagate_sndbuf(parent, ssk); return true; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3612545fa62e..40866acd91ad 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -123,6 +123,7 @@ #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 #define MPTCP_CONNECTED 6 +#define MPTCP_SYNC_SNDBUF 7 struct mptcp_skb_cb { u64 map_seq; @@ -447,6 +448,7 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); #define MPTCP_DELEGATE_SCHEDULED 0 #define MPTCP_DELEGATE_SEND 1 #define MPTCP_DELEGATE_ACK 2 +#define MPTCP_DELEGATE_SNDBUF 3 #define MPTCP_DELEGATE_ACTIONS_MASK (~BIT(MPTCP_DELEGATE_SCHEDULED)) /* MPTCP subflow context */ @@ -520,6 +522,9 @@ struct mptcp_subflow_context { u32 setsockopt_seq; u32 stale_rcv_tstamp; + int cached_sndbuf; /* sndbuf size when last synced with the msk sndbuf, + * protected by the msk socket lock + */ struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ @@ -762,13 +767,52 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } -static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) +static inline void __mptcp_sync_sndbuf(struct sock *sk) { - if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf)) - return false; + struct mptcp_subflow_context *subflow; + int ssk_sndbuf, new_sndbuf; + + if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) + return; + + new_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[0]; + mptcp_for_each_subflow(mptcp_sk(sk), subflow) { + ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf); + + subflow->cached_sndbuf = ssk_sndbuf; + new_sndbuf += ssk_sndbuf; + } + + /* the msk max wmem limit is * tcp wmem[2] */ + WRITE_ONCE(sk->sk_sndbuf, new_sndbuf); +} + +/* The called held both the msk socket and the subflow socket locks, + * possibly under BH + */ +static inline void __mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + if (READ_ONCE(ssk->sk_sndbuf) != subflow->cached_sndbuf) + __mptcp_sync_sndbuf(sk); +} + +/* the caller held only the subflow socket lock, either in process or + * BH context. Additionally this can be called under the msk data lock, + * so we can't acquire such lock here: let the delegate action acquires + * the needed locks in suitable order. + */ +static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + if (likely(READ_ONCE(ssk->sk_sndbuf) == subflow->cached_sndbuf)) + return; - WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf); - return true; + local_bh_disable(); + mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_SNDBUF); + local_bh_enable(); } static inline void mptcp_write_space(struct sock *sk) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 7539b9c8c2fb..116e3008231b 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -95,6 +95,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in case SO_SNDBUFFORCE: ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); + mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; break; case SO_RCVBUF: case SO_RCVBUFFORCE: @@ -1418,8 +1419,10 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_userlocks & tx_rx_locks) { ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; - if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) + if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); + mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; + } if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9c1f8d1d63d2..d8827427ffc8 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -421,6 +421,7 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc void __mptcp_set_connected(struct sock *sk) { + __mptcp_propagate_sndbuf(sk, mptcp_sk(sk)->first); if (sk->sk_state == TCP_SYN_SENT) { inet_sk_state_store(sk, TCP_ESTABLISHED); sk->sk_state_change(sk); @@ -472,7 +473,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) return; msk = mptcp_sk(parent); - mptcp_propagate_sndbuf(parent, sk); subflow->rel_write_seq = 1; subflow->conn_finished = 1; subflow->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -1728,7 +1728,6 @@ static void subflow_state_change(struct sock *sk) msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { - mptcp_propagate_sndbuf(parent, sk); mptcp_do_fallback(sk); mptcp_rcv_space_init(msk, sk); pr_fallback(msk); From 44ee4764c60a784c6da27680968b9ffe0604196a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 14 Nov 2023 00:16:14 +0100 Subject: [PATCH 1374/1397] mptcp: fix possible NULL pointer dereference on close [ Upstream commit d109a7767273d1706b541c22b83a0323823dfde4 ] After the blamed commit below, the MPTCP release callback can dereference the first subflow pointer via __mptcp_set_connected() and send buffer auto-tuning. Such pointer is always expected to be valid, except at socket destruction time, when the first subflow is deleted and the pointer zeroed. If the connect event is handled by the release callback while the msk socket is finally released, MPTCP hits the following splat: general protection fault, probably for non-canonical address 0xdffffc00000000f2: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000790-0x0000000000000797] CPU: 1 PID: 26719 Comm: syz-executor.2 Not tainted 6.6.0-syzkaller-10102-gff269e2cd5ad #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 RIP: 0010:mptcp_subflow_ctx net/mptcp/protocol.h:542 [inline] RIP: 0010:__mptcp_propagate_sndbuf net/mptcp/protocol.h:813 [inline] RIP: 0010:__mptcp_set_connected+0x57/0x3e0 net/mptcp/subflow.c:424 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8a62323c RDX: 00000000000000f2 RSI: ffffffff8a630116 RDI: 0000000000000790 RBP: ffff88803334b100 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000034 R12: ffff88803334b198 R13: ffff888054f0b018 R14: 0000000000000000 R15: ffff88803334b100 FS: 0000000000000000(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fbcb4f75198 CR3: 000000006afb5000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: mptcp_release_cb+0xa2c/0xc40 net/mptcp/protocol.c:3405 release_sock+0xba/0x1f0 net/core/sock.c:3537 mptcp_close+0x32/0xf0 net/mptcp/protocol.c:3084 inet_release+0x132/0x270 net/ipv4/af_inet.c:433 inet6_release+0x4f/0x70 net/ipv6/af_inet6.c:485 __sock_release+0xae/0x260 net/socket.c:659 sock_close+0x1c/0x20 net/socket.c:1419 __fput+0x270/0xbb0 fs/file_table.c:394 task_work_run+0x14d/0x240 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xa92/0x2a20 kernel/exit.c:876 do_group_exit+0xd4/0x2a0 kernel/exit.c:1026 get_signal+0x23ba/0x2790 kernel/signal.c:2900 arch_do_signal_or_restart+0x90/0x7f0 arch/x86/kernel/signal.c:309 exit_to_user_mode_loop kernel/entry/common.c:168 [inline] exit_to_user_mode_prepare+0x11f/0x240 kernel/entry/common.c:204 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x60 kernel/entry/common.c:296 do_syscall_64+0x4b/0x110 arch/x86/entry/common.c:88 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7fb515e7cae9 Code: Unable to access opcode bytes at 0x7fb515e7cabf. RSP: 002b:00007fb516c560c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: 000000000000003c RBX: 00007fb515f9c120 RCX: 00007fb515e7cae9 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000006 RBP: 00007fb515ec847a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000006e R14: 00007fb515f9c120 R15: 00007ffc631eb968 To avoid sparkling unneeded conditionals, address the issue explicitly checking msk->first only in the critical place. Fixes: 8005184fd1ca ("mptcp: refactor sndbuf auto-tuning") Cc: stable@vger.kernel.org Reported-by: Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/454 Reported-by: Eric Dumazet Closes: https://lore.kernel.org/netdev/CANn89iLZUA6S2a=K8GObnS62KK6Jt4B7PsAs7meMFooM8xaTgw@mail.gmail.com/ Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231114-upstream-net-20231113-mptcp-misc-fixes-6-7-rc2-v1-2-7b9cd6a7b7f4@kernel.org Signed-off-by: Jakub Kicinski Stable-dep-of: 4fd19a307016 ("mptcp: fix inconsistent state on fastopen race") Signed-off-by: Sasha Levin --- net/mptcp/protocol.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 44499e49d76e..dc030551cac1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3397,10 +3397,11 @@ static void mptcp_release_cb(struct sock *sk) if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) __mptcp_clean_una_wakeup(sk); if (unlikely(msk->cb_flags)) { - /* be sure to set the current sk state before tacking actions - * depending on sk_state, that is processing MPTCP_ERROR_REPORT + /* be sure to set the current sk state before taking actions + * depending on sk_state (MPTCP_ERROR_REPORT) + * On sk release avoid actions depending on the first subflow */ - if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags)) + if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first) __mptcp_set_connected(sk); if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) __mptcp_error_report(sk); From cf742d0955850ca2e33410dda6afd360d6cd65bc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 15 Dec 2023 17:04:25 +0100 Subject: [PATCH 1375/1397] mptcp: fix inconsistent state on fastopen race [ Upstream commit 4fd19a30701659af5839b7bd19d1f05f05933ebe ] The netlink PM can race with fastopen self-connect attempts, shutting down the first subflow via: MPTCP_PM_CMD_DEL_ADDR -> mptcp_nl_remove_id_zero_address -> mptcp_pm_nl_rm_subflow_received -> mptcp_close_ssk and transitioning such subflow to FIN_WAIT1 status before the syn-ack packet is processed. The MPTCP code does not react to such state change, leaving the connection in not-fallback status and the subflow handshake uncompleted, triggering the following splat: WARNING: CPU: 0 PID: 10630 at net/mptcp/subflow.c:1405 subflow_data_ready+0x39f/0x690 net/mptcp/subflow.c:1405 Modules linked in: CPU: 0 PID: 10630 Comm: kworker/u4:11 Not tainted 6.6.0-syzkaller-14500-g1c41041124bd #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Workqueue: bat_events batadv_nc_worker RIP: 0010:subflow_data_ready+0x39f/0x690 net/mptcp/subflow.c:1405 Code: 18 89 ee e8 e3 d2 21 f7 40 84 ed 75 1f e8 a9 d7 21 f7 44 89 fe bf 07 00 00 00 e8 0c d3 21 f7 41 83 ff 07 74 07 e8 91 d7 21 f7 <0f> 0b e8 8a d7 21 f7 48 89 df e8 d2 b2 ff ff 31 ff 89 c5 89 c6 e8 RSP: 0018:ffffc90000007448 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff888031efc700 RCX: ffffffff8a65baf4 RDX: ffff888043222140 RSI: ffffffff8a65baff RDI: 0000000000000005 RBP: 0000000000000000 R08: 0000000000000005 R09: 0000000000000007 R10: 000000000000000b R11: 0000000000000000 R12: 1ffff92000000e89 R13: ffff88807a534d80 R14: ffff888021c11a00 R15: 000000000000000b FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa19a0ffc81 CR3: 000000007a2db000 CR4: 00000000003506f0 DR0: 000000000000d8dd DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Call Trace: tcp_data_ready+0x14c/0x5b0 net/ipv4/tcp_input.c:5128 tcp_data_queue+0x19c3/0x5190 net/ipv4/tcp_input.c:5208 tcp_rcv_state_process+0x11ef/0x4e10 net/ipv4/tcp_input.c:6844 tcp_v4_do_rcv+0x369/0xa10 net/ipv4/tcp_ipv4.c:1929 tcp_v4_rcv+0x3888/0x3b30 net/ipv4/tcp_ipv4.c:2329 ip_protocol_deliver_rcu+0x9f/0x480 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2e4/0x510 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ip_local_deliver+0x1b6/0x550 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish+0x1c4/0x2e0 net/ipv4/ip_input.c:449 NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ip_rcv+0xce/0x440 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core+0x115/0x180 net/core/dev.c:5527 __netif_receive_skb+0x1f/0x1b0 net/core/dev.c:5641 process_backlog+0x101/0x6b0 net/core/dev.c:5969 __napi_poll.constprop.0+0xb4/0x540 net/core/dev.c:6531 napi_poll net/core/dev.c:6600 [inline] net_rx_action+0x956/0xe90 net/core/dev.c:6733 __do_softirq+0x21a/0x968 kernel/softirq.c:553 do_softirq kernel/softirq.c:454 [inline] do_softirq+0xaa/0xe0 kernel/softirq.c:441 __local_bh_enable_ip+0xf8/0x120 kernel/softirq.c:381 spin_unlock_bh include/linux/spinlock.h:396 [inline] batadv_nc_purge_paths+0x1ce/0x3c0 net/batman-adv/network-coding.c:471 batadv_nc_worker+0x9b1/0x10e0 net/batman-adv/network-coding.c:722 process_one_work+0x884/0x15c0 kernel/workqueue.c:2630 process_scheduled_works kernel/workqueue.c:2703 [inline] worker_thread+0x8b9/0x1290 kernel/workqueue.c:2784 kthread+0x33c/0x440 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 To address the issue, catch the racing subflow state change and use it to cause the MPTCP fallback. Such fallback is also used to cause the first subflow state propagation to the msk socket via mptcp_set_connected(). After this change, the first subflow can additionally propagate the TCP_FIN_WAIT1 state, so rename the helper accordingly. Finally, if the state propagation is delayed to the msk release callback, the first subflow can change to a different state in between. Cache the relevant target state in a new msk-level field and use such value to update the msk state at release time. Fixes: 1e777f39b4d7 ("mptcp: add MSG_FASTOPEN sendmsg flag support") Cc: stable@vger.kernel.org Reported-by: Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/458 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/mptcp/protocol.c | 6 +++--- net/mptcp/protocol.h | 9 ++++++--- net/mptcp/subflow.c | 28 +++++++++++++++++----------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index dc030551cac1..5c003a0f0fe5 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3397,12 +3397,12 @@ static void mptcp_release_cb(struct sock *sk) if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) __mptcp_clean_una_wakeup(sk); if (unlikely(msk->cb_flags)) { - /* be sure to set the current sk state before taking actions + /* be sure to sync the msk state before taking actions * depending on sk_state (MPTCP_ERROR_REPORT) * On sk release avoid actions depending on the first subflow */ - if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first) - __mptcp_set_connected(sk); + if (__test_and_clear_bit(MPTCP_SYNC_STATE, &msk->cb_flags) && msk->first) + __mptcp_sync_state(sk, msk->pending_state); if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) __mptcp_error_report(sk); if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 40866acd91ad..07c5ac37d092 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -122,7 +122,7 @@ #define MPTCP_ERROR_REPORT 3 #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 -#define MPTCP_CONNECTED 6 +#define MPTCP_SYNC_STATE 6 #define MPTCP_SYNC_SNDBUF 7 struct mptcp_skb_cb { @@ -293,6 +293,9 @@ struct mptcp_sock { bool use_64bit_ack; /* Set when we received a 64-bit DSN */ bool csum_enabled; bool allow_infinite_fallback; + u8 pending_state; /* A subflow asked to set this sk_state, + * protected by the msk data lock + */ u8 mpc_endpoint_id; u8 recvmsg_inq:1, cork:1, @@ -711,7 +714,7 @@ void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); -void __mptcp_set_connected(struct sock *sk); +void __mptcp_sync_state(struct sock *sk, int state); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); static inline void mptcp_stop_tout_timer(struct sock *sk) @@ -1101,7 +1104,7 @@ static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - return sk->sk_state == TCP_ESTABLISHED && + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) && is_active_ssk(subflow) && !subflow->conn_finished; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index d8827427ffc8..f3a1e4aa0e5e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -419,22 +419,28 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport; } -void __mptcp_set_connected(struct sock *sk) +void __mptcp_sync_state(struct sock *sk, int state) { - __mptcp_propagate_sndbuf(sk, mptcp_sk(sk)->first); + struct mptcp_sock *msk = mptcp_sk(sk); + + __mptcp_propagate_sndbuf(sk, msk->first); if (sk->sk_state == TCP_SYN_SENT) { - inet_sk_state_store(sk, TCP_ESTABLISHED); + inet_sk_state_store(sk, state); sk->sk_state_change(sk); } } -static void mptcp_set_connected(struct sock *sk) +static void mptcp_propagate_state(struct sock *sk, struct sock *ssk) { + struct mptcp_sock *msk = mptcp_sk(sk); + mptcp_data_lock(sk); - if (!sock_owned_by_user(sk)) - __mptcp_set_connected(sk); - else - __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags); + if (!sock_owned_by_user(sk)) { + __mptcp_sync_state(sk, ssk->sk_state); + } else { + msk->pending_state = ssk->sk_state; + __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); + } mptcp_data_unlock(sk); } @@ -496,7 +502,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow_set_remote_key(msk, subflow, &mp_opt); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -540,7 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } else if (mptcp_check_fallback(sk)) { fallback: mptcp_rcv_space_init(msk, sk); - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } return; @@ -1732,7 +1738,7 @@ static void subflow_state_change(struct sock *sk) mptcp_rcv_space_init(msk, sk); pr_fallback(msk); subflow->conn_finished = 1; - mptcp_set_connected(parent); + mptcp_propagate_state(parent, sk); } /* as recvmsg() does not acquire the subflow socket for ssk selection From b5f63f5e8a6820a6093b6caf35bb0f7c946c7d72 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Dec 2023 08:15:24 +0000 Subject: [PATCH 1376/1397] block: renumber QUEUE_FLAG_HW_WC [ Upstream commit 02d374f3418df577c850f0cd45c3da9245ead547 ] For the QUEUE_FLAG_HW_WC to actually work, it needs to have a separate number from QUEUE_FLAG_FUA, doh. Fixes: 43c9835b144c ("block: don't allow enabling a cache on devices that don't support it") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20231226081524.180289-1-hch@lst.de Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index eef450f25982..f59fcd5b499a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -538,7 +538,7 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_HW_WC 18 /* Write back caching supported */ +#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ From 8663b99c38a6f76b7552151e648840af6d137e91 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Fri, 22 Dec 2023 19:25:43 -0800 Subject: [PATCH 1377/1397] platform/x86/intel/pmc: Add suspend callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7c13f365aee68b01e7e68ee293a71fdc7571c111 ] Add a suspend callback to struct pmc for performing platform specific tasks before device suspend. This is needed in order to perform GBE LTR ignore on certain platforms at suspend-time instead of at probe-time and replace the GBE LTR ignore removal that was done in order to fix a bug introduced by commit 804951203aa5 ("platform/x86:intel/pmc: Combine core_init() and core_configure()"). Fixes: 804951203aa5 ("platform/x86:intel/pmc: Combine core_init() and core_configure()") Signed-off-by: "David E. Box" Link: https://lore.kernel.org/r/20231223032548.1680738-4-david.e.box@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/intel/pmc/core.c | 3 +++ drivers/platform/x86/intel/pmc/core.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index e95d3011b999..5ab470d87ad7 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1279,6 +1279,9 @@ static __maybe_unused int pmc_core_suspend(struct device *dev) struct pmc_dev *pmcdev = dev_get_drvdata(dev); struct pmc *pmc = pmcdev->pmcs[PMC_IDX_MAIN]; + if (pmcdev->suspend) + pmcdev->suspend(pmcdev); + /* Check if the syspend will actually use S0ix */ if (pm_suspend_via_firmware()) return 0; diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index 0729f593c6a7..38d888e3afa6 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -363,6 +363,7 @@ struct pmc { * @s0ix_counter: S0ix residency (step adjusted) * @num_lpm_modes: Count of enabled modes * @lpm_en_modes: Array of enabled modes from lowest to highest priority + * @suspend: Function to perform platform specific suspend * @resume: Function to perform platform specific resume * * pmc_dev contains info about power management controller device. @@ -379,6 +380,7 @@ struct pmc_dev { u64 s0ix_counter; int num_lpm_modes; int lpm_en_modes[LPM_MAX_NUM_MODES]; + void (*suspend)(struct pmc_dev *pmcdev); int (*resume)(struct pmc_dev *pmcdev); bool has_die_c6; From 91dcd5ee1e11c70ea11b1c879c157ee9ff35f38d Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Fri, 22 Dec 2023 19:25:44 -0800 Subject: [PATCH 1378/1397] platform/x86/intel/pmc: Allow reenabling LTRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6f9cc5c1f94daa98846b2073733d03ced709704b ] Commit 804951203aa5 ("platform/x86:intel/pmc: Combine core_init() and core_configure()") caused a network performance regression due to the GBE LTR ignore that it added during probe. The fix will move the ignore to occur at suspend-time (so as to not affect suspend power). This will require the ability to enable the LTR again on resume. Modify pmc_core_send_ltr_ignore() to allow enabling an LTR. Fixes: 804951203aa5 ("platform/x86:intel/pmc: Combine core_init() and core_configure()") Signed-off-by: "David E. Box" Link: https://lore.kernel.org/r/20231223032548.1680738-5-david.e.box@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/intel/pmc/adl.c | 2 +- drivers/platform/x86/intel/pmc/cnp.c | 2 +- drivers/platform/x86/intel/pmc/core.c | 9 ++++++--- drivers/platform/x86/intel/pmc/core.h | 2 +- drivers/platform/x86/intel/pmc/mtl.c | 2 +- drivers/platform/x86/intel/pmc/tgl.c | 2 +- 6 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/platform/x86/intel/pmc/adl.c b/drivers/platform/x86/intel/pmc/adl.c index 5006008e01be..a887c388cf16 100644 --- a/drivers/platform/x86/intel/pmc/adl.c +++ b/drivers/platform/x86/intel/pmc/adl.c @@ -323,7 +323,7 @@ int adl_core_init(struct pmc_dev *pmcdev) * when a cable is attached. Tell the PMC to ignore it. */ dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3); + pmc_core_send_ltr_ignore(pmcdev, 3, 1); return 0; } diff --git a/drivers/platform/x86/intel/pmc/cnp.c b/drivers/platform/x86/intel/pmc/cnp.c index 420aaa1d7c76..10498204962c 100644 --- a/drivers/platform/x86/intel/pmc/cnp.c +++ b/drivers/platform/x86/intel/pmc/cnp.c @@ -218,7 +218,7 @@ int cnp_core_init(struct pmc_dev *pmcdev) * when a cable is attached. Tell the PMC to ignore it. */ dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3); + pmc_core_send_ltr_ignore(pmcdev, 3, 1); return 0; } diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 5ab470d87ad7..022afb97d531 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -460,7 +460,7 @@ static int pmc_core_pll_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(pmc_core_pll); -int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value) +int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value, int ignore) { struct pmc *pmc; const struct pmc_reg_map *map; @@ -498,7 +498,10 @@ int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value) mutex_lock(&pmcdev->lock); reg = pmc_core_reg_read(pmc, map->ltr_ignore_offset); - reg |= BIT(ltr_index); + if (ignore) + reg |= BIT(ltr_index); + else + reg &= ~BIT(ltr_index); pmc_core_reg_write(pmc, map->ltr_ignore_offset, reg); mutex_unlock(&pmcdev->lock); @@ -521,7 +524,7 @@ static ssize_t pmc_core_ltr_ignore_write(struct file *file, if (err) return err; - err = pmc_core_send_ltr_ignore(pmcdev, value); + err = pmc_core_send_ltr_ignore(pmcdev, value, 1); return err == 0 ? count : err; } diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index 38d888e3afa6..71ba7d691d69 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -488,7 +488,7 @@ extern const struct pmc_bit_map *mtl_ioem_lpm_maps[]; extern const struct pmc_reg_map mtl_ioem_reg_map; extern void pmc_core_get_tgl_lpm_reqs(struct platform_device *pdev); -extern int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value); +int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value, int ignore); int pmc_core_resume_common(struct pmc_dev *pmcdev); int get_primary_reg_base(struct pmc *pmc); diff --git a/drivers/platform/x86/intel/pmc/mtl.c b/drivers/platform/x86/intel/pmc/mtl.c index 2204bc666980..71dc11811e11 100644 --- a/drivers/platform/x86/intel/pmc/mtl.c +++ b/drivers/platform/x86/intel/pmc/mtl.c @@ -1006,7 +1006,7 @@ int mtl_core_init(struct pmc_dev *pmcdev) * when a cable is attached. Tell the PMC to ignore it. */ dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3); + pmc_core_send_ltr_ignore(pmcdev, 3, 1); return 0; } diff --git a/drivers/platform/x86/intel/pmc/tgl.c b/drivers/platform/x86/intel/pmc/tgl.c index 2449940102db..078263db24c7 100644 --- a/drivers/platform/x86/intel/pmc/tgl.c +++ b/drivers/platform/x86/intel/pmc/tgl.c @@ -268,7 +268,7 @@ int tgl_core_init(struct pmc_dev *pmcdev) * when a cable is attached. Tell the PMC to ignore it. */ dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3); + pmc_core_send_ltr_ignore(pmcdev, 3, 1); return 0; } From 33fd5fb1258b32976f2d83f46a59832dde784831 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Fri, 22 Dec 2023 19:25:45 -0800 Subject: [PATCH 1379/1397] platform/x86/intel/pmc: Move GBE LTR ignore to suspend callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 70681aa0746ae61d7668b9f651221fad5e30c71e ] Commit 804951203aa5 ("platform/x86:intel/pmc: Combine core_init() and core_configure()") caused a network performance regression due to the GBE LTR ignore that it added at probe. This was needed in order to allow the SoC to enter the deepest Package C state. To fix the regression and at least support PC10 during suspend, move the LTR ignore from probe to the suspend callback, and enable it again on resume. This solution will allow PC10 during suspend but restrict Package C entry at runtime to no deeper than PC8/9 while a network cable it attach to the PCH LAN. Fixes: 804951203aa5 ("platform/x86:intel/pmc: Combine core_init() and core_configure()") Signed-off-by: "David E. Box" Link: https://lore.kernel.org/r/20231223032548.1680738-6-david.e.box@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/intel/pmc/adl.c | 9 +++------ drivers/platform/x86/intel/pmc/cnp.c | 26 ++++++++++++++++++++------ drivers/platform/x86/intel/pmc/core.h | 3 +++ drivers/platform/x86/intel/pmc/mtl.c | 9 +++------ drivers/platform/x86/intel/pmc/tgl.c | 9 ++++----- 5 files changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/platform/x86/intel/pmc/adl.c b/drivers/platform/x86/intel/pmc/adl.c index a887c388cf16..606f7678bcb0 100644 --- a/drivers/platform/x86/intel/pmc/adl.c +++ b/drivers/platform/x86/intel/pmc/adl.c @@ -314,16 +314,13 @@ int adl_core_init(struct pmc_dev *pmcdev) struct pmc *pmc = pmcdev->pmcs[PMC_IDX_MAIN]; int ret; + pmcdev->suspend = cnl_suspend; + pmcdev->resume = cnl_resume; + pmc->map = &adl_reg_map; ret = get_primary_reg_base(pmc); if (ret) return ret; - /* Due to a hardware limitation, the GBE LTR blocks PC10 - * when a cable is attached. Tell the PMC to ignore it. - */ - dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3, 1); - return 0; } diff --git a/drivers/platform/x86/intel/pmc/cnp.c b/drivers/platform/x86/intel/pmc/cnp.c index 10498204962c..98b36651201a 100644 --- a/drivers/platform/x86/intel/pmc/cnp.c +++ b/drivers/platform/x86/intel/pmc/cnp.c @@ -204,21 +204,35 @@ const struct pmc_reg_map cnp_reg_map = { .etr3_offset = ETR3_OFFSET, }; +void cnl_suspend(struct pmc_dev *pmcdev) +{ + /* + * Due to a hardware limitation, the GBE LTR blocks PC10 + * when a cable is attached. To unblock PC10 during suspend, + * tell the PMC to ignore it. + */ + pmc_core_send_ltr_ignore(pmcdev, 3, 1); +} + +int cnl_resume(struct pmc_dev *pmcdev) +{ + pmc_core_send_ltr_ignore(pmcdev, 3, 0); + + return pmc_core_resume_common(pmcdev); +} + int cnp_core_init(struct pmc_dev *pmcdev) { struct pmc *pmc = pmcdev->pmcs[PMC_IDX_MAIN]; int ret; + pmcdev->suspend = cnl_suspend; + pmcdev->resume = cnl_resume; + pmc->map = &cnp_reg_map; ret = get_primary_reg_base(pmc); if (ret) return ret; - /* Due to a hardware limitation, the GBE LTR blocks PC10 - * when a cable is attached. Tell the PMC to ignore it. - */ - dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3, 1); - return 0; } diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index 71ba7d691d69..b66dacbfb94b 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -502,6 +502,9 @@ int tgl_core_init(struct pmc_dev *pmcdev); int adl_core_init(struct pmc_dev *pmcdev); int mtl_core_init(struct pmc_dev *pmcdev); +void cnl_suspend(struct pmc_dev *pmcdev); +int cnl_resume(struct pmc_dev *pmcdev); + #define pmc_for_each_mode(i, mode, pmcdev) \ for (i = 0, mode = pmcdev->lpm_en_modes[i]; \ i < pmcdev->num_lpm_modes; \ diff --git a/drivers/platform/x86/intel/pmc/mtl.c b/drivers/platform/x86/intel/pmc/mtl.c index 71dc11811e11..504e3e273c32 100644 --- a/drivers/platform/x86/intel/pmc/mtl.c +++ b/drivers/platform/x86/intel/pmc/mtl.c @@ -979,6 +979,8 @@ static void mtl_d3_fixup(void) static int mtl_resume(struct pmc_dev *pmcdev) { mtl_d3_fixup(); + pmc_core_send_ltr_ignore(pmcdev, 3, 0); + return pmc_core_resume_common(pmcdev); } @@ -989,6 +991,7 @@ int mtl_core_init(struct pmc_dev *pmcdev) mtl_d3_fixup(); + pmcdev->suspend = cnl_suspend; pmcdev->resume = mtl_resume; pmcdev->regmap_list = mtl_pmc_info_list; @@ -1002,11 +1005,5 @@ int mtl_core_init(struct pmc_dev *pmcdev) return ret; } - /* Due to a hardware limitation, the GBE LTR blocks PC10 - * when a cable is attached. Tell the PMC to ignore it. - */ - dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3, 1); - return 0; } diff --git a/drivers/platform/x86/intel/pmc/tgl.c b/drivers/platform/x86/intel/pmc/tgl.c index 078263db24c7..e88d3d00c853 100644 --- a/drivers/platform/x86/intel/pmc/tgl.c +++ b/drivers/platform/x86/intel/pmc/tgl.c @@ -259,16 +259,15 @@ int tgl_core_init(struct pmc_dev *pmcdev) int ret; pmc->map = &tgl_reg_map; + + pmcdev->suspend = cnl_suspend; + pmcdev->resume = cnl_resume; + ret = get_primary_reg_base(pmc); if (ret) return ret; pmc_core_get_tgl_lpm_reqs(pmcdev->pdev); - /* Due to a hardware limitation, the GBE LTR blocks PC10 - * when a cable is attached. Tell the PMC to ignore it. - */ - dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3, 1); return 0; } From 7d5f219f1ef69f27eb8cbfb794d634fc9c4d24ac Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 20 Dec 2023 15:52:11 +0900 Subject: [PATCH 1380/1397] ksmbd: fix slab-out-of-bounds in smb_strndup_from_utf16() commit d10c77873ba1e9e6b91905018e29e196fd5f863d upstream. If ->NameOffset/Length is bigger than ->CreateContextsOffset/Length, ksmbd_check_message doesn't validate request buffer it correctly. So slab-out-of-bounds warning from calling smb_strndup_from_utf16() in smb2_open() could happen. If ->NameLength is non-zero, Set the larger of the two sums (Name and CreateContext size) as the offset and length of the data area. Reported-by: Yang Chaoming Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2misc.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/smb2misc.c b/fs/smb/server/smb2misc.c index 23bd3d1209df..03dded29a980 100644 --- a/fs/smb/server/smb2misc.c +++ b/fs/smb/server/smb2misc.c @@ -106,16 +106,25 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len, break; case SMB2_CREATE: { + unsigned short int name_off = + le16_to_cpu(((struct smb2_create_req *)hdr)->NameOffset); + unsigned short int name_len = + le16_to_cpu(((struct smb2_create_req *)hdr)->NameLength); + if (((struct smb2_create_req *)hdr)->CreateContextsLength) { *off = le32_to_cpu(((struct smb2_create_req *) hdr)->CreateContextsOffset); *len = le32_to_cpu(((struct smb2_create_req *) hdr)->CreateContextsLength); - break; + if (!name_len) + break; + + if (name_off + name_len < (u64)*off + *len) + break; } - *off = le16_to_cpu(((struct smb2_create_req *)hdr)->NameOffset); - *len = le16_to_cpu(((struct smb2_create_req *)hdr)->NameLength); + *off = name_off; + *len = name_len; break; } case SMB2_QUERY_INFO: From 11d41d01c088ff1bd2d3de4762117385b657bb44 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Fri, 29 Dec 2023 15:39:11 +0900 Subject: [PATCH 1381/1397] platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b28ff7a7c3245d7f62acc20f15b4361292fe4117 upstream. p2sb_bar() unhides P2SB device to get resources from the device. It guards the operation by locking pci_rescan_remove_lock so that parallel rescans do not find the P2SB device. However, this lock causes deadlock when PCI bus rescan is triggered by /sys/bus/pci/rescan. The rescan locks pci_rescan_remove_lock and probes PCI devices. When PCI devices call p2sb_bar() during probe, it locks pci_rescan_remove_lock again. Hence the deadlock. To avoid the deadlock, do not lock pci_rescan_remove_lock in p2sb_bar(). Instead, do the lock at fs_initcall. Introduce p2sb_cache_resources() for fs_initcall which gets and caches the P2SB resources. At p2sb_bar(), refer the cache and return to the caller. Suggested-by: Andy Shevchenko Fixes: 9745fb07474f ("platform/x86/intel: Add Primary to Sideband (P2SB) bridge support") Cc: stable@vger.kernel.org Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Andy Shevchenko Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/linux-pci/6xb24fjmptxxn5js2fjrrddjae6twex5bjaftwqsuawuqqqydx@7cl3uik5ef6j/ Link: https://lore.kernel.org/r/20231229063912.2517922-2-shinichiro.kawasaki@wdc.com Signed-off-by: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/p2sb.c | 172 +++++++++++++++++++++++++++--------- 1 file changed, 131 insertions(+), 41 deletions(-) diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 1cf2471d54dd..fcf1ce8bbdc5 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -26,6 +26,21 @@ static const struct x86_cpu_id p2sb_cpu_ids[] = { {} }; +/* + * Cache BAR0 of P2SB device functions 0 to 7. + * TODO: The constant 8 is the number of functions that PCI specification + * defines. Same definitions exist tree-wide. Unify this definition and + * the other definitions then move to include/uapi/linux/pci.h. + */ +#define NR_P2SB_RES_CACHE 8 + +struct p2sb_res_cache { + u32 bus_dev_id; + struct resource res; +}; + +static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE]; + static int p2sb_get_devfn(unsigned int *devfn) { unsigned int fn = P2SB_DEVFN_DEFAULT; @@ -39,8 +54,16 @@ static int p2sb_get_devfn(unsigned int *devfn) return 0; } +static bool p2sb_valid_resource(struct resource *res) +{ + if (res->flags) + return true; + + return false; +} + /* Copy resource from the first BAR of the device in question */ -static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) +static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) { struct resource *bar0 = &pdev->resource[0]; @@ -56,47 +79,64 @@ static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) mem->end = bar0->end; mem->flags = bar0->flags; mem->desc = bar0->desc; - - return 0; } -static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) { + struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)]; struct pci_dev *pdev; - int ret; pdev = pci_scan_single_device(bus, devfn); if (!pdev) - return -ENODEV; + return; - ret = p2sb_read_bar0(pdev, mem); + p2sb_read_bar0(pdev, &cache->res); + cache->bus_dev_id = bus->dev.id; pci_stop_and_remove_bus_device(pdev); - return ret; + return; } -/** - * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR - * @bus: PCI bus to communicate with - * @devfn: PCI slot and function to communicate with - * @mem: memory resource to be filled in - * - * The BIOS prevents the P2SB device from being enumerated by the PCI - * subsystem, so we need to unhide and hide it back to lookup the BAR. - * - * if @bus is NULL, the bus 0 in domain 0 will be used. - * If @devfn is 0, it will be replaced by devfn of the P2SB device. - * - * Caller must provide a valid pointer to @mem. - * - * Locking is handled by pci_rescan_remove_lock mutex. - * - * Return: - * 0 on success or appropriate errno value on error. - */ -int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) +{ + unsigned int slot, fn; + + if (PCI_FUNC(devfn) == 0) { + /* + * When function number of the P2SB device is zero, scan it and + * other function numbers, and if devices are available, cache + * their BAR0s. + */ + slot = PCI_SLOT(devfn); + for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) + p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); + } else { + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + } + + if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) + return -ENOENT; + + return 0; +} + +static struct pci_bus *p2sb_get_bus(struct pci_bus *bus) +{ + static struct pci_bus *p2sb_bus; + + bus = bus ?: p2sb_bus; + if (bus) + return bus; + + /* Assume P2SB is on the bus 0 in domain 0 */ + p2sb_bus = pci_find_bus(0, 0); + return p2sb_bus; +} + +static int p2sb_cache_resources(void) { - struct pci_dev *pdev_p2sb; + struct pci_bus *bus; unsigned int devfn_p2sb; u32 value = P2SBC_HIDE; int ret; @@ -106,8 +146,9 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) if (ret) return ret; - /* if @bus is NULL, use bus 0 in domain 0 */ - bus = bus ?: pci_find_bus(0, 0); + bus = p2sb_get_bus(NULL); + if (!bus) + return -ENODEV; /* * Prevent concurrent PCI bus scan from seeing the P2SB device and @@ -115,17 +156,16 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) */ pci_lock_rescan_remove(); - /* Unhide the P2SB device, if needed */ + /* + * The BIOS prevents the P2SB device from being enumerated by the PCI + * subsystem, so we need to unhide and hide it back to lookup the BAR. + * Unhide the P2SB device here, if needed. + */ pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); if (value & P2SBC_HIDE) pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); - pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb); - if (devfn) - ret = p2sb_scan_and_read(bus, devfn, mem); - else - ret = p2sb_read_bar0(pdev_p2sb, mem); - pci_stop_and_remove_bus_device(pdev_p2sb); + ret = p2sb_scan_and_cache(bus, devfn_p2sb); /* Hide the P2SB device, if it was hidden */ if (value & P2SBC_HIDE) @@ -133,12 +173,62 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) pci_unlock_rescan_remove(); - if (ret) - return ret; + return ret; +} + +/** + * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR + * @bus: PCI bus to communicate with + * @devfn: PCI slot and function to communicate with + * @mem: memory resource to be filled in + * + * If @bus is NULL, the bus 0 in domain 0 will be used. + * If @devfn is 0, it will be replaced by devfn of the P2SB device. + * + * Caller must provide a valid pointer to @mem. + * + * Return: + * 0 on success or appropriate errno value on error. + */ +int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +{ + struct p2sb_res_cache *cache; + int ret; - if (mem->flags == 0) + bus = p2sb_get_bus(bus); + if (!bus) return -ENODEV; + if (!devfn) { + ret = p2sb_get_devfn(&devfn); + if (ret) + return ret; + } + + cache = &p2sb_resources[PCI_FUNC(devfn)]; + if (cache->bus_dev_id != bus->dev.id) + return -ENODEV; + + if (!p2sb_valid_resource(&cache->res)) + return -ENOENT; + + memcpy(mem, &cache->res, sizeof(*mem)); return 0; } EXPORT_SYMBOL_GPL(p2sb_bar); + +static int __init p2sb_fs_init(void) +{ + p2sb_cache_resources(); + return 0; +} + +/* + * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can + * not be locked in sysfs pci bus rescan path because of deadlock. To + * avoid the deadlock, access to P2SB devices with the lock at an early + * step in kernel initialization and cache required resources. This + * should happen after subsys_initcall which initializes PCI subsystem + * and before device_initcall which requires P2SB resources. + */ +fs_initcall(p2sb_fs_init); From 2c30b8b105d690b5bb69aab31f198e2385be9b03 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 13 Dec 2023 12:50:57 -0800 Subject: [PATCH 1382/1397] maple_tree: do not preallocate nodes for slot stores commit 4249f13c11be8b8b7bf93204185e150c3bdc968d upstream. mas_preallocate() defaults to requesting 1 node for preallocation and then ,depending on the type of store, will update the request variable. There isn't a check for a slot store type, so slot stores are preallocating the default 1 node. Slot stores do not require any additional nodes, so add a check for the slot store case that will bypass node_count_gfp(). Update the tests to reflect that slot stores do not require allocations. User visible effects of this bug include increased memory usage from the unneeded node that was allocated. Link: https://lkml.kernel.org/r/20231213205058.386589-1-sidhartha.kumar@oracle.com Fixes: 0b8bb544b1a7 ("maple_tree: update mas_preallocate() testing") Signed-off-by: Sidhartha Kumar Cc: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Peng Zhang Cc: [6.6+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- lib/maple_tree.c | 11 +++++++++++ tools/testing/radix-tree/maple.c | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index bb24d84a4922..684689457d77 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5501,6 +5501,17 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) mas_wr_end_piv(&wr_mas); node_size = mas_wr_new_end(&wr_mas); + + /* Slot store, does not require additional nodes */ + if (node_size == wr_mas.node_end) { + /* reuse node */ + if (!mt_in_rcu(mas->tree)) + return 0; + /* shifting boundary */ + if (wr_mas.offset_end - mas->offset == 1) + return 0; + } + if (node_size >= mt_slots[wr_mas.type]) { /* Split, worst case for now. */ request = 1 + mas_mt_height(mas) * 2; diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index e5da1cad70ba..76a8990bb14e 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -35538,7 +35538,7 @@ static noinline void __init check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated != 1); + MT_BUG_ON(mt, allocated != 0); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); From 09141f08fdf69a3c4ec58fc53904e5f8f7c680b0 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Thu, 14 Dec 2023 15:19:30 +0500 Subject: [PATCH 1383/1397] selftests: secretmem: floor the memory size to the multiple of page_size commit 0aac13add26d546ac74c89d2883b3a5f0fbea039 upstream. The "locked-in-memory size" limit per process can be non-multiple of page_size. The mmap() fails if we try to allocate locked-in-memory with same size as the allowed limit if it isn't multiple of the page_size because mmap() rounds off the memory size to be allocated to next multiple of page_size. Fix this by flooring the length to be allocated with mmap() to the previous multiple of the page_size. This was getting triggered on KernelCI regularly because of different ulimit settings which wasn't multiple of the page_size. Find logs here: https://linux.kernelci.org/test/plan/id/657654bd8e81e654fae13532/ The bug in was present from the time test was first added. Link: https://lkml.kernel.org/r/20231214101931.1155586-1-usama.anjum@collabora.com Fixes: 76fe17ef588a ("secretmem: test: add basic selftest for memfd_secret(2)") Signed-off-by: Muhammad Usama Anjum Reported-by: "kernelci.org bot" Closes: https://linux.kernelci.org/test/plan/id/657654bd8e81e654fae13532/ Cc: "James E.J. Bottomley" Cc: Mike Rapoport (IBM) Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/mm/memfd_secret.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 957b9e18c729..9b298f6a04b3 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -62,6 +62,9 @@ static void test_mlock_limit(int fd) char *mem; len = mlock_limit_cur; + if (len % page_size != 0) + len = (len/page_size) * page_size; + mem = mmap(NULL, len, prot, mode, fd, 0); if (mem == MAP_FAILED) { fail("unable to mmap secret memory\n"); From d16eb52c176ccfd9ba29d0e5830a271ecded5290 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 13 Dec 2023 14:23:24 +0800 Subject: [PATCH 1384/1397] mm/filemap: avoid buffered read/write race to read inconsistent data commit e2c27b803bb664748e090d99042ac128b3f88d92 upstream. The following concurrency may cause the data read to be inconsistent with the data on disk: cpu1 cpu2 ------------------------------|------------------------------ // Buffered write 2048 from 0 ext4_buffered_write_iter generic_perform_write copy_page_from_iter_atomic ext4_da_write_end ext4_da_do_write_end block_write_end __block_commit_write folio_mark_uptodate // Buffered read 4096 from 0 smp_wmb() ext4_file_read_iter set_bit(PG_uptodate, folio_flags) generic_file_read_iter i_size_write // 2048 filemap_read unlock_page(page) filemap_get_pages filemap_get_read_batch folio_test_uptodate(folio) ret = test_bit(PG_uptodate, folio_flags) if (ret) smp_rmb(); // Ensure that the data in page 0-2048 is up-to-date. // New buffered write 2048 from 2048 ext4_buffered_write_iter generic_perform_write copy_page_from_iter_atomic ext4_da_write_end ext4_da_do_write_end block_write_end __block_commit_write folio_mark_uptodate smp_wmb() set_bit(PG_uptodate, folio_flags) i_size_write // 4096 unlock_page(page) isize = i_size_read(inode) // 4096 // Read the latest isize 4096, but without smp_rmb(), there may be // Load-Load disorder resulting in the data in the 2048-4096 range // in the page is not up-to-date. copy_page_to_iter // copyout 4096 In the concurrency above, we read the updated i_size, but there is no read barrier to ensure that the data in the page is the same as the i_size at this point, so we may copy the unsynchronized page out. Hence adding the missing read memory barrier to fix this. This is a Load-Load reordering issue, which only occurs on some weak mem-ordering architectures (e.g. ARM64, ALPHA), but not on strong mem-ordering architectures (e.g. X86). And theoretically the problem doesn't only happen on ext4, filesystems that call filemap_read() but don't hold inode lock (e.g. btrfs, f2fs, ubifs ...) will have this problem, while filesystems with inode lock (e.g. xfs, nfs) won't have this problem. Link: https://lkml.kernel.org/r/20231213062324.739009-1-libaokun1@huawei.com Signed-off-by: Baokun Li Reviewed-by: Jan Kara Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Dave Chinner Cc: Matthew Wilcox (Oracle) Cc: Ritesh Harjani (IBM) Cc: Theodore Ts'o Cc: yangerkun Cc: Yu Kuai Cc: Zhang Yi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index d40a20c9d59f..b1ef7be1205b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2666,6 +2666,15 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, goto put_folios; end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count); + /* + * Pairs with a barrier in + * block_write_end()->mark_buffer_dirty() or other page + * dirtying routines like iomap_write_end() to ensure + * changes to page contents are visible before we see + * increased inode size. + */ + smp_rmb(); + /* * Once we start copying data, we don't want to be touching any * cachelines that might be contended: From 07550b1461d4d0499165e7d6f7718cfd0e440427 Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Thu, 14 Dec 2023 04:58:41 +0000 Subject: [PATCH 1385/1397] mm: migrate high-order folios in swap cache correctly commit fc346d0a70a13d52fe1c4bc49516d83a42cd7c4c upstream. Large folios occupy N consecutive entries in the swap cache instead of using multi-index entries like the page cache. However, if a large folio is re-added to the LRU list, it can be migrated. The migration code was not aware of the difference between the swap cache and the page cache and assumed that a single xas_store() would be sufficient. This leaves potentially many stale pointers to the now-migrated folio in the swap cache, which can lead to almost arbitrary data corruption in the future. This can also manifest as infinite loops with the RCU read lock held. [willy@infradead.org: modifications to the changelog & tweaked the fix] Fixes: 3417013e0d18 ("mm/migrate: Add folio_migrate_mapping()") Link: https://lkml.kernel.org/r/20231214045841.961776-1-willy@infradead.org Signed-off-by: Charan Teja Kalla Signed-off-by: Matthew Wilcox (Oracle) Reported-by: Charan Teja Kalla Closes: https://lkml.kernel.org/r/1700569840-17327-1-git-send-email-quic_charante@quicinc.com Cc: David Hildenbrand Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Naoya Horiguchi Cc: Shakeel Butt Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 06086dc9da28..03bc2063ac87 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -405,6 +405,7 @@ int folio_migrate_mapping(struct address_space *mapping, int dirty; int expected_count = folio_expected_refs(mapping, folio) + extra_count; long nr = folio_nr_pages(folio); + long entries, i; if (!mapping) { /* Anonymous page without mapping */ @@ -442,8 +443,10 @@ int folio_migrate_mapping(struct address_space *mapping, folio_set_swapcache(newfolio); newfolio->private = folio_get_private(folio); } + entries = nr; } else { VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio); + entries = 1; } /* Move dirty while page refs frozen and newpage not yet exposed */ @@ -453,7 +456,11 @@ int folio_migrate_mapping(struct address_space *mapping, folio_set_dirty(newfolio); } - xas_store(&xas, newfolio); + /* Swap cache still stores N entries instead of a high-order entry */ + for (i = 0; i < entries; i++) { + xas_store(&xas, newfolio); + xas_next(&xas); + } /* * Drop cache reference from old page by unfreezing From 8c7da70d9ae4c1abdf62d91d0aa28feee85c7f1b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 18 Dec 2023 13:58:37 +0000 Subject: [PATCH 1386/1397] mm/memory-failure: cast index to loff_t before shifting it commit 39ebd6dce62d8cfe3864e16148927a139f11bc9a upstream. On 32-bit systems, we'll lose the top bits of index because arithmetic will be performed in unsigned long instead of unsigned long long. This affects files over 4GB in size. Link: https://lkml.kernel.org/r/20231218135837.3310403-4-willy@infradead.org Fixes: 6100e34b2526 ("mm, memory_failure: Teach memory_failure() about dev_pagemap pages") Signed-off-by: Matthew Wilcox (Oracle) Cc: Dan Williams Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4d6e43c88489..f2397a8cd6de 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1705,7 +1705,7 @@ static void unmap_and_kill(struct list_head *to_kill, unsigned long pfn, * mapping being torn down is communicated in siginfo, see * kill_proc() */ - loff_t start = (index << PAGE_SHIFT) & ~(size - 1); + loff_t start = ((loff_t)index << PAGE_SHIFT) & ~(size - 1); unmap_mapping_range(mapping, start, size, 0); } From d16c5d215b53b395df51668fc8387dd618867814 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 18 Dec 2023 13:58:36 +0000 Subject: [PATCH 1387/1397] mm/memory-failure: check the mapcount of the precise page commit c79c5a0a00a9457718056b588f312baadf44e471 upstream. A process may map only some of the pages in a folio, and might be missed if it maps the poisoned page but not the head page. Or it might be unnecessarily hit if it maps the head page, but not the poisoned page. Link: https://lkml.kernel.org/r/20231218135837.3310403-3-willy@infradead.org Fixes: 7af446a841a2 ("HWPOISON, hugetlb: enable error handling path for hugepage") Signed-off-by: Matthew Wilcox (Oracle) Cc: Dan Williams Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memory-failure.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index f2397a8cd6de..16e002e08cf8 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1571,7 +1571,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, * This check implies we don't kill processes if their pages * are in the swap cache early. Those are always late kills. */ - if (!page_mapped(hpage)) + if (!page_mapped(p)) return true; if (PageSwapCache(p)) { @@ -1622,10 +1622,10 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, try_to_unmap(folio, ttu); } - unmap_success = !page_mapped(hpage); + unmap_success = !page_mapped(p); if (!unmap_success) pr_err("%#lx: failed to unmap page (mapcount=%d)\n", - pfn, page_mapcount(hpage)); + pfn, page_mapcount(p)); /* * try_to_unmap() might put mlocked page in lru cache, so call From c62b9a2daf2866622cc9e8d0451bf2fc97b541c9 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 18 Dec 2023 08:19:39 -0800 Subject: [PATCH 1388/1397] Revert "nvme-fc: fix race between error recovery and creating association" commit d3e8b1858734bf46cda495be4165787b9a3981a6 upstream. The commit was identified to might sleep in invalid context and is blocking regression testing. This reverts commit ee6fdc5055e916b1dd497f11260d4901c4c1e55e. Link: https://lore.kernel.org/linux-nvme/hkhl56n665uvc6t5d6h3wtx7utkcorw4xlwi7d2t2bnonavhe6@xaan6pu43ap6/ Link: https://lists.infradead.org/pipermail/linux-nvme/2023-December/043756.html Reported-by: Daniel Wagner Reported-by: Maurizio Lombardi Cc: Michael Liang Tested-by: Daniel Wagner Reviewed-by: Daniel Wagner Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/fc.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a15b37750d6e..206f1b4e5eb1 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2548,24 +2548,17 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) * the controller. Abort any ios on the association and let the * create_association error path resolve things. */ - enum nvme_ctrl_state state; - unsigned long flags; - - spin_lock_irqsave(&ctrl->lock, flags); - state = ctrl->ctrl.state; - if (state == NVME_CTRL_CONNECTING) { - set_bit(ASSOC_FAILED, &ctrl->flags); - spin_unlock_irqrestore(&ctrl->lock, flags); + if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { __nvme_fc_abort_outstanding_ios(ctrl, true); + set_bit(ASSOC_FAILED, &ctrl->flags); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport error during (re)connect\n", ctrl->cnum); return; } - spin_unlock_irqrestore(&ctrl->lock, flags); /* Otherwise, only proceed if in LIVE state - e.g. on first error */ - if (state != NVME_CTRL_LIVE) + if (ctrl->ctrl.state != NVME_CTRL_LIVE) return; dev_warn(ctrl->ctrl.device, @@ -3179,16 +3172,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) else ret = nvme_fc_recreate_io_queues(ctrl); } - - spin_lock_irqsave(&ctrl->lock, flags); if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) ret = -EIO; - if (ret) { - spin_unlock_irqrestore(&ctrl->lock, flags); + if (ret) goto out_term_aen_ops; - } + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - spin_unlock_irqrestore(&ctrl->lock, flags); ctrl->ctrl.nr_reconnects = 0; From baa88944038bbecc6f712e0e7c602ac5cfa6686a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 26 Dec 2023 12:59:02 -0500 Subject: [PATCH 1389/1397] ring-buffer: Fix wake ups when buffer_percent is set to 100 commit 623b1f896fa8a669a277ee5a258307a16c7377a3 upstream. The tracefs file "buffer_percent" is to allow user space to set a water-mark on how much of the tracing ring buffer needs to be filled in order to wake up a blocked reader. 0 - is to wait until any data is in the buffer 1 - is to wait for 1% of the sub buffers to be filled 50 - would be half of the sub buffers are filled with data 100 - is not to wake the waiter until the ring buffer is completely full Unfortunately the test for being full was: dirty = ring_buffer_nr_dirty_pages(buffer, cpu); return (dirty * 100) > (full * nr_pages); Where "full" is the value for "buffer_percent". There is two issues with the above when full == 100. 1. dirty * 100 > 100 * nr_pages will never be true That is, the above is basically saying that if the user sets buffer_percent to 100, more pages need to be dirty than exist in the ring buffer! 2. The page that the writer is on is never considered dirty, as dirty pages are only those that are full. When the writer goes to a new sub-buffer, it clears the contents of that sub-buffer. That is, even if the check was ">=" it would still not be equal as the most pages that can be considered "dirty" is nr_pages - 1. To fix this, add one to dirty and use ">=" in the compare. Link: https://lore.kernel.org/linux-trace-kernel/20231226125902.4a057f1d@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Acked-by: Masami Hiramatsu (Google) Fixes: 03329f9939781 ("tracing: Add tracefs file buffer_percentage") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 783a500e89c5..a2e3f954f31b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -881,9 +881,14 @@ static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int f if (!nr_pages || !full) return true; - dirty = ring_buffer_nr_dirty_pages(buffer, cpu); + /* + * Add one as dirty will never equal nr_pages, as the sub-buffer + * that the writer is on is not counted as dirty. + * This is needed if "buffer_percent" is set to 100. + */ + dirty = ring_buffer_nr_dirty_pages(buffer, cpu) + 1; - return (dirty * 100) > (full * nr_pages); + return (dirty * 100) >= (full * nr_pages); } /* From a12754a8f5ac23f7792029afeec06f00f85546ef Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 29 Dec 2023 11:51:34 -0500 Subject: [PATCH 1390/1397] ftrace: Fix modification of direct_function hash while in use commit d05cb470663a2a1879277e544f69e660208f08f2 upstream. Masami Hiramatsu reported a memory leak in register_ftrace_direct() where if the number of new entries are added is large enough to cause two allocations in the loop: for (i = 0; i < size; i++) { hlist_for_each_entry(entry, &hash->buckets[i], hlist) { new = ftrace_add_rec_direct(entry->ip, addr, &free_hash); if (!new) goto out_remove; entry->direct = addr; } } Where ftrace_add_rec_direct() has: if (ftrace_hash_empty(direct_functions) || direct_functions->count > 2 * (1 << direct_functions->size_bits)) { struct ftrace_hash *new_hash; int size = ftrace_hash_empty(direct_functions) ? 0 : direct_functions->count + 1; if (size < 32) size = 32; new_hash = dup_hash(direct_functions, size); if (!new_hash) return NULL; *free_hash = direct_functions; direct_functions = new_hash; } The "*free_hash = direct_functions;" can happen twice, losing the previous allocation of direct_functions. But this also exposed a more serious bug. The modification of direct_functions above is not safe. As direct_functions can be referenced at any time to find what direct caller it should call, the time between: new_hash = dup_hash(direct_functions, size); and direct_functions = new_hash; can have a race with another CPU (or even this one if it gets interrupted), and the entries being moved to the new hash are not referenced. That's because the "dup_hash()" is really misnamed and is really a "move_hash()". It moves the entries from the old hash to the new one. Now even if that was changed, this code is not proper as direct_functions should not be updated until the end. That is the best way to handle function reference changes, and is the way other parts of ftrace handles this. The following is done: 1. Change add_hash_entry() to return the entry it created and inserted into the hash, and not just return success or not. 2. Replace ftrace_add_rec_direct() with add_hash_entry(), and remove the former. 3. Allocate a "new_hash" at the start that is made for holding both the new hash entries as well as the existing entries in direct_functions. 4. Copy (not move) the direct_function entries over to the new_hash. 5. Copy the entries of the added hash to the new_hash. 6. If everything succeeds, then use rcu_pointer_assign() to update the direct_functions with the new_hash. This simplifies the code and fixes both the memory leak as well as the race condition mentioned above. Link: https://lore.kernel.org/all/170368070504.42064.8960569647118388081.stgit@devnote2/ Link: https://lore.kernel.org/linux-trace-kernel/20231229115134.08dd5174@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Masami Hiramatsu (Google) Fixes: 763e34e74bb7d ("ftrace: Add register_ftrace_direct()") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 100 ++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 53 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8de8bec5f366..b01ae7d36021 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1183,18 +1183,19 @@ static void __add_hash_entry(struct ftrace_hash *hash, hash->count++; } -static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip) +static struct ftrace_func_entry * +add_hash_entry(struct ftrace_hash *hash, unsigned long ip) { struct ftrace_func_entry *entry; entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) - return -ENOMEM; + return NULL; entry->ip = ip; __add_hash_entry(hash, entry); - return 0; + return entry; } static void @@ -1349,7 +1350,6 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) struct ftrace_func_entry *entry; struct ftrace_hash *new_hash; int size; - int ret; int i; new_hash = alloc_ftrace_hash(size_bits); @@ -1366,8 +1366,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) size = 1 << hash->size_bits; for (i = 0; i < size; i++) { hlist_for_each_entry(entry, &hash->buckets[i], hlist) { - ret = add_hash_entry(new_hash, entry->ip); - if (ret < 0) + if (add_hash_entry(new_hash, entry->ip) == NULL) goto free_hash; } } @@ -2536,7 +2535,7 @@ ftrace_find_unique_ops(struct dyn_ftrace *rec) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* Protected by rcu_tasks for reading, and direct_mutex for writing */ -static struct ftrace_hash *direct_functions = EMPTY_HASH; +static struct ftrace_hash __rcu *direct_functions = EMPTY_HASH; static DEFINE_MUTEX(direct_mutex); int ftrace_direct_func_count; @@ -2555,39 +2554,6 @@ unsigned long ftrace_find_rec_direct(unsigned long ip) return entry->direct; } -static struct ftrace_func_entry* -ftrace_add_rec_direct(unsigned long ip, unsigned long addr, - struct ftrace_hash **free_hash) -{ - struct ftrace_func_entry *entry; - - if (ftrace_hash_empty(direct_functions) || - direct_functions->count > 2 * (1 << direct_functions->size_bits)) { - struct ftrace_hash *new_hash; - int size = ftrace_hash_empty(direct_functions) ? 0 : - direct_functions->count + 1; - - if (size < 32) - size = 32; - - new_hash = dup_hash(direct_functions, size); - if (!new_hash) - return NULL; - - *free_hash = direct_functions; - direct_functions = new_hash; - } - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return NULL; - - entry->ip = ip; - entry->direct = addr; - __add_hash_entry(direct_functions, entry); - return entry; -} - static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { @@ -4223,8 +4189,8 @@ enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int clear_filter) /* Do nothing if it exists */ if (entry) return 0; - - ret = add_hash_entry(hash, rec->ip); + if (add_hash_entry(hash, rec->ip) == NULL) + ret = -ENOMEM; } return ret; } @@ -5266,7 +5232,8 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) return 0; } - return add_hash_entry(hash, ip); + entry = add_hash_entry(hash, ip); + return entry ? 0 : -ENOMEM; } static int @@ -5410,7 +5377,7 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long */ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { - struct ftrace_hash *hash, *free_hash = NULL; + struct ftrace_hash *hash, *new_hash = NULL, *free_hash = NULL; struct ftrace_func_entry *entry, *new; int err = -EBUSY, size, i; @@ -5436,17 +5403,44 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) } } - /* ... and insert them to direct_functions hash. */ err = -ENOMEM; + + /* Make a copy hash to place the new and the old entries in */ + size = hash->count + direct_functions->count; + if (size > 32) + size = 32; + new_hash = alloc_ftrace_hash(fls(size)); + if (!new_hash) + goto out_unlock; + + /* Now copy over the existing direct entries */ + size = 1 << direct_functions->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &direct_functions->buckets[i], hlist) { + new = add_hash_entry(new_hash, entry->ip); + if (!new) + goto out_unlock; + new->direct = entry->direct; + } + } + + /* ... and add the new entries */ + size = 1 << hash->size_bits; for (i = 0; i < size; i++) { hlist_for_each_entry(entry, &hash->buckets[i], hlist) { - new = ftrace_add_rec_direct(entry->ip, addr, &free_hash); + new = add_hash_entry(new_hash, entry->ip); if (!new) - goto out_remove; + goto out_unlock; + /* Update both the copy and the hash entry */ + new->direct = addr; entry->direct = addr; } } + free_hash = direct_functions; + rcu_assign_pointer(direct_functions, new_hash); + new_hash = NULL; + ops->func = call_direct_funcs; ops->flags = MULTI_FLAGS; ops->trampoline = FTRACE_REGS_ADDR; @@ -5454,17 +5448,17 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) err = register_ftrace_function_nolock(ops); - out_remove: - if (err) - remove_direct_functions_hash(hash, addr); - out_unlock: mutex_unlock(&direct_mutex); - if (free_hash) { + if (free_hash && free_hash != EMPTY_HASH) { synchronize_rcu_tasks(); free_ftrace_hash(free_hash); } + + if (new_hash) + free_ftrace_hash(new_hash); + return err; } EXPORT_SYMBOL_GPL(register_ftrace_direct); @@ -6309,7 +6303,7 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) if (entry) continue; - if (add_hash_entry(hash, rec->ip) < 0) + if (add_hash_entry(hash, rec->ip) == NULL) goto out; } else { if (entry) { From ccd48707d51170a7518b77074f4053815ec58186 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 28 Dec 2023 09:51:49 -0500 Subject: [PATCH 1391/1397] tracing: Fix blocked reader of snapshot buffer commit 39a7dc23a1ed0fe81141792a09449d124c5953bd upstream. If an application blocks on the snapshot or snapshot_raw files, expecting to be woken up when a snapshot occurs, it will not happen. Or it may happen with an unexpected result. That result is that the application will be reading the main buffer instead of the snapshot buffer. That is because when the snapshot occurs, the main and snapshot buffers are swapped. But the reader has a descriptor still pointing to the buffer that it originally connected to. This is fine for the main buffer readers, as they may be blocked waiting for a watermark to be hit, and when a snapshot occurs, the data that the main readers want is now on the snapshot buffer. But for waiters of the snapshot buffer, they are waiting for an event to occur that will trigger the snapshot and they can then consume it quickly to save the snapshot before the next snapshot occurs. But to do this, they need to read the new snapshot buffer, not the old one that is now receiving new data. Also, it does not make sense to have a watermark "buffer_percent" on the snapshot buffer, as the snapshot buffer is static and does not receive new data except all at once. Link: https://lore.kernel.org/linux-trace-kernel/20231228095149.77f5b45d@gandalf.local.home Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Cc: Mark Rutland Acked-by: Masami Hiramatsu (Google) Fixes: debdd57f5145f ("tracing: Make a snapshot feature available from userspace") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 3 ++- kernel/trace/trace.c | 20 +++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a2e3f954f31b..901a140e30fa 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -949,7 +949,8 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) /* make sure the waiters see the new index */ smp_wmb(); - rb_wake_up_waiters(&rbwork->work); + /* This can be called in any context */ + irq_work_queue(&rbwork->work); } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d3664b803623..a55f7d091355 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1893,6 +1893,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); + + /* Any waiters on the old snapshot buffer need to wake up */ + ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS); } /** @@ -1944,12 +1947,23 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) static int wait_on_pipe(struct trace_iterator *iter, int full) { + int ret; + /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) return 0; - return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, - full); + ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full); + +#ifdef CONFIG_TRACER_MAX_TRACE + /* + * Make sure this is still the snapshot buffer, as if a snapshot were + * to happen, this would now be the main buffer. + */ + if (iter->snapshot) + iter->array_buffer = &iter->tr->max_buffer; +#endif + return ret; } #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -8514,7 +8528,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, wait_index = READ_ONCE(iter->wait_index); - ret = wait_on_pipe(iter, iter->tr->buffer_percent); + ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent); if (ret) goto out; From d673099085ddf1c28a6c0c1a245480919b5fe9e5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 16 Dec 2023 05:47:15 +0000 Subject: [PATCH 1392/1397] wifi: cfg80211: fix CQM for non-range use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7e7efdda6adb385fbdfd6f819d76bc68c923c394 upstream. [note: this is commit 4a7e92551618f3737b305f62451353ee05662f57 reapplied; that commit had been reverted in 6.6.6 because it caused regressions, see https://lore.kernel.org/stable/2023121450-habitual-transpose-68a1@gregkh/ for details] My prior race fix here broke CQM when ranges aren't used, as the reporting worker now requires the cqm_config to be set in the wdev, but isn't set when there's no range configured. Rather than continuing to special-case the range version, set the cqm_config always and configure accordingly, also tracking if range was used or not to be able to clear the configuration appropriately with the same API, which was actually not right if both were implemented by a driver for some reason, as is the case with mac80211 (though there the implementations are equivalent so it doesn't matter.) Also, the original multiple-RSSI commit lost checking for the callback, so might have potentially crashed if a driver had neither implementation, and userspace tried to use it despite not being advertised as supported. Cc: stable@vger.kernel.org Fixes: 4a4b8169501b ("cfg80211: Accept multiple RSSI thresholds for CQM") Fixes: 37c20b2effe9 ("wifi: cfg80211: fix cqm_config access race") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman Signed-off-by: "Léo Lam" Signed-off-by: Greg Kroah-Hartman --- net/wireless/core.h | 1 + net/wireless/nl80211.c | 50 ++++++++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/net/wireless/core.h b/net/wireless/core.h index e536c0b615a0..f0a3a2317638 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -299,6 +299,7 @@ struct cfg80211_cqm_config { u32 rssi_hyst; s32 last_rssi_event_value; enum nl80211_cqm_rssi_threshold_event last_rssi_event_type; + bool use_range_api; int n_rssi_thresholds; s32 rssi_thresholds[] __counted_by(n_rssi_thresholds); }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 931a03f4549c..6a82dd876f27 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12824,10 +12824,6 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, int i, n, low_index; int err; - /* RSSI reporting disabled? */ - if (!cqm_config) - return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); - /* * Obtain current RSSI value if possible, if not and no RSSI threshold * event has been received yet, we should receive an event after a @@ -12902,18 +12898,6 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) { - if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */ - return rdev_set_cqm_rssi_config(rdev, dev, 0, 0); - - return rdev_set_cqm_rssi_config(rdev, dev, - thresholds[0], hysteresis); - } - - if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_CQM_RSSI_LIST)) - return -EOPNOTSUPP; - if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */ n_thresholds = 0; @@ -12921,6 +12905,20 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, old = rcu_dereference_protected(wdev->cqm_config, lockdep_is_held(&wdev->mtx)); + /* if already disabled just succeed */ + if (!n_thresholds && !old) + return 0; + + if (n_thresholds > 1) { + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_CQM_RSSI_LIST) || + !rdev->ops->set_cqm_rssi_range_config) + return -EOPNOTSUPP; + } else { + if (!rdev->ops->set_cqm_rssi_config) + return -EOPNOTSUPP; + } + if (n_thresholds) { cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds, n_thresholds), @@ -12935,13 +12933,26 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, memcpy(cqm_config->rssi_thresholds, thresholds, flex_array_size(cqm_config, rssi_thresholds, n_thresholds)); + cqm_config->use_range_api = n_thresholds > 1 || + !rdev->ops->set_cqm_rssi_config; rcu_assign_pointer(wdev->cqm_config, cqm_config); + + if (cqm_config->use_range_api) + err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); + else + err = rdev_set_cqm_rssi_config(rdev, dev, + thresholds[0], + hysteresis); } else { RCU_INIT_POINTER(wdev->cqm_config, NULL); + /* if enabled as range also disable via range */ + if (old->use_range_api) + err = rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0); + else + err = rdev_set_cqm_rssi_config(rdev, dev, 0, 0); } - err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config); if (err) { rcu_assign_pointer(wdev->cqm_config, old); kfree_rcu(cqm_config, rcu_head); @@ -19131,10 +19142,11 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work) wdev_lock(wdev); cqm_config = rcu_dereference_protected(wdev->cqm_config, lockdep_is_held(&wdev->mtx)); - if (!wdev->cqm_config) + if (!cqm_config) goto unlock; - cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); + if (cqm_config->use_range_api) + cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); rssi_level = cqm_config->last_rssi_event_value; rssi_event = cqm_config->last_rssi_event_type; From e904e81fd3c2cbe322779b36d12c039a9c7b19af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Sat, 16 Dec 2023 05:47:17 +0000 Subject: [PATCH 1393/1397] wifi: nl80211: fix deadlock in nl80211_set_cqm_rssi (6.6.x) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 008afb9f3d57 ("wifi: cfg80211: fix CQM for non-range use" backported to 6.6.x) causes nl80211_set_cqm_rssi not to release the wdev lock in some of the error paths. Of course, the ensuing deadlock causes userland network managers to break pretty badly, and on typical systems this also causes lockups on on suspend, poweroff and reboot. See [1], [2], [3] for example reports. The upstream commit 7e7efdda6adb ("wifi: cfg80211: fix CQM for non-range use"), committed in November 2023, is completely fine because there was another commit in August 2023 that removed the wdev lock: see commit 076fc8775daf ("wifi: cfg80211: remove wdev mutex"). The reason things broke in 6.6.5 is that commit 4338058f6009 was applied without also applying 076fc8775daf. Commit 076fc8775daf ("wifi: cfg80211: remove wdev mutex") is a rather large commit; adjusting the error handling (which is what this commit does) yields a much simpler patch and was tested to work properly. Fix the deadlock by releasing the lock before returning. [1] https://bugzilla.kernel.org/show_bug.cgi?id=218247 [2] https://bbs.archlinux.org/viewtopic.php?id=290976 [3] https://lore.kernel.org/all/87sf4belmm.fsf@turtle.gmx.de/ Link: https://lore.kernel.org/stable/e374bb16-5b13-44cc-b11a-2f4eefb1ecf5@manjaro.org/ Fixes: 008afb9f3d57 ("wifi: cfg80211: fix CQM for non-range use") Tested-by: "Léo Lam" Tested-by: Philip Müller Cc: stable@vger.kernel.org Cc: Johannes Berg Signed-off-by: "Léo Lam" Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6a82dd876f27..0b0dfecedc50 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12906,17 +12906,23 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, lockdep_is_held(&wdev->mtx)); /* if already disabled just succeed */ - if (!n_thresholds && !old) - return 0; + if (!n_thresholds && !old) { + err = 0; + goto unlock; + } if (n_thresholds > 1) { if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST) || - !rdev->ops->set_cqm_rssi_range_config) - return -EOPNOTSUPP; + !rdev->ops->set_cqm_rssi_range_config) { + err = -EOPNOTSUPP; + goto unlock; + } } else { - if (!rdev->ops->set_cqm_rssi_config) - return -EOPNOTSUPP; + if (!rdev->ops->set_cqm_rssi_config) { + err = -EOPNOTSUPP; + goto unlock; + } } if (n_thresholds) { From b7f1c01b55ad2a5da12f08e5ec3c76dabb99882a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Dec 2023 19:44:49 +0100 Subject: [PATCH 1394/1397] netfilter: nf_tables: skip set commit for deleted/destroyed sets commit 7315dc1e122c85ffdfc8defffbb8f8b616c2eb1a upstream. NFT_MSG_DELSET deactivates all elements in the set, skip set->ops->commit() to avoid the unnecessary clone (for the pipapo case) as well as the sync GC cycle, which could deactivate again expired elements in such set. Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Kevin Rich Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fb5c62aa8d9c..5a14e1ab0b13 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9849,7 +9849,7 @@ static void nft_set_commit_update(struct list_head *set_update_list) list_for_each_entry_safe(set, next, set_update_list, pending_update) { list_del_init(&set->pending_update); - if (!set->ops->commit) + if (!set->ops->commit || set->dead) continue; set->ops->commit(set); From 9b603077e29c84836a44325593e959da818274c7 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Thu, 4 Jan 2024 20:40:50 +0900 Subject: [PATCH 1395/1397] Revert "platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b20712e853305cbd04673f02b7e52ba5b12c11a9 upstream. This reverts commit b28ff7a7c3245d7f62acc20f15b4361292fe4117. The commit introduced P2SB device scan and resource cache during the boot process to avoid deadlock. But it caused detection failure of IDE controllers on old systems [1]. The IDE controllers on old systems and P2SB devices on newer systems have same PCI DEVFN. It is suspected the confusion between those two is the failure cause. Revert the change at this moment until the proper solution gets ready. Link: https://lore.kernel.org/platform-driver-x86/CABq1_vjfyp_B-f4LAL6pg394bP6nDFyvg110TOLHHb0x4aCPeg@mail.gmail.com/T/#m07b30468d9676fc5e3bb2122371121e4559bb383 [1] Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20240104114050.3142690-1-shinichiro.kawasaki@wdc.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/p2sb.c | 172 +++++++++--------------------------- 1 file changed, 41 insertions(+), 131 deletions(-) diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index fcf1ce8bbdc5..1cf2471d54dd 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -26,21 +26,6 @@ static const struct x86_cpu_id p2sb_cpu_ids[] = { {} }; -/* - * Cache BAR0 of P2SB device functions 0 to 7. - * TODO: The constant 8 is the number of functions that PCI specification - * defines. Same definitions exist tree-wide. Unify this definition and - * the other definitions then move to include/uapi/linux/pci.h. - */ -#define NR_P2SB_RES_CACHE 8 - -struct p2sb_res_cache { - u32 bus_dev_id; - struct resource res; -}; - -static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE]; - static int p2sb_get_devfn(unsigned int *devfn) { unsigned int fn = P2SB_DEVFN_DEFAULT; @@ -54,16 +39,8 @@ static int p2sb_get_devfn(unsigned int *devfn) return 0; } -static bool p2sb_valid_resource(struct resource *res) -{ - if (res->flags) - return true; - - return false; -} - /* Copy resource from the first BAR of the device in question */ -static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) +static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) { struct resource *bar0 = &pdev->resource[0]; @@ -79,64 +56,47 @@ static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) mem->end = bar0->end; mem->flags = bar0->flags; mem->desc = bar0->desc; + + return 0; } -static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) +static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem) { - struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)]; struct pci_dev *pdev; + int ret; pdev = pci_scan_single_device(bus, devfn); if (!pdev) - return; + return -ENODEV; - p2sb_read_bar0(pdev, &cache->res); - cache->bus_dev_id = bus->dev.id; + ret = p2sb_read_bar0(pdev, mem); pci_stop_and_remove_bus_device(pdev); - return; -} - -static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) -{ - unsigned int slot, fn; - - if (PCI_FUNC(devfn) == 0) { - /* - * When function number of the P2SB device is zero, scan it and - * other function numbers, and if devices are available, cache - * their BAR0s. - */ - slot = PCI_SLOT(devfn); - for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) - p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); - } else { - /* Scan the P2SB device and cache its BAR0 */ - p2sb_scan_and_cache_devfn(bus, devfn); - } - - if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) - return -ENOENT; - - return 0; -} - -static struct pci_bus *p2sb_get_bus(struct pci_bus *bus) -{ - static struct pci_bus *p2sb_bus; - - bus = bus ?: p2sb_bus; - if (bus) - return bus; - - /* Assume P2SB is on the bus 0 in domain 0 */ - p2sb_bus = pci_find_bus(0, 0); - return p2sb_bus; + return ret; } -static int p2sb_cache_resources(void) +/** + * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR + * @bus: PCI bus to communicate with + * @devfn: PCI slot and function to communicate with + * @mem: memory resource to be filled in + * + * The BIOS prevents the P2SB device from being enumerated by the PCI + * subsystem, so we need to unhide and hide it back to lookup the BAR. + * + * if @bus is NULL, the bus 0 in domain 0 will be used. + * If @devfn is 0, it will be replaced by devfn of the P2SB device. + * + * Caller must provide a valid pointer to @mem. + * + * Locking is handled by pci_rescan_remove_lock mutex. + * + * Return: + * 0 on success or appropriate errno value on error. + */ +int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) { - struct pci_bus *bus; + struct pci_dev *pdev_p2sb; unsigned int devfn_p2sb; u32 value = P2SBC_HIDE; int ret; @@ -146,9 +106,8 @@ static int p2sb_cache_resources(void) if (ret) return ret; - bus = p2sb_get_bus(NULL); - if (!bus) - return -ENODEV; + /* if @bus is NULL, use bus 0 in domain 0 */ + bus = bus ?: pci_find_bus(0, 0); /* * Prevent concurrent PCI bus scan from seeing the P2SB device and @@ -156,16 +115,17 @@ static int p2sb_cache_resources(void) */ pci_lock_rescan_remove(); - /* - * The BIOS prevents the P2SB device from being enumerated by the PCI - * subsystem, so we need to unhide and hide it back to lookup the BAR. - * Unhide the P2SB device here, if needed. - */ + /* Unhide the P2SB device, if needed */ pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); if (value & P2SBC_HIDE) pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); - ret = p2sb_scan_and_cache(bus, devfn_p2sb); + pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb); + if (devfn) + ret = p2sb_scan_and_read(bus, devfn, mem); + else + ret = p2sb_read_bar0(pdev_p2sb, mem); + pci_stop_and_remove_bus_device(pdev_p2sb); /* Hide the P2SB device, if it was hidden */ if (value & P2SBC_HIDE) @@ -173,62 +133,12 @@ static int p2sb_cache_resources(void) pci_unlock_rescan_remove(); - return ret; -} - -/** - * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR - * @bus: PCI bus to communicate with - * @devfn: PCI slot and function to communicate with - * @mem: memory resource to be filled in - * - * If @bus is NULL, the bus 0 in domain 0 will be used. - * If @devfn is 0, it will be replaced by devfn of the P2SB device. - * - * Caller must provide a valid pointer to @mem. - * - * Return: - * 0 on success or appropriate errno value on error. - */ -int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) -{ - struct p2sb_res_cache *cache; - int ret; - - bus = p2sb_get_bus(bus); - if (!bus) - return -ENODEV; - - if (!devfn) { - ret = p2sb_get_devfn(&devfn); - if (ret) - return ret; - } + if (ret) + return ret; - cache = &p2sb_resources[PCI_FUNC(devfn)]; - if (cache->bus_dev_id != bus->dev.id) + if (mem->flags == 0) return -ENODEV; - if (!p2sb_valid_resource(&cache->res)) - return -ENOENT; - - memcpy(mem, &cache->res, sizeof(*mem)); return 0; } EXPORT_SYMBOL_GPL(p2sb_bar); - -static int __init p2sb_fs_init(void) -{ - p2sb_cache_resources(); - return 0; -} - -/* - * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can - * not be locked in sysfs pci bus rescan path because of deadlock. To - * avoid the deadlock, access to P2SB devices with the lock at an early - * step in kernel initialization and cache required resources. This - * should happen after subsys_initcall which initializes PCI subsystem - * and before device_initcall which requires P2SB resources. - */ -fs_initcall(p2sb_fs_init); From c9a51ebb4bac69ed3fee9c0ebe0c2b5149e80845 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 5 Jan 2024 15:19:45 +0100 Subject: [PATCH 1396/1397] Linux 6.6.10 Link: https://lore.kernel.org/r/20240103164834.970234661@linuxfoundation.org Tested-by: Nam Cao Tested-by: Ronald Warsow Tested-by: SeongJae Park Tested-by: Salvatore Bonaccorso Tested-by: Florian Fainelli Tested-by: Kelsey Steele Tested-by: Shuah Khan Tested-by: Takeshi Ogasawara Tested-by: Bagas Sanjaya Tested-by: Ron Economos Tested-by: Harshit Mogalapalli Tested-by: Jon Hunter Tested-by: Allen Pais Tested-by: Guenter Roeck Tested-by: Namjae Jeon Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4d1d5e925bb2..50a862316e15 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 9 +SUBLEVEL = 10 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth From e15045fd597d02a1d9a8260a0210933b9114d9e4 Mon Sep 17 00:00:00 2001 From: Umer Saleem Date: Thu, 11 Jan 2024 14:41:36 +0500 Subject: [PATCH 1397/1397] Update changelog after merging v6.6.10 Signed-off-by: Umer Saleem --- scripts/package/truenas/changelog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/package/truenas/changelog b/scripts/package/truenas/changelog index a99bbf79ee9c..8e570bf0317f 100644 --- a/scripts/package/truenas/changelog +++ b/scripts/package/truenas/changelog @@ -1,3 +1,9 @@ +linux-6.6.10+truenas (6.6.10+truenas-1) sid; urgency=low + + * Merge upstream v6.6.10 release. + + -- Debian Packages List Wed, 10 Jan 2024 19:00:00 +0500 + linux-6.6.2+truenas (6.6.2+truenas-1) sid; urgency=low * Rebase to upstream v6.6.2 release